// v86/src/rust/codegen.rs

use cpu::BitSize;
use cpu2::cpu::{TLB_GLOBAL, TLB_HAS_CODE, TLB_NO_USER, TLB_READONLY, TLB_VALID};
use global_pointers;
use jit::JitContext;
use modrm;
use profiler;
use regs;
use wasmgen::module_init;
use wasmgen::module_init::{WasmBuilder, WasmLocal, WasmLocalI64};
use wasmgen::wasm_util::WasmBuf;
extern "C" {
#[no_mangle]
static mut mem8: *mut u8;
}
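// Names of the condition-test helper functions, indexed by the x86 condition code
// (the low four bits of the Jcc/SETcc/CMOVcc opcodes).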
const CONDITION_FUNCTIONS: [&str; 16] = [
"test_o", "test_no", "test_b", "test_nb", "test_z", "test_nz", "test_be", "test_nbe", "test_s",
"test_ns", "test_p", "test_np", "test_l", "test_nl", "test_le", "test_nle",
];
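/// Generate code that sets previous_ip = instruction_pointer + n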
pub fn gen_set_previous_eip_offset_from_eip(builder: &mut WasmBuilder, n: u32) {
let cs = &mut builder.instruction_body;
cs.const_i32(global_pointers::PREVIOUS_IP as i32); // store address of previous ip
cs.load_aligned_i32(global_pointers::INSTRUCTION_POINTER); // load ip
if n != 0 {
cs.const_i32(n as i32);
cs.add_i32(); // add constant to ip value
}
cs.store_aligned_i32(0); // store it as previous ip
}
pub fn gen_increment_instruction_pointer(builder: &mut WasmBuilder, n: u32) {
let cs = &mut builder.instruction_body;
cs.const_i32(global_pointers::INSTRUCTION_POINTER as i32); // store address of ip
cs.load_aligned_i32(global_pointers::INSTRUCTION_POINTER); // load ip
cs.const_i32(n as i32);
cs.add_i32();
cs.store_aligned_i32(0); // store it back in
}
pub fn gen_relative_jump(builder: &mut WasmBuilder, n: i32) {
// add n to instruction_pointer (without setting the offset as above)
let instruction_body = &mut builder.instruction_body;
instruction_body.const_i32(global_pointers::INSTRUCTION_POINTER as i32);
instruction_body.load_aligned_i32(global_pointers::INSTRUCTION_POINTER);
instruction_body.const_i32(n);
instruction_body.add_i32();
instruction_body.store_aligned_i32(0);
}
pub fn gen_increment_variable(builder: &mut WasmBuilder, variable_address: u32, n: i32) {
builder
.instruction_body
.increment_variable(variable_address, n);
}
pub fn gen_increment_timestamp_counter(builder: &mut WasmBuilder, n: i32) {
gen_increment_variable(builder, global_pointers::TIMESTAMP_COUNTER, n);
}
pub fn gen_increment_mem32(builder: &mut WasmBuilder, addr: u32) {
builder.instruction_body.increment_mem32(addr)
}
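/// Generate code that pushes the value of byte register r onto the wasm stack.
/// AL..BL read the low byte and AH..BH the high byte of the corresponding 32-bit register local.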
pub fn gen_get_reg8(ctx: &mut JitContext, r: u32) {
match r {
regs::AL | regs::CL | regs::DL | regs::BL => {
ctx.builder
.instruction_body
.get_local(&ctx.register_locals[r as usize]);
ctx.builder.instruction_body.const_i32(0xFF);
ctx.builder.instruction_body.and_i32();
},
regs::AH | regs::CH | regs::DH | regs::BH => {
ctx.builder
.instruction_body
.get_local(&ctx.register_locals[(r - 4) as usize]);
ctx.builder.instruction_body.const_i32(8);
ctx.builder.instruction_body.shr_u_i32();
ctx.builder.instruction_body.const_i32(0xFF);
ctx.builder.instruction_body.and_i32();
},
_ => assert!(false),
}
}
pub fn gen_get_reg16(ctx: &mut JitContext, r: u32) {
ctx.builder
.instruction_body
.get_local(&ctx.register_locals[r as usize]);
ctx.builder.instruction_body.const_i32(0xFFFF);
ctx.builder.instruction_body.and_i32();
}
pub fn gen_get_reg32(ctx: &mut JitContext, r: u32) {
ctx.builder
.instruction_body
.get_local(&ctx.register_locals[r as usize]);
}
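/// Generate code that pops a value off the wasm stack and merges it into the low byte
/// (AL..BL) or high byte (AH..BH) of the corresponding 32-bit register local, leaving the
/// other bits unchanged.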
pub fn gen_set_reg8(ctx: &mut JitContext, r: u32) {
match r {
regs::AL | regs::CL | regs::DL | regs::BL => {
// reg32[r] = stack_value & 0xFF | reg32[r] & ~0xFF
ctx.builder.instruction_body.const_i32(0xFF);
ctx.builder.instruction_body.and_i32();
ctx.builder
.instruction_body
.get_local(&ctx.register_locals[r as usize]);
ctx.builder.instruction_body.const_i32(!0xFF);
ctx.builder.instruction_body.and_i32();
ctx.builder.instruction_body.or_i32();
ctx.builder
.instruction_body
.set_local(&ctx.register_locals[r as usize]);
},
regs::AH | regs::CH | regs::DH | regs::BH => {
// reg32[r] = stack_value << 8 & 0xFF00 | reg32[r] & ~0xFF00
ctx.builder.instruction_body.const_i32(8);
ctx.builder.instruction_body.shl_i32();
ctx.builder.instruction_body.const_i32(0xFF00);
ctx.builder.instruction_body.and_i32();
ctx.builder
.instruction_body
.get_local(&ctx.register_locals[(r - 4) as usize]);
ctx.builder.instruction_body.const_i32(!0xFF00);
ctx.builder.instruction_body.and_i32();
ctx.builder.instruction_body.or_i32();
ctx.builder
.instruction_body
.set_local(&ctx.register_locals[(r - 4) as usize]);
},
_ => assert!(false),
}
}
pub fn gen_set_reg16(ctx: &mut JitContext, r: u32) {
// reg32[r] = v & 0xFFFF | reg32[r] & ~0xFFFF
ctx.builder.instruction_body.const_i32(0xFFFF);
ctx.builder.instruction_body.and_i32();
ctx.builder
.instruction_body
.get_local(&ctx.register_locals[r as usize]);
ctx.builder.instruction_body.const_i32(!0xFFFF);
ctx.builder.instruction_body.and_i32();
ctx.builder.instruction_body.or_i32();
ctx.builder
.instruction_body
.set_local(&ctx.register_locals[r as usize]);
}
pub fn gen_set_reg32(ctx: &mut JitContext, r: u32) {
ctx.builder
.instruction_body
.set_local(&ctx.register_locals[r as usize]);
}
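/// Generate code that pushes segment register r onto the stack as a zero-extended 16-bit value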
pub fn gen_get_sreg(ctx: &mut JitContext, r: u32) {
ctx.builder
.instruction_body
.load_aligned_u16(global_pointers::get_sreg_offset(r));
}
/// sign-extend a byte value on the stack and leave it on the stack
pub fn sign_extend_i8(builder: &mut WasmBuilder) {
builder.instruction_body.const_i32(24);
builder.instruction_body.shl_i32();
builder.instruction_body.const_i32(24);
builder.instruction_body.shr_s_i32();
}
/// sign-extend a two byte value on the stack and leave it on the stack
pub fn sign_extend_i16(builder: &mut WasmBuilder) {
builder.instruction_body.const_i32(16);
builder.instruction_body.shl_i32();
builder.instruction_body.const_i32(16);
builder.instruction_body.shr_s_i32();
}
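// Naming convention for the call helpers below: the gen_fn*_const variants push their
// arguments as constants before the call, while the gen_call_fn* variants expect the
// arguments to already be on the wasm stack; the type index selects the callee's signature.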
pub fn gen_fn0_const(builder: &mut WasmBuilder, name: &str) {
let fn_idx = builder.get_fn_idx(name, module_init::FN0_TYPE_INDEX);
builder.instruction_body.call_fn(fn_idx);
}
pub fn gen_fn0_const_ret(builder: &mut WasmBuilder, name: &str) {
let fn_idx = builder.get_fn_idx(name, module_init::FN0_RET_TYPE_INDEX);
builder.instruction_body.call_fn(fn_idx);
}
pub fn gen_fn1_const(builder: &mut WasmBuilder, name: &str, arg0: u32) {
let fn_idx = builder.get_fn_idx(name, module_init::FN1_TYPE_INDEX);
builder.instruction_body.const_i32(arg0 as i32);
builder.instruction_body.call_fn(fn_idx);
}
pub fn gen_call_fn1_ret(builder: &mut WasmBuilder, name: &str) {
// generates: fn( _ ) where _ must be left on the stack before calling this, and fn returns a value
let fn_idx = builder.get_fn_idx(name, module_init::FN1_RET_TYPE_INDEX);
builder.instruction_body.call_fn(fn_idx);
}
pub fn gen_call_fn1_ret_f64(builder: &mut WasmBuilder, name: &str) {
// generates: fn( _ ) where _ must be left on the stack before calling this, and fn returns a value
let fn_idx = builder.get_fn_idx(name, module_init::FN1_RET_F64_TYPE_INDEX);
builder.instruction_body.call_fn(fn_idx);
}
pub fn gen_call_fn1_f64_ret_i32(builder: &mut WasmBuilder, name: &str) {
// generates: fn( _ ) where _ must be left on the stack before calling this, and fn returns a value
let fn_idx = builder.get_fn_idx(name, module_init::FN1_F64_RET_I32_TYPE_INDEX);
builder.instruction_body.call_fn(fn_idx);
}
pub fn gen_call_fn1_f64_ret_i64(builder: &mut WasmBuilder, name: &str) {
// generates: fn( _ ) where _ must be left on the stack before calling this, and fn returns a value
let fn_idx = builder.get_fn_idx(name, module_init::FN1_F64_RET_I64_TYPE_INDEX);
builder.instruction_body.call_fn(fn_idx);
}
pub fn gen_call_fn1_ret_i64(builder: &mut WasmBuilder, name: &str) {
// generates: fn( _ ) where _ must be left on the stack before calling this, and fn returns a value
let fn_idx = builder.get_fn_idx(name, module_init::FN1_RET_I64_TYPE_INDEX);
builder.instruction_body.call_fn(fn_idx);
}
pub fn gen_fn2_const(builder: &mut WasmBuilder, name: &str, arg0: u32, arg1: u32) {
let fn_idx = builder.get_fn_idx(name, module_init::FN2_TYPE_INDEX);
builder.instruction_body.const_i32(arg0 as i32);
builder.instruction_body.const_i32(arg1 as i32);
builder.instruction_body.call_fn(fn_idx);
}
pub fn gen_call_fn1(builder: &mut WasmBuilder, name: &str) {
// generates: fn( _ ) where _ must be left on the stack before calling this
let fn_idx = builder.get_fn_idx(name, module_init::FN1_TYPE_INDEX);
builder.instruction_body.call_fn(fn_idx);
}
pub fn gen_call_fn2(builder: &mut WasmBuilder, name: &str) {
// generates: fn( _, _ ) where _ must be left on the stack before calling this
let fn_idx = builder.get_fn_idx(name, module_init::FN2_TYPE_INDEX);
builder.instruction_body.call_fn(fn_idx);
}
pub fn gen_call_fn2_i32_f64(builder: &mut WasmBuilder, name: &str) {
// generates: fn( _, _ ) where _ must be left on the stack before calling this
let fn_idx = builder.get_fn_idx(name, module_init::FN2_I32_F64_TYPE_INDEX);
builder.instruction_body.call_fn(fn_idx);
}
pub fn gen_call_fn2_i32_i64(builder: &mut WasmBuilder, name: &str) {
// generates: fn( _, _ ) where _ must be left on the stack before calling this
let fn_idx = builder.get_fn_idx(name, module_init::FN2_I32_I64_TYPE_INDEX);
builder.instruction_body.call_fn(fn_idx);
}
pub fn gen_call_fn1_f64(builder: &mut WasmBuilder, name: &str) {
    // generates: fn( _ ) where _ must be left on the stack before calling this
let fn_idx = builder.get_fn_idx(name, module_init::FN1_F64_TYPE_INDEX);
builder.instruction_body.call_fn(fn_idx);
}
pub fn gen_call_fn2_ret(builder: &mut WasmBuilder, name: &str) {
// generates: fn( _, _ ) where _ must be left on the stack before calling this, and fn returns a value
let fn_idx = builder.get_fn_idx(name, module_init::FN2_RET_TYPE_INDEX);
builder.instruction_body.call_fn(fn_idx);
}
pub fn gen_call_fn3(builder: &mut WasmBuilder, name: &str) {
let fn_idx = builder.get_fn_idx(name, module_init::FN3_TYPE_INDEX);
builder.instruction_body.call_fn(fn_idx);
}
pub fn gen_call_fn3_i32_i64_i64(builder: &mut WasmBuilder, name: &str) {
    // generates: fn( _, _, _ ) where _ must be left on the stack before calling this
let fn_idx = builder.get_fn_idx(name, module_init::FN3_I32_I64_I64_TYPE_INDEX);
builder.instruction_body.call_fn(fn_idx);
}
pub fn gen_call_fn3_ret(builder: &mut WasmBuilder, name: &str) {
let fn_idx = builder.get_fn_idx(name, module_init::FN3_RET_TYPE_INDEX);
builder.instruction_body.call_fn(fn_idx);
}
pub fn gen_fn3_const(builder: &mut WasmBuilder, name: &str, arg0: u32, arg1: u32, arg2: u32) {
let fn_idx = builder.get_fn_idx(name, module_init::FN3_TYPE_INDEX);
builder.instruction_body.const_i32(arg0 as i32);
builder.instruction_body.const_i32(arg1 as i32);
builder.instruction_body.const_i32(arg2 as i32);
builder.instruction_body.call_fn(fn_idx);
}
pub fn gen_modrm_fn0(builder: &mut WasmBuilder, name: &str) {
// generates: fn( _ )
let fn_idx = builder.get_fn_idx(name, module_init::FN1_TYPE_INDEX);
builder.instruction_body.call_fn(fn_idx);
}
pub fn gen_modrm_fn1(builder: &mut WasmBuilder, name: &str, arg0: u32) {
// generates: fn( _, arg0 )
let fn_idx = builder.get_fn_idx(name, module_init::FN2_TYPE_INDEX);
builder.instruction_body.const_i32(arg0 as i32);
builder.instruction_body.call_fn(fn_idx);
}
pub fn gen_modrm_fn2(builder: &mut WasmBuilder, name: &str, arg0: u32, arg1: u32) {
// generates: fn( _, arg0, arg1 )
let fn_idx = builder.get_fn_idx(name, module_init::FN3_TYPE_INDEX);
builder.instruction_body.const_i32(arg0 as i32);
builder.instruction_body.const_i32(arg1 as i32);
builder.instruction_body.call_fn(fn_idx);
}
pub fn gen_modrm_resolve(ctx: &mut JitContext, modrm_byte: u8) { modrm::gen(ctx, modrm_byte) }
pub fn gen_set_reg8_r(ctx: &mut JitContext, dest: u32, src: u32) {
// generates: reg8[r_dest] = reg8[r_src]
gen_get_reg8(ctx, src);
gen_set_reg8(ctx, dest);
}
pub fn gen_set_reg16_r(ctx: &mut JitContext, dest: u32, src: u32) {
// generates: reg16[r_dest] = reg16[r_src]
gen_get_reg16(ctx, src);
gen_set_reg16(ctx, dest);
}
pub fn gen_set_reg32_r(ctx: &mut JitContext, dest: u32, src: u32) {
// generates: reg32s[r_dest] = reg32s[r_src]
gen_get_reg32(ctx, src);
gen_set_reg32(ctx, dest);
}
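// The gen_safe_read* helpers expect the virtual address on the wasm stack; see gen_safe_read
// below for the generated fast path.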
pub fn gen_safe_read8(ctx: &mut JitContext) { gen_safe_read(ctx, BitSize::BYTE, None) }
pub fn gen_safe_read16(ctx: &mut JitContext) { gen_safe_read(ctx, BitSize::WORD, None) }
pub fn gen_safe_read32(ctx: &mut JitContext) { gen_safe_read(ctx, BitSize::DWORD, None) }
pub fn gen_safe_read64(ctx: &mut JitContext) { gen_safe_read(ctx, BitSize::QWORD, None) }
pub fn gen_safe_read128(ctx: &mut JitContext, where_to_write: u32) {
gen_safe_read(ctx, BitSize::DQWORD, Some(where_to_write))
}
// only used internally for gen_safe_write
enum GenSafeWriteValue<'a> {
I32(&'a WasmLocal),
I64(&'a WasmLocalI64),
TwoI64s(&'a WasmLocalI64, &'a WasmLocalI64),
}
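// The gen_safe_write* helpers take the address and value in locals rather than on the wasm
// stack, presumably so both remain available for the slow-path fallback.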
pub fn gen_safe_write8(ctx: &mut JitContext, address_local: &WasmLocal, value_local: &WasmLocal) {
gen_safe_write(
ctx,
BitSize::BYTE,
address_local,
GenSafeWriteValue::I32(value_local),
)
}
pub fn gen_safe_write16(ctx: &mut JitContext, address_local: &WasmLocal, value_local: &WasmLocal) {
gen_safe_write(
ctx,
BitSize::WORD,
address_local,
GenSafeWriteValue::I32(value_local),
)
}
pub fn gen_safe_write32(ctx: &mut JitContext, address_local: &WasmLocal, value_local: &WasmLocal) {
gen_safe_write(
ctx,
BitSize::DWORD,
address_local,
GenSafeWriteValue::I32(value_local),
)
}
pub fn gen_safe_write64(
ctx: &mut JitContext,
address_local: &WasmLocal,
value_local: &WasmLocalI64,
) {
gen_safe_write(
ctx,
BitSize::QWORD,
address_local,
GenSafeWriteValue::I64(value_local),
)
}
pub fn gen_safe_write128(
ctx: &mut JitContext,
address_local: &WasmLocal,
value_local_low: &WasmLocalI64,
value_local_high: &WasmLocalI64,
) {
gen_safe_write(
ctx,
BitSize::DQWORD,
address_local,
GenSafeWriteValue::TwoI64s(value_local_low, value_local_high),
)
}
fn gen_safe_read(ctx: &mut JitContext, bits: BitSize, where_to_write: Option<u32>) {
// Assumes virtual address has been pushed to the stack, and generates safe_readXX's fast-path
// inline, bailing to safe_readXX_slow if necessary
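    // The fast path looks up the page's TLB entry, checks that it is TLB_VALID with none of
    // the flags that force a fallback, and reads directly from mem8 at (entry & ~0xFFF) ^ address.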
let address_local = ctx.builder.tee_new_local();
// Pseudo: base_on_stack = (uint32_t)address >> 12;
ctx.builder.instruction_body.const_i32(12);
ctx.builder.instruction_body.shr_u_i32();
// scale index
ctx.builder.instruction_body.const_i32(2);
ctx.builder.instruction_body.shl_i32();
// Pseudo: entry = tlb_data[base_on_stack];
ctx.builder
.instruction_body
.load_aligned_i32_from_stack(global_pointers::TLB_DATA);
let entry_local = ctx.builder.tee_new_local();
// Pseudo: bool can_use_fast_path =
    // (entry & 0xFFF & ~TLB_READONLY & ~TLB_GLOBAL & ~TLB_HAS_CODE & ~(cpl == 3 ? 0 : TLB_NO_USER)) == TLB_VALID &&
// (bitsize == 8 ? true : (address & 0xFFF) <= (0x1000 - (bitsize / 8)));
ctx.builder.instruction_body.const_i32(
(0xFFF
& !TLB_READONLY
& !TLB_GLOBAL
& !TLB_HAS_CODE
& !(if ctx.cpu.cpl3() { 0 } else { TLB_NO_USER })) as i32,
);
ctx.builder.instruction_body.and_i32();
ctx.builder.instruction_body.const_i32(TLB_VALID as i32);
ctx.builder.instruction_body.eq_i32();
if bits != BitSize::BYTE {
ctx.builder.instruction_body.get_local(&address_local);
ctx.builder.instruction_body.const_i32(0xFFF);
ctx.builder.instruction_body.and_i32();
ctx.builder
.instruction_body
.const_i32(0x1000 - bits.bytes() as i32);
ctx.builder.instruction_body.le_i32();
ctx.builder.instruction_body.and_i32();
}
// Pseudo:
// if(can_use_fast_path) leave_on_stack(mem8[entry & ~0xFFF ^ address]);
if bits == BitSize::DQWORD {
ctx.builder.instruction_body.if_void();
}
else if bits == BitSize::QWORD {
ctx.builder.instruction_body.if_i64();
}
else {
ctx.builder.instruction_body.if_i32();
}
if cfg!(feature = "profiler") {
gen_profiler_stat_increment(ctx.builder, profiler::stat::SAFE_READ_FAST);
}
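// Fast path: (entry & !0xFFF) ^ address is left on the stack and used below as
// the offset into mem8 (presumably the physical address of the access)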
ctx.builder.instruction_body.get_local(&entry_local);
ctx.builder.instruction_body.const_i32(!0xFFF);
ctx.builder.instruction_body.and_i32();
ctx.builder.instruction_body.get_local(&address_local);
ctx.builder.instruction_body.xor_i32();
// where_to_write is only used by dqword
dbg_assert!((where_to_write != None) == (bits == BitSize::DQWORD));
match bits {
BitSize::BYTE => {
ctx.builder
.instruction_body
.load_u8_from_stack(unsafe { mem8 } as u32);
},
BitSize::WORD => {
ctx.builder
.instruction_body
.load_unaligned_u16_from_stack(unsafe { mem8 } as u32);
},
BitSize::DWORD => {
ctx.builder
.instruction_body
.load_unaligned_i32_from_stack(unsafe { mem8 } as u32);
},
BitSize::QWORD => {
ctx.builder
.instruction_body
.load_unaligned_i64_from_stack(unsafe { mem8 } as u32);
},
BitSize::DQWORD => {
let where_to_write = where_to_write.unwrap();
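// 128-bit case: the value cannot be left on the wasm stack, so it is copied as
// two unaligned 64-bit halves into memory at where_to_write (presumably a
// destination slot provided by the caller, e.g. an XMM register)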
let virt_address_local = ctx.builder.set_new_local();
ctx.builder.instruction_body.const_i32(0);
ctx.builder.instruction_body.get_local(&virt_address_local);
ctx.builder
.instruction_body
.load_unaligned_i64_from_stack(unsafe { mem8 } as u32);
ctx.builder
.instruction_body
.store_unaligned_i64(where_to_write);
ctx.builder.instruction_body.const_i32(0);
ctx.builder.instruction_body.get_local(&virt_address_local);
ctx.builder
.instruction_body
.load_unaligned_i64_from_stack(unsafe { mem8 } as u32 + 8);
ctx.builder
.instruction_body
.store_unaligned_i64(where_to_write + 8);
ctx.builder.free_local(virt_address_local);
},
}
// Pseudo:
// else {
// *previous_ip = *instruction_pointer & ~0xFFF | start_of_instruction;
// leave_on_stack(safe_read*_slow_jit(address));
// if(page_fault) { trigger_pagefault_end_jit(); return; }
// }
ctx.builder.instruction_body.else_();
if cfg!(feature = "profiler") {
ctx.builder.instruction_body.get_local(&address_local);
ctx.builder.instruction_body.get_local(&entry_local);
gen_call_fn2(ctx.builder, "report_safe_read_jit_slow");
}
ctx.builder
.instruction_body
.const_i32(global_pointers::PREVIOUS_IP as i32);
ctx.builder
.instruction_body
.load_aligned_i32(global_pointers::INSTRUCTION_POINTER);
ctx.builder.instruction_body.const_i32(!0xFFF);
ctx.builder.instruction_body.and_i32();
ctx.builder
.instruction_body
.const_i32(ctx.start_of_current_instruction as i32 & 0xFFF);
ctx.builder.instruction_body.or_i32();
ctx.builder.instruction_body.store_aligned_i32(0);
ctx.builder.instruction_body.get_local(&address_local);
match bits {
BitSize::BYTE => {
gen_call_fn1_ret(ctx.builder, "safe_read8_slow_jit");
},
BitSize::WORD => {
gen_call_fn1_ret(ctx.builder, "safe_read16_slow_jit");
},
BitSize::DWORD => {
gen_call_fn1_ret(ctx.builder, "safe_read32s_slow_jit");
},
BitSize::QWORD => {
gen_call_fn1_ret_i64(ctx.builder, "safe_read64s_slow_jit");
},
BitSize::DQWORD => {
ctx.builder
.instruction_body
.const_i32(where_to_write.unwrap() as i32);
gen_call_fn2(ctx.builder, "safe_read128s_slow_jit");
},
}
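// After the slow-path call, check the page_fault flag it may have set; on a
// fault, branch out to the surrounding exit-with-pagefault block (see the
// br offset comment below)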
ctx.builder
.instruction_body
.load_u8(global_pointers::PAGE_FAULT);
ctx.builder.instruction_body.if_void();
gen_debug_track_jit_exit(ctx.builder, ctx.start_of_current_instruction);
// -2 for the exit-with-pagefault block, +2 for leaving the two nested ifs from this function
let br_offset = ctx.current_brtable_depth - 2 + 2;
ctx.builder.instruction_body.br(br_offset);
ctx.builder.instruction_body.block_end();
ctx.builder.instruction_body.block_end();
ctx.builder.free_local(address_local);
ctx.builder.free_local(entry_local);
}
fn gen_safe_write(
ctx: &mut JitContext,
bits: BitSize,
address_local: &WasmLocal,
value_local: GenSafeWriteValue,
) {
    // Generates safe_writeXX's fast-path inline, bailing to safe_writeXX_slow if necessary.
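    // Rough usage sketch (caller and helper names assumed; the concrete gen_safe_write{8,16,32,...}
    // wrappers live elsewhere in this file): the address and value are first materialised into
    // wasm locals, then e.g.
    //     gen_safe_write(ctx, BitSize::DWORD, &address_local, GenSafeWriteValue::I32(&value_local));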
ctx.builder.instruction_body.get_local(&address_local);
// Pseudo: base_on_stack = (uint32_t)address >> 12;
ctx.builder.instruction_body.const_i32(12);
ctx.builder.instruction_body.shr_u_i32();
// scale index: tlb_data entries are 4 bytes (i32) each
ctx.builder.instruction_body.const_i32(2);
ctx.builder.instruction_body.shl_i32();
// Pseudo: entry = tlb_data[base_on_stack];
ctx.builder
.instruction_body
.load_aligned_i32_from_stack(global_pointers::TLB_DATA);
let entry_local = ctx.builder.tee_new_local();
// Pseudo: bool can_use_fast_path =
//     (entry & (0xFFF & ~TLB_GLOBAL & ~(cpl == 3 ? 0 : TLB_NO_USER))) == TLB_VALID &&
//     (address & 0xFFF) <= (0x1000 - bitsize / 8);
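// I.e. the entry must have TLB_VALID set and no other flag bit among the low 12 bits
// (such as TLB_READONLY or TLB_HAS_CODE, which force the slow path for writes);
// TLB_GLOBAL is always ignored, and TLB_NO_USER is only checked when cpl == 3.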
ctx.builder
.instruction_body
.const_i32((0xFFF & !TLB_GLOBAL & !(if ctx.cpu.cpl3() { 0 } else { TLB_NO_USER })) as i32);
ctx.builder.instruction_body.and_i32();
ctx.builder.instruction_body.const_i32(TLB_VALID as i32);
ctx.builder.instruction_body.eq_i32();
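// For accesses wider than one byte, additionally require that the access does not
// cross a page boundary; single-byte writes never can, so the check is skipped.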
if bits != BitSize::BYTE {
ctx.builder.instruction_body.get_local(&address_local);
ctx.builder.instruction_body.const_i32(0xFFF);
ctx.builder.instruction_body.and_i32();
ctx.builder
.instruction_body
.const_i32(0x1000 - bits.bytes() as i32);
ctx.builder.instruction_body.le_i32();
ctx.builder.instruction_body.and_i32();
}
// Pseudo:
// if(can_use_fast_path)
// {
// phys_addr = entry & ~0xFFF ^ address;
ctx.builder.instruction_body.if_void();
if cfg!(feature = "profiler") {
gen_profiler_stat_increment(ctx.builder, profiler::stat::SAFE_WRITE_FAST);
}
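// The entry's upper bits hold the physical page base XORed with the virtual page base,
// so XORing the masked entry with the full virtual address yields the physical address
// (the offset into guest memory).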
ctx.builder.instruction_body.get_local(&entry_local);
ctx.builder.instruction_body.const_i32(!0xFFF);
ctx.builder.instruction_body.and_i32();
ctx.builder.instruction_body.get_local(&address_local);
ctx.builder.instruction_body.xor_i32();
// Pseudo:
// /* continued within can_use_fast_path branch */
// mem8[phys_addr] = value;
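// The value is pushed onto the stack and written with a store whose constant offset is
// the mem8 base address, i.e. the computed address indexes directly into guest memory.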
match value_local {
GenSafeWriteValue::I32(local) => ctx.builder.instruction_body.get_local(local),
GenSafeWriteValue::I64(local) => ctx.builder.instruction_body.get_local_i64(local),
GenSafeWriteValue::TwoI64s(local1, local2) => {
assert!(bits == BitSize::DQWORD);
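            // A 128-bit write is emitted as two 64-bit stores; the computed address is
            // teed into a local so it can be pushed again for the second store at offset +8.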
let virt_address_local = ctx.builder.tee_new_local();
ctx.builder.instruction_body.get_local_i64(local1);
ctx.builder
.instruction_body
.store_unaligned_i64(unsafe { mem8 } as u32);
ctx.builder.instruction_body.get_local(&virt_address_local);
ctx.builder.instruction_body.get_local_i64(local2);
ctx.builder
.instruction_body
.store_unaligned_i64(unsafe { mem8 } as u32 + 8);
ctx.builder.free_local(virt_address_local);
},
}
match bits {
BitSize::BYTE => {
ctx.builder
.instruction_body
.store_u8(unsafe { mem8 } as u32);
},
BitSize::WORD => {
ctx.builder
.instruction_body
.store_unaligned_u16(unsafe { mem8 } as u32);
},
BitSize::DWORD => {
ctx.builder
.instruction_body
.store_unaligned_i32(unsafe { mem8 } as u32);
},
BitSize::QWORD => {
ctx.builder
.instruction_body
.store_unaligned_i64(unsafe { mem8 } as u32);
},
BitSize::DQWORD => {}, // handled above
}
// Pseudo:
// else {
// *previous_ip = *instruction_pointer & ~0xFFF | start_of_instruction;
// safe_write*_slow_jit(address, value);
// if(page_fault) { trigger_pagefault_end_jit(); return; }
// }
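// The slow-path helper may raise a page fault, so previous_ip is first set to the address
// of the current instruction (the page-aligned instruction_pointer combined with the
// in-page offset of start_of_current_instruction) before the call.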
ctx.builder.instruction_body.else_();
if cfg!(feature = "profiler") {
ctx.builder.instruction_body.get_local(&address_local);
ctx.builder.instruction_body.get_local(&entry_local);
gen_call_fn2(ctx.builder, "report_safe_write_jit_slow");
}
ctx.builder
.instruction_body
.const_i32(global_pointers::PREVIOUS_IP as i32);
ctx.builder
.instruction_body
.load_aligned_i32(global_pointers::INSTRUCTION_POINTER);
ctx.builder.instruction_body.const_i32(!0xFFF);
ctx.builder.instruction_body.and_i32();
ctx.builder
.instruction_body
.const_i32(ctx.start_of_current_instruction as i32 & 0xFFF);
ctx.builder.instruction_body.or_i32();
ctx.builder.instruction_body.store_aligned_i32(0);
ctx.builder.instruction_body.get_local(&address_local);
match value_local {
GenSafeWriteValue::I32(local) => ctx.builder.instruction_body.get_local(local),
GenSafeWriteValue::I64(local) => ctx.builder.instruction_body.get_local_i64(local),
GenSafeWriteValue::TwoI64s(local1, local2) => {
ctx.builder.instruction_body.get_local_i64(local1);
ctx.builder.instruction_body.get_local_i64(local2)
},
}
match bits {
BitSize::BYTE => {
gen_call_fn2(ctx.builder, "safe_write8_slow_jit");
},
BitSize::WORD => {
gen_call_fn2(ctx.builder, "safe_write16_slow_jit");
},
BitSize::DWORD => {
gen_call_fn2(ctx.builder, "safe_write32_slow_jit");
},
BitSize::QWORD => {
gen_call_fn2_i32_i64(ctx.builder, "safe_write64_slow_jit");
},
BitSize::DQWORD => {
gen_call_fn3_i32_i64_i64(ctx.builder, "safe_write128_slow_jit");
},
}
ctx.builder
.instruction_body
.load_u8(global_pointers::PAGE_FAULT);
ctx.builder.instruction_body.if_void();
gen_debug_track_jit_exit(ctx.builder, ctx.start_of_current_instruction);
// -2 for the exit-with-pagefault block, +2 for leaving the two nested ifs from this function
let br_offset = ctx.current_brtable_depth - 2 + 2;
ctx.builder.instruction_body.br(br_offset);
ctx.builder.instruction_body.block_end();
ctx.builder.instruction_body.block_end();
ctx.builder.free_local(entry_local);
}
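// Generate code equivalent to `*prefixes = 0`.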
pub fn gen_clear_prefixes(ctx: &mut JitContext) {
let instruction_body = &mut ctx.builder.instruction_body;
instruction_body.const_i32(global_pointers::PREFIXES as i32); // load address of prefixes
instruction_body.const_i32(0);
instruction_body.store_aligned_i32(0);
}
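// Generate code equivalent to `*prefixes |= mask` (the mask must fit in the low byte).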
pub fn gen_add_prefix_bits(ctx: &mut JitContext, mask: u32) {
dbg_assert!(mask < 0x100);
let instruction_body = &mut ctx.builder.instruction_body;
instruction_body.const_i32(global_pointers::PREFIXES as i32); // load address of prefixes
instruction_body.load_aligned_i32(global_pointers::PREFIXES); // load old value
instruction_body.const_i32(mask as i32);
instruction_body.or_i32();
instruction_body.store_aligned_i32(0);
}
pub fn gen_jmp_rel16(builder: &mut WasmBuilder, rel16: u16) {
let cs_offset_addr = global_pointers::get_seg_offset(regs::CS);
builder.instruction_body.load_aligned_i32(cs_offset_addr);
let local = builder.set_new_local();
// generate:
// *instruction_pointer = cs_offset + ((*instruction_pointer - cs_offset + rel16) & 0xFFFF);
{
let instruction_body = &mut builder.instruction_body;
instruction_body.const_i32(global_pointers::INSTRUCTION_POINTER as i32);
instruction_body.load_aligned_i32(global_pointers::INSTRUCTION_POINTER);
instruction_body.get_local(&local);
instruction_body.sub_i32();
instruction_body.const_i32(rel16 as i32);
instruction_body.add_i32();
instruction_body.const_i32(0xFFFF);
instruction_body.and_i32();
instruction_body.get_local(&local);
instruction_body.add_i32();
instruction_body.store_aligned_i32(0);
}
builder.free_local(local);
}
pub fn gen_pop16_ss16(ctx: &mut JitContext) {
// sp = segment_offsets[SS] + reg16[SP] (or just reg16[SP] if has_flat_segmentation)
gen_get_reg16(ctx, regs::SP);
if !ctx.cpu.has_flat_segmentation() {
ctx.builder
.instruction_body
.load_aligned_i32(global_pointers::get_seg_offset(regs::SS));
ctx.builder.instruction_body.add_i32();
}
// result = safe_read16(sp)
gen_safe_read16(ctx);
// reg16[SP] += 2;
gen_get_reg16(ctx, regs::SP);
ctx.builder.instruction_body.const_i32(2);
ctx.builder.instruction_body.add_i32();
gen_set_reg16(ctx, regs::SP);
// return value is already on stack
}
pub fn gen_pop16_ss32(ctx: &mut JitContext) {
// esp = segment_offsets[SS] + reg32s[ESP] (or just reg32s[ESP] if has_flat_segmentation)
gen_get_reg32(ctx, regs::ESP);
if !ctx.cpu.has_flat_segmentation() {
ctx.builder
.instruction_body
.load_aligned_i32(global_pointers::get_seg_offset(regs::SS));
ctx.builder.instruction_body.add_i32();
}
// result = safe_read16(esp)
gen_safe_read16(ctx);
// reg32s[ESP] += 2;
gen_get_reg32(ctx, regs::ESP);
ctx.builder.instruction_body.const_i32(2);
ctx.builder.instruction_body.add_i32();
gen_set_reg32(ctx, regs::ESP);
// return value is already on stack
}
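// Pop a 16-bit value off the stack, dispatching on the current stack size.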
pub fn gen_pop16(ctx: &mut JitContext) {
if ctx.cpu.ssize_32() {
gen_pop16_ss32(ctx);
}
else {
gen_pop16_ss16(ctx);
}
}
pub fn gen_pop32s_ss16(ctx: &mut JitContext) {
// sp = reg16[SP]
gen_get_reg16(ctx, regs::SP);
// result = safe_read32s(segment_offsets[SS] + sp) (or just sp if has_flat_segmentation)
if !ctx.cpu.has_flat_segmentation() {
ctx.builder
.instruction_body
.load_aligned_i32(global_pointers::get_seg_offset(regs::SS));
ctx.builder.instruction_body.add_i32();
}
gen_safe_read32(ctx);
// reg16[SP] = sp + 4;
gen_get_reg16(ctx, regs::SP);
ctx.builder.instruction_body.const_i32(4);
ctx.builder.instruction_body.add_i32();
gen_set_reg16(ctx, regs::SP);
// return value is already on stack
}
pub fn gen_pop32s_ss32(ctx: &mut JitContext) {
// esp = reg32s[ESP]
gen_get_reg32(ctx, regs::ESP);
// result = safe_read32s(segment_offsets[SS] + esp) (or just esp if has_flat_segmentation)
if !ctx.cpu.has_flat_segmentation() {
ctx.builder
.instruction_body
.load_aligned_i32(global_pointers::get_seg_offset(regs::SS));
ctx.builder.instruction_body.add_i32();
}
gen_safe_read32(ctx);
// reg32s[ESP] = esp + 4;
gen_get_reg32(ctx, regs::ESP);
ctx.builder.instruction_body.const_i32(4);
ctx.builder.instruction_body.add_i32();
gen_set_reg32(ctx, regs::ESP);
// return value is already on stack
}
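// Pop a 32-bit value off the stack, dispatching on the current stack size.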
pub fn gen_pop32s(ctx: &mut JitContext) {
if ctx.cpu.ssize_32() {
gen_pop32s_ss32(ctx);
}
else {
gen_pop32s_ss16(ctx);
}
}
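// Generate code equivalent to `[e]sp += offset` (16- or 32-bit depending on the current stack
// size; the addition wraps, so a negative adjustment can be passed as its two's-complement u32).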
pub fn gen_adjust_stack_reg(ctx: &mut JitContext, offset: u32) {
if ctx.cpu.ssize_32() {
gen_get_reg32(ctx, regs::ESP);
ctx.builder.instruction_body.const_i32(offset as i32);
ctx.builder.instruction_body.add_i32();
gen_set_reg32(ctx, regs::ESP);
}
else {
gen_get_reg16(ctx, regs::SP);
ctx.builder.instruction_body.const_i32(offset as i32);
ctx.builder.instruction_body.add_i32();
gen_set_reg16(ctx, regs::SP);
}
}
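// Generate the LEAVE instruction: restore [e]bp from the stack and set [e]sp to point just past
// the saved value.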
pub fn gen_leave(ctx: &mut JitContext, os32: bool) {
// [e]bp = safe_read{16,32}([e]bp)
if ctx.cpu.ssize_32() {
gen_get_reg32(ctx, regs::EBP);
}
else {
gen_get_reg16(ctx, regs::BP);
}
let old_vbp = ctx.builder.tee_new_local();
if !ctx.cpu.has_flat_segmentation() {
ctx.builder
.instruction_body
.load_aligned_i32(global_pointers::get_seg_offset(regs::SS));
ctx.builder.instruction_body.add_i32();
}
if os32 {
gen_safe_read32(ctx);
gen_set_reg32(ctx, regs::EBP);
}
else {
gen_safe_read16(ctx);
gen_set_reg16(ctx, regs::BP);
}
// [e]sp = [e]bp + (os32 ? 4 : 2)
if ctx.cpu.ssize_32() {
ctx.builder.instruction_body.get_local(&old_vbp);
ctx.builder
.instruction_body
.const_i32(if os32 { 4 } else { 2 });
ctx.builder.instruction_body.add_i32();
gen_set_reg32(ctx, regs::ESP);
}
else {
ctx.builder.instruction_body.get_local(&old_vbp);
ctx.builder
.instruction_body
.const_i32(if os32 { 4 } else { 2 });
ctx.builder.instruction_body.add_i32();
gen_set_reg16(ctx, regs::SP);
}
ctx.builder.free_local(old_vbp);
}
pub fn gen_task_switch_test(ctx: &mut JitContext) {
// generate if(cr[0] & (CR0_EM | CR0_TS)) { task_switch_test_void(); return; }
let cr0_offset = global_pointers::get_creg_offset(0);
ctx.builder.instruction_body.load_aligned_i32(cr0_offset);
ctx.builder
.instruction_body
.const_i32((regs::CR0_EM | regs::CR0_TS) as i32);
ctx.builder.instruction_body.and_i32();
ctx.builder.instruction_body.if_void();
gen_move_registers_from_locals_to_memory(ctx);
gen_fn0_const(ctx.builder, "task_switch_test_void");
gen_debug_track_jit_exit(ctx.builder, ctx.start_of_current_instruction);
gen_clear_prefixes(ctx);
ctx.builder.instruction_body.return_();
ctx.builder.instruction_body.block_end();
}
pub fn gen_task_switch_test_mmx(ctx: &mut JitContext) {
// generate if(cr[0] & (CR0_EM | CR0_TS)) { task_switch_test_mmx_void(); return; }
let cr0_offset = global_pointers::get_creg_offset(0);
ctx.builder.instruction_body.load_aligned_i32(cr0_offset);
ctx.builder
.instruction_body
.const_i32((regs::CR0_EM | regs::CR0_TS) as i32);
ctx.builder.instruction_body.and_i32();
ctx.builder.instruction_body.if_void();
gen_move_registers_from_locals_to_memory(ctx);
gen_fn0_const(ctx.builder, "task_switch_test_mmx_void");
gen_debug_track_jit_exit(ctx.builder, ctx.start_of_current_instruction);
gen_clear_prefixes(ctx);
ctx.builder.instruction_body.return_();
ctx.builder.instruction_body.block_end();
}
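// Emit a push of the 16-bit value in value_local: decrement ESP/SP by 2, add the SS segment base
// unless the stack segment is flat, write the value to the resulting address and update ESP/SP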
pub fn gen_push16(ctx: &mut JitContext, value_local: &WasmLocal) {
if ctx.cpu.ssize_32() {
gen_get_reg32(ctx, regs::ESP);
}
else {
gen_get_reg16(ctx, regs::SP);
};
ctx.builder.instruction_body.const_i32(2);
ctx.builder.instruction_body.sub_i32();
let reg_updated_local = if !ctx.cpu.ssize_32() || !ctx.cpu.has_flat_segmentation() {
let reg_updated_local = ctx.builder.tee_new_local();
if !ctx.cpu.ssize_32() {
ctx.builder.instruction_body.const_i32(0xFFFF);
ctx.builder.instruction_body.and_i32();
}
if !ctx.cpu.has_flat_segmentation() {
ctx.builder
.instruction_body
.load_aligned_i32(global_pointers::get_seg_offset(regs::SS));
ctx.builder.instruction_body.add_i32();
}
let sp_local = ctx.builder.set_new_local();
gen_safe_write16(ctx, &sp_local, &value_local);
ctx.builder.free_local(sp_local);
ctx.builder.instruction_body.get_local(&reg_updated_local);
reg_updated_local
}
else {
// short path: The address written to is equal to ESP/SP minus two
let reg_updated_local = ctx.builder.tee_new_local();
gen_safe_write16(ctx, &reg_updated_local, &value_local);
reg_updated_local
};
if ctx.cpu.ssize_32() {
gen_set_reg32(ctx, regs::ESP);
}
else {
gen_set_reg16(ctx, regs::SP);
};
ctx.builder.free_local(reg_updated_local);
}
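// Emit a push of the 32-bit value in value_local, analogous to gen_push16 but decrementing ESP/SP by 4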
pub fn gen_push32(ctx: &mut JitContext, value_local: &WasmLocal) {
if ctx.cpu.ssize_32() {
gen_get_reg32(ctx, regs::ESP);
}
else {
gen_get_reg16(ctx, regs::SP);
};
ctx.builder.instruction_body.const_i32(4);
ctx.builder.instruction_body.sub_i32();
let new_sp_local = if !ctx.cpu.ssize_32() || !ctx.cpu.has_flat_segmentation() {
let new_sp_local = ctx.builder.tee_new_local();
if !ctx.cpu.ssize_32() {
ctx.builder.instruction_body.const_i32(0xFFFF);
ctx.builder.instruction_body.and_i32();
}
if !ctx.cpu.has_flat_segmentation() {
ctx.builder
.instruction_body
.load_aligned_i32(global_pointers::get_seg_offset(regs::SS));
ctx.builder.instruction_body.add_i32();
}
let sp_local = ctx.builder.set_new_local();
gen_safe_write32(ctx, &sp_local, &value_local);
ctx.builder.free_local(sp_local);
ctx.builder.instruction_body.get_local(&new_sp_local);
new_sp_local
}
else {
// short path: The address written to is equal to ESP/SP minus four
let new_sp_local = ctx.builder.tee_new_local();
gen_safe_write32(ctx, &new_sp_local, &value_local);
new_sp_local
};
if ctx.cpu.ssize_32() {
gen_set_reg32(ctx, regs::ESP);
}
else {
gen_set_reg16(ctx, regs::SP);
};
ctx.builder.free_local(new_sp_local);
}
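// Leave the current eip relative to the code segment on the stack (instruction_pointer minus the CS segment base)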
pub fn gen_get_real_eip(ctx: &mut JitContext) {
let instruction_body = &mut ctx.builder.instruction_body;
instruction_body.load_aligned_i32(global_pointers::INSTRUCTION_POINTER);
instruction_body.load_aligned_i32(global_pointers::get_seg_offset(regs::CS));
instruction_body.sub_i32();
}
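// Emit a read-modify-write memory access: read the value at address_local, let f transform it on
// the stack and write the result back to the same address. Uses the tlb fast path when the page is
// writable and the access doesn't cross a page boundary, otherwise calls fallback_fn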
pub fn gen_safe_read_write(
ctx: &mut JitContext,
bits: BitSize,
address_local: &WasmLocal,
f: &Fn(&mut JitContext),
fallback_fn: &Fn(&mut JitContext),
) {
ctx.builder.instruction_body.get_local(address_local);
// Pseudo: base_on_stack = (uint32_t)address >> 12;
ctx.builder.instruction_body.const_i32(12);
ctx.builder.instruction_body.shr_u_i32();
// scale index
ctx.builder.instruction_body.const_i32(2);
ctx.builder.instruction_body.shl_i32();
// Pseudo: entry = tlb_data[base_on_stack];
ctx.builder
.instruction_body
.load_aligned_i32_from_stack(global_pointers::TLB_DATA);
let entry_local = ctx.builder.tee_new_local();
// Pseudo: bool can_use_fast_path = (entry & 0xFFF & ~TLB_GLOBAL & ~(cpl == 3 ? 0 : TLB_NO_USER)) == TLB_VALID &&
//     (address & 0xFFF) <= (0x1000 - (bitsize / 8));
ctx.builder
.instruction_body
.const_i32((0xFFF & !TLB_GLOBAL & !(if ctx.cpu.cpl3() { 0 } else { TLB_NO_USER })) as i32);
ctx.builder.instruction_body.and_i32();
ctx.builder.instruction_body.const_i32(TLB_VALID as i32);
ctx.builder.instruction_body.eq_i32();
if bits != BitSize::BYTE {
ctx.builder.instruction_body.get_local(&address_local);
ctx.builder.instruction_body.const_i32(0xFFF);
ctx.builder.instruction_body.and_i32();
ctx.builder
.instruction_body
.const_i32(0x1000 - bits.bytes() as i32);
ctx.builder.instruction_body.le_i32();
ctx.builder.instruction_body.and_i32();
}
// Pseudo:
// if(can_use_fast_path) leave_on_stack(mem8[entry & ~0xFFF ^ address]);
ctx.builder.instruction_body.if_void();
if cfg!(feature = "profiler") {
gen_profiler_stat_increment(ctx.builder, profiler::stat::SAFE_READ_WRITE_FAST);
}
ctx.builder.instruction_body.get_local(&entry_local);
ctx.builder.instruction_body.const_i32(!0xFFF);
ctx.builder.instruction_body.and_i32();
ctx.builder.instruction_body.get_local(&address_local);
ctx.builder.instruction_body.xor_i32();
let phys_addr_local = ctx.builder.tee_new_local();
// phys_addr is on the stack; read the old value, let f transform it, then store the result back to phys_addr
ctx.builder.instruction_body.get_local(&phys_addr_local);
match bits {
BitSize::BYTE => {
ctx.builder
.instruction_body
.load_u8_from_stack(unsafe { mem8 } as u32);
},
BitSize::WORD => {
ctx.builder
.instruction_body
.load_unaligned_u16_from_stack(unsafe { mem8 } as u32);
},
BitSize::DWORD => {
ctx.builder
.instruction_body
.load_unaligned_i32_from_stack(unsafe { mem8 } as u32);
},
BitSize::QWORD => assert!(false), // not used
BitSize::DQWORD => assert!(false), // not used
}
f(ctx);
match bits {
BitSize::BYTE => {
ctx.builder
.instruction_body
.store_u8(unsafe { mem8 } as u32);
},
BitSize::WORD => {
ctx.builder
.instruction_body
.store_unaligned_u16(unsafe { mem8 } as u32);
},
BitSize::DWORD => {
ctx.builder
.instruction_body
.store_unaligned_i32(unsafe { mem8 } as u32);
},
BitSize::QWORD => assert!(false), // not used
BitSize::DQWORD => assert!(false), // not used
};
ctx.builder.free_local(phys_addr_local);
// Pseudo:
// else {
// *previous_ip = *instruction_pointer & ~0xFFF | start_of_instruction;
// fallback_fn();
// if(page_fault) return;
// }
ctx.builder.instruction_body.else_();
if cfg!(feature = "profiler") {
ctx.builder.instruction_body.get_local(&address_local);
ctx.builder.instruction_body.get_local(&entry_local);
gen_call_fn2(ctx.builder, "report_safe_read_write_jit_slow");
}
ctx.builder
.instruction_body
.const_i32(global_pointers::PREVIOUS_IP as i32);
ctx.builder
.instruction_body
.load_aligned_i32(global_pointers::INSTRUCTION_POINTER);
ctx.builder.instruction_body.const_i32(!0xFFF);
ctx.builder.instruction_body.and_i32();
ctx.builder
.instruction_body
.const_i32(ctx.start_of_current_instruction as i32 & 0xFFF);
ctx.builder.instruction_body.or_i32();
ctx.builder.instruction_body.store_aligned_i32(0);
ctx.builder.instruction_body.get_local(&address_local);
fallback_fn(ctx);
ctx.builder
.instruction_body
.load_u8(global_pointers::PAGE_FAULT);
ctx.builder.instruction_body.if_void();
gen_debug_track_jit_exit(ctx.builder, ctx.start_of_current_instruction);
gen_move_registers_from_locals_to_memory(ctx);
gen_clear_prefixes(ctx);
ctx.builder.instruction_body.return_();
ctx.builder.instruction_body.block_end();
ctx.builder.instruction_body.block_end();
ctx.builder.free_local(entry_local);
}
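// Helpers for the lazily evaluated flags: record the last operands, result and operand size so the
// arithmetic flags can be computed on demand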
pub fn gen_set_last_op1(builder: &mut WasmBuilder, source: &WasmLocal) {
builder
.instruction_body
.const_i32(global_pointers::LAST_OP1 as i32);
builder.instruction_body.get_local(&source);
builder.instruction_body.store_aligned_i32(0);
}
pub fn gen_set_last_op2(builder: &mut WasmBuilder, source: &WasmLocal) {
builder
.instruction_body
.const_i32(global_pointers::LAST_OP2 as i32);
builder.instruction_body.get_local(&source);
builder.instruction_body.store_aligned_i32(0);
}
pub fn gen_set_last_add_result(builder: &mut WasmBuilder, source: &WasmLocal) {
builder
.instruction_body
.const_i32(global_pointers::LAST_ADD_RESULT as i32);
builder.instruction_body.get_local(&source);
builder.instruction_body.store_aligned_i32(0);
}
pub fn gen_set_last_result(builder: &mut WasmBuilder, source: &WasmLocal) {
builder
.instruction_body
.const_i32(global_pointers::LAST_RESULT as i32);
builder.instruction_body.get_local(&source);
builder.instruction_body.store_aligned_i32(0);
}
pub fn gen_set_last_op_size(builder: &mut WasmBuilder, value: i32) {
builder
.instruction_body
.const_i32(global_pointers::LAST_OP_SIZE as i32);
builder.instruction_body.const_i32(value);
builder.instruction_body.store_aligned_i32(0);
}
pub fn gen_set_flags_changed(builder: &mut WasmBuilder, value: i32) {
builder
.instruction_body
.const_i32(global_pointers::FLAGS_CHANGED as i32);
builder.instruction_body.const_i32(value);
builder.instruction_body.store_aligned_i32(0);
}
pub fn gen_set_flags_bits(builder: &mut WasmBuilder, bits_to_set: i32) {
builder
.instruction_body
.const_i32(global_pointers::FLAGS as i32);
builder
.instruction_body
.load_aligned_i32(global_pointers::FLAGS);
builder.instruction_body.const_i32(bits_to_set);
builder.instruction_body.or_i32();
builder.instruction_body.store_aligned_i32(0);
}
pub fn gen_clear_flags_bits(builder: &mut WasmBuilder, bits_to_clear: i32) {
builder
.instruction_body
.const_i32(global_pointers::FLAGS as i32);
builder
.instruction_body
.load_aligned_i32(global_pointers::FLAGS);
builder.instruction_body.const_i32(!bits_to_clear);
builder.instruction_body.and_i32();
builder.instruction_body.store_aligned_i32(0);
}
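// FPU helpers: fetch st(i) or read a 32/64-bit float operand from memory, leaving an f64 on the stack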
pub fn gen_fpu_get_sti(ctx: &mut JitContext, i: u32) {
ctx.builder.instruction_body.const_i32(i as i32);
gen_call_fn1_ret_f64(ctx.builder, "fpu_get_sti");
}
pub fn gen_fpu_load_m32(ctx: &mut JitContext) {
gen_safe_read32(ctx);
ctx.builder.instruction_body.reinterpret_i32_as_f32();
ctx.builder.instruction_body.promote_f32_to_f64();
}
pub fn gen_fpu_load_m64(ctx: &mut JitContext) {
gen_safe_read64(ctx);
ctx.builder.instruction_body.reinterpret_i64_as_f64();
}
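// Emit code that raises the invalid opcode exception (#UD) and returns from the generated function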
pub fn gen_trigger_ud(ctx: &mut JitContext) {
gen_move_registers_from_locals_to_memory(ctx);
gen_fn0_const(ctx.builder, "trigger_ud");
gen_debug_track_jit_exit(ctx.builder, ctx.start_of_current_instruction);
gen_clear_prefixes(ctx);
ctx.builder.instruction_body.return_();
}
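// Emit a call to the flag test function for one of the 16 x86 condition codes (test_o ... test_nle),
// leaving the result on the stack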
pub fn gen_condition_fn(builder: &mut WasmBuilder, condition: u8) {
dbg_assert!(condition < 16);
let condition_name = CONDITION_FUNCTIONS[condition as usize];
gen_fn0_const_ret(builder, condition_name);
}
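// Write the eight general purpose registers from their wasm locals back to reg32 in memory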
pub fn gen_move_registers_from_locals_to_memory(ctx: &mut JitContext) {
for i in 0..8 {
ctx.builder
.instruction_body
.const_i32(global_pointers::get_reg32_offset(i as u32) as i32);
ctx.builder
.instruction_body
.get_local(&ctx.register_locals[i]);
ctx.builder.instruction_body.store_aligned_i32(0);
}
}
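// Load the eight general purpose registers from reg32 in memory into their wasm locals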
pub fn gen_move_registers_from_memory_to_locals(ctx: &mut JitContext) {
for i in 0..8 {
ctx.builder
.instruction_body
.const_i32(global_pointers::get_reg32_offset(i as u32) as i32);
ctx.builder.instruction_body.load_aligned_i32_from_stack(0);
ctx.builder
.instruction_body
.set_local(&ctx.register_locals[i]);
}
}
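// Emit an increment of the in-memory counter for the given profiler statistic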
pub fn gen_profiler_stat_increment(builder: &mut WasmBuilder, stat: profiler::stat) {
let addr = unsafe { profiler::stat_array.as_mut_ptr().offset(stat as isize) } as u32;
gen_increment_variable(builder, addr, 1)
}
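// Record the address at which generated code exits; only emitted when built with the profiler feature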
pub fn gen_debug_track_jit_exit(builder: &mut WasmBuilder, address: u32) {
if cfg!(feature = "profiler") {
gen_fn1_const(builder, "track_jit_exit", address);
}
}