Patch summary: adds codegen::gen_read_reg_xmm128_into_scratch, which emits two aligned 64-bit
load/store pairs that copy xmm register `r` into SSE_SCRATCH_REGISTER, and makes
sse_read128_xmm_xmm and the register forms of instr_660F70 / instr_F20F70 / instr_F30F70 pass
that scratch copy instead of the register's own offset. Rationale, per the comment added in
sse_read128_xmm_xmm: the called function expects a reg128 that must not alias with memory.

diff --git a/src/rust/codegen.rs b/src/rust/codegen.rs
index abf6bf02..d6439622 100644
--- a/src/rust/codegen.rs
+++ b/src/rust/codegen.rs
@@ -233,6 +233,22 @@ pub fn decr_exc_asize(ctx: &mut JitContext) {
     }
 }
 
+pub fn gen_read_reg_xmm128_into_scratch(ctx: &mut JitContext, r: u32) {
+    ctx.builder
+        .const_i32(global_pointers::SSE_SCRATCH_REGISTER as i32);
+    let dest = global_pointers::get_reg_xmm_low_offset(r);
+    ctx.builder.const_i32(dest as i32);
+    ctx.builder.load_aligned_i64(0);
+    ctx.builder.store_aligned_i64(0);
+
+    ctx.builder
+        .const_i32(global_pointers::SSE_SCRATCH_REGISTER as i32 + 8);
+    let dest = global_pointers::get_reg_xmm_low_offset(r) + 8;
+    ctx.builder.const_i32(dest as i32);
+    ctx.builder.load_aligned_i64(0);
+    ctx.builder.store_aligned_i64(0);
+}
+
 pub fn gen_get_sreg(ctx: &mut JitContext, r: u32) {
     ctx.builder
         .load_fixed_u16(global_pointers::get_sreg_offset(r))
diff --git a/src/rust/jit_instructions.rs b/src/rust/jit_instructions.rs
index 0fd464c2..93e1b853 100644
--- a/src/rust/jit_instructions.rs
+++ b/src/rust/jit_instructions.rs
@@ -110,7 +110,10 @@ pub fn sse_read128_xmm_mem(ctx: &mut JitContext, name: &str, modrm_byte: ModrmBy
     ctx.builder.call_fn2(name);
 }
 pub fn sse_read128_xmm_xmm(ctx: &mut JitContext, name: &str, r1: u32, r2: u32) {
-    let dest = global_pointers::get_reg_xmm_low_offset(r1);
+    // Make a copy to avoid aliasing problems: Called function expects a reg128, which must not
+    // alias with memory
+    codegen::gen_read_reg_xmm128_into_scratch(ctx, r1);
+    let dest = global_pointers::SSE_SCRATCH_REGISTER;
     ctx.builder.const_i32(dest as i32);
     ctx.builder.const_i32(r2 as i32);
     ctx.builder.call_fn2(name);
@@ -4472,7 +4475,8 @@ pub fn instr_660F70_mem_jit(ctx: &mut JitContext, modrm_byte: ModrmByte, r: u32,
     ctx.builder.call_fn3("instr_660F70");
 }
 pub fn instr_660F70_reg_jit(ctx: &mut JitContext, r1: u32, r2: u32, imm8: u32) {
-    let dest = global_pointers::get_reg_xmm_low_offset(r1);
+    codegen::gen_read_reg_xmm128_into_scratch(ctx, r1);
+    let dest = global_pointers::SSE_SCRATCH_REGISTER;
     ctx.builder.const_i32(dest as i32);
     ctx.builder.const_i32(r2 as i32);
     ctx.builder.const_i32(imm8 as i32);
@@ -4487,7 +4491,8 @@ pub fn instr_F20F70_mem_jit(ctx: &mut JitContext, modrm_byte: ModrmByte, r: u32,
     ctx.builder.call_fn3("instr_F20F70");
 }
 pub fn instr_F20F70_reg_jit(ctx: &mut JitContext, r1: u32, r2: u32, imm8: u32) {
-    let dest = global_pointers::get_reg_xmm_low_offset(r1);
+    codegen::gen_read_reg_xmm128_into_scratch(ctx, r1);
+    let dest = global_pointers::SSE_SCRATCH_REGISTER;
     ctx.builder.const_i32(dest as i32);
     ctx.builder.const_i32(r2 as i32);
     ctx.builder.const_i32(imm8 as i32);
@@ -4502,7 +4507,8 @@ pub fn instr_F30F70_mem_jit(ctx: &mut JitContext, modrm_byte: ModrmByte, r: u32,
     ctx.builder.call_fn3("instr_F30F70");
 }
 pub fn instr_F30F70_reg_jit(ctx: &mut JitContext, r1: u32, r2: u32, imm8: u32) {
-    let dest = global_pointers::get_reg_xmm_low_offset(r1);
+    codegen::gen_read_reg_xmm128_into_scratch(ctx, r1);
+    let dest = global_pointers::SSE_SCRATCH_REGISTER;
     ctx.builder.const_i32(dest as i32);
     ctx.builder.const_i32(r2 as i32);
     ctx.builder.const_i32(imm8 as i32);