From baffa6c0d6ebbfd2c0ced31b73a467987b93e922 Mon Sep 17 00:00:00 2001 From: Fabian Date: Fri, 18 Nov 2022 18:51:32 -0600 Subject: [PATCH] generate code for pshufd (660F70) --- src/rust/cpu/instructions_0f.rs | 1 - src/rust/jit_instructions.rs | 19 +++++++++++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/rust/cpu/instructions_0f.rs b/src/rust/cpu/instructions_0f.rs index ea43d59c..cd165e76 100644 --- a/src/rust/cpu/instructions_0f.rs +++ b/src/rust/cpu/instructions_0f.rs @@ -2635,7 +2635,6 @@ pub unsafe fn instr_0F70_reg(r1: i32, r2: i32, imm: i32) { instr_0F70(read_mmx64 pub unsafe fn instr_0F70_mem(addr: i32, r: i32, imm: i32) { instr_0F70(return_on_pagefault!(safe_read64s(addr)), r, imm); } -#[no_mangle] pub unsafe fn instr_660F70(source: reg128, r: i32, imm8: i32) { // pshufd xmm, xmm/mem128, imm8 // XXX: Aligned access or #gp diff --git a/src/rust/jit_instructions.rs b/src/rust/jit_instructions.rs index 87ecdf66..2c55c289 100644 --- a/src/rust/jit_instructions.rs +++ b/src/rust/jit_instructions.rs @@ -6456,10 +6456,25 @@ pub fn instr_0F70_reg_jit(ctx: &mut JitContext, r1: u32, r2: u32, imm8: u32) { ctx.builder.call_fn3_i64_i32_i32("instr_0F70"); } pub fn instr_660F70_mem_jit(ctx: &mut JitContext, modrm_byte: ModrmByte, r: u32, imm8: u32) { - sse_read128_xmm_mem_imm(ctx, "instr_660F70", modrm_byte, r, imm8) + let src = global_pointers::sse_scratch_register as u32; + codegen::gen_modrm_resolve_safe_read128(ctx, modrm_byte, src); + for i in 0..4 { + ctx.builder.const_i32(0); + ctx.builder.load_fixed_i32(src + 4 * (imm8 >> 2 * i & 3)); + ctx.builder + .store_aligned_i32(global_pointers::get_reg_xmm_offset(r) + 4 * i); + } } pub fn instr_660F70_reg_jit(ctx: &mut JitContext, r1: u32, r2: u32, imm8: u32) { - sse_read128_xmm_xmm_imm(ctx, "instr_660F70", r1, r2, imm8) + codegen::gen_read_reg_xmm128_into_scratch(ctx, r1); + // TODO: perf: copy less (handle aliased src/dst), use 64-bit loads/stores if possible + let src = global_pointers::sse_scratch_register as u32; + for i in 0..4 { + ctx.builder.const_i32(0); + ctx.builder.load_fixed_i32(src + 4 * (imm8 >> 2 * i & 3)); + ctx.builder + .store_aligned_i32(global_pointers::get_reg_xmm_offset(r2) + 4 * i); + } } pub fn instr_F20F70_mem_jit(ctx: &mut JitContext, modrm_byte: ModrmByte, r: u32, imm8: u32) { sse_read128_xmm_mem_imm(ctx, "instr_F20F70", modrm_byte, r, imm8)