v86/src/rust/cpu/sse_instr.rs
2022-11-15 10:14:28 +09:00

362 lines
12 KiB
Rust

use cpu::cpu::*;
use cpu::global_pointers::mxcsr;
/// Stores the 64-bit contents of MMX register `r` to memory at `addr`.
///
/// `transition_fpu_to_mmx` runs only after the write has gone through
/// `return_on_pagefault!` (presumably an early return when the write
/// page-faults -- confirm against the macro definition).
pub unsafe fn mov_r_m64(addr: i32, r: i32) {
    // mov* m64, mm
    let value = read_mmx64s(r);
    return_on_pagefault!(safe_write64(addr, value));
    transition_fpu_to_mmx();
}
pub unsafe fn movl_r128_m64(addr: i32, r: i32) {
// mov* m64, xmm
let data = read_xmm64s(r);
return_on_pagefault!(safe_write64(addr, data));
}
/// Copies the full 128-bit contents of XMM register `r2` into XMM register `r1`.
pub unsafe fn mov_r_r128(r1: i32, r2: i32) {
    // mov* xmm, xmm
    write_xmm_reg128(r1, read_xmm128s(r2));
}
pub unsafe fn mov_r_m128(addr: i32, r: i32) {
// mov* m128, xmm
let data = read_xmm128s(r);
return_on_pagefault!(safe_write128(addr, data));
}
/// Writes the already-fetched 128-bit operand `source` into XMM register `r`.
///
/// The caller supplies `source` by value, so the same routine serves both the
/// register and the memory form of the instruction.
pub unsafe fn mov_rm_r128(source: reg128, r: i32) {
    // mov* xmm, xmm/m128
    write_xmm_reg128(r, source);
}
/// Stores the upper 64 bits (`u64[1]`) of XMM register `r` to memory at `addr`.
pub unsafe fn movh_r128_m64(addr: i32, r: i32) {
    // movhp* m64, xmm
    let high_qword = read_xmm128s(r).u64[1];
    return_on_pagefault!(safe_write64(addr, high_qword));
}
/// Bitwise AND of `source` with XMM register `r`; the result replaces `r`.
pub unsafe fn pand_r128(source: reg128, r: i32) {
    // pand xmm, xmm/m128
    // XXX: Aligned access or #gp
    let dest = read_xmm128s(r);
    let combined = reg128 {
        u64: [source.u64[0] & dest.u64[0], source.u64[1] & dest.u64[1]],
    };
    write_xmm_reg128(r, combined);
}
/// Computes `source AND (NOT destination)` where the destination is XMM
/// register `r`; the result replaces `r`.
pub unsafe fn pandn_r128(source: reg128, r: i32) {
    // pandn xmm, xmm/m128
    // XXX: Aligned access or #gp
    let dest = read_xmm128s(r);
    let combined = reg128 {
        u64: [source.u64[0] & !dest.u64[0], source.u64[1] & !dest.u64[1]],
    };
    write_xmm_reg128(r, combined);
}
/// Bitwise XOR of `source` with XMM register `r`; the result replaces `r`.
pub unsafe fn pxor_r128(source: reg128, r: i32) {
    // pxor xmm, xmm/m128
    // XXX: Aligned access or #gp
    let dest = read_xmm128s(r);
    let combined = reg128 {
        u64: [source.u64[0] ^ dest.u64[0], source.u64[1] ^ dest.u64[1]],
    };
    write_xmm_reg128(r, combined);
}
/// Bitwise OR of `source` with XMM register `r`; the result replaces `r`.
pub unsafe fn por_r128(source: reg128, r: i32) {
    // por xmm, xmm/m128
    // XXX: Aligned access or #gp
    let dest = read_xmm128s(r);
    let combined = reg128 {
        u64: [source.u64[0] | dest.u64[0], source.u64[1] | dest.u64[1]],
    };
    write_xmm_reg128(r, combined);
}
/// Logical right shift of each of the four 16-bit lanes of MMX register `r`.
///
/// Shift counts above 15 are clamped to 16, which clears every lane.
pub unsafe fn psrlw_r64(r: i32, shift: u64) {
    // psrlw mm, {shift}
    let lanes: [u16; 4] = std::mem::transmute(read_mmx64s(r));
    let count = shift.min(16);
    let mut shifted = [0u16; 4];
    for (out, lane) in shifted.iter_mut().zip(lanes.iter()) {
        // Widen first so that a count of 16 is a valid shift amount.
        *out = (u32::from(*lane) >> count) as u16;
    }
    write_mmx_reg64(r, std::mem::transmute(shifted));
    transition_fpu_to_mmx();
}
/// Arithmetic right shift of each of the four 16-bit lanes of MMX register `r`.
///
/// Shift counts above 15 are clamped to 16, which fills every lane with its
/// sign bit.
pub unsafe fn psraw_r64(r: i32, shift: u64) {
    // psraw mm, {shift}
    let lanes: [i16; 4] = std::mem::transmute(read_mmx64s(r));
    let count = shift.min(16);
    let mut shifted = [0i16; 4];
    for (out, lane) in shifted.iter_mut().zip(lanes.iter()) {
        // Sign-extend first so that a count of 16 is a valid shift amount.
        *out = (i32::from(*lane) >> count) as i16;
    }
    write_mmx_reg64(r, std::mem::transmute(shifted));
    transition_fpu_to_mmx();
}
/// Left shift of each of the four 16-bit lanes of MMX register `r`.
///
/// Shift counts above 15 clear the whole register.
pub unsafe fn psllw_r64(r: i32, shift: u64) {
    // psllw mm, {shift}
    let lanes: [i16; 4] = std::mem::transmute(read_mmx64s(r));
    let mut shifted = [0i16; 4];
    if shift <= 15 {
        for (out, lane) in shifted.iter_mut().zip(lanes.iter()) {
            *out = *lane << shift;
        }
    }
    write_mmx_reg64(r, std::mem::transmute(shifted));
    transition_fpu_to_mmx();
}
/// Logical right shift of both 32-bit lanes of MMX register `r`.
///
/// Shift counts above 31 clear the whole register.
pub unsafe fn psrld_r64(r: i32, shift: u64) {
    // psrld mm, {shift}
    let lanes: [u32; 2] = std::mem::transmute(read_mmx64s(r));
    let shifted: [u32; 2] = if shift <= 31 {
        [lanes[0] >> shift, lanes[1] >> shift]
    }
    else {
        [0, 0]
    };
    write_mmx_reg64(r, std::mem::transmute(shifted));
    transition_fpu_to_mmx();
}
/// Arithmetic right shift of both 32-bit lanes of MMX register `r`.
///
/// Shift counts above 31 are clamped to 31, which fills each lane with its
/// sign bit.
pub unsafe fn psrad_r64(r: i32, shift: u64) {
    // psrad mm, {shift}
    let lanes: [i32; 2] = std::mem::transmute(read_mmx64s(r));
    let count = shift.min(31);
    let shifted: [i32; 2] = [lanes[0] >> count, lanes[1] >> count];
    write_mmx_reg64(r, std::mem::transmute(shifted));
    transition_fpu_to_mmx();
}
/// Left shift of both 32-bit lanes of MMX register `r`.
///
/// Shift counts above 31 clear the whole register.
pub unsafe fn pslld_r64(r: i32, shift: u64) {
    // pslld mm, {shift}
    let lanes: [i32; 2] = std::mem::transmute(read_mmx64s(r));
    let shifted: [i32; 2] = if shift <= 31 {
        [lanes[0] << shift, lanes[1] << shift]
    }
    else {
        [0, 0]
    };
    write_mmx_reg64(r, std::mem::transmute(shifted));
    transition_fpu_to_mmx();
}
/// Right shift of the whole 64-bit value of MMX register `r`.
///
/// Shift counts above 63 clear the register.
pub unsafe fn psrlq_r64(r: i32, shift: u64) {
    // psrlq mm, {shift}
    let value = read_mmx64s(r);
    let shifted = if shift <= 63 { value >> shift } else { 0 };
    write_mmx_reg64(r, shifted);
    transition_fpu_to_mmx();
}
/// Left shift of the whole 64-bit value of MMX register `r`.
///
/// Shift counts above 63 clear the register.
pub unsafe fn psllq_r64(r: i32, shift: u64) {
    // psllq mm, {shift}
    let value = read_mmx64s(r);
    let shifted = if shift <= 63 { value << shift } else { 0 };
    write_mmx_reg64(r, shifted);
    transition_fpu_to_mmx();
}
/// Logical right shift of each of the eight 16-bit lanes of XMM register `r`.
///
/// Shift counts above 15 clear the whole register. Lanes are shifted in
/// pairs and packed into the four dwords handed to `write_xmm128`.
pub unsafe fn psrlw_r128(r: i32, shift: u64) {
    // psrlw xmm, {shift}
    let src = read_xmm128s(r);
    let (d0, d1, d2, d3) = if shift <= 15 {
        // Shift two adjacent words and combine them into one dword.
        let pack = |lo: u16, hi: u16| -> i32 {
            ((lo as i32) >> shift) | (((hi as i32) >> shift) << 16)
        };
        (
            pack(src.u16[0], src.u16[1]),
            pack(src.u16[2], src.u16[3]),
            pack(src.u16[4], src.u16[5]),
            pack(src.u16[6], src.u16[7]),
        )
    }
    else {
        (0, 0, 0, 0)
    };
    write_xmm128(r, d0, d1, d2, d3);
}
/// Arithmetic right shift of each of the eight 16-bit lanes of XMM register `r`.
///
/// Shift counts above 15 are clamped to 16, which fills every lane with its
/// sign bit.
pub unsafe fn psraw_r128(r: i32, shift: u64) {
    // psraw xmm, {shift}
    let src = read_xmm128s(r);
    let count = shift.min(16) as i32;
    // Shift two adjacent sign-extended words and combine them into one dword;
    // the low word is masked so its sign extension does not leak upwards.
    let pack = |lo: i16, hi: i16| -> i32 {
        (((lo as i32) >> count) & 0xFFFF) | (((hi as i32) >> count) << 16)
    };
    let d0 = pack(src.i16[0], src.i16[1]);
    let d1 = pack(src.i16[2], src.i16[3]);
    let d2 = pack(src.i16[4], src.i16[5]);
    let d3 = pack(src.i16[6], src.i16[7]);
    write_xmm128(r, d0, d1, d2, d3);
}
/// Left shift of each of the eight 16-bit lanes of XMM register `r`.
///
/// Shift counts above 15 clear the whole register.
pub unsafe fn psllw_r128(r: i32, shift: u64) {
    // psllw xmm, {shift}
    let src = read_xmm128s(r);
    let (d0, d1, d2, d3) = if shift <= 15 {
        // Shift two adjacent words and combine them into one dword; the low
        // word is masked so bits shifted past bit 15 are dropped.
        let pack = |lo: u16, hi: u16| -> i32 {
            (((lo as i32) << shift) & 0xFFFF) | (((hi as i32) << shift) << 16)
        };
        (
            pack(src.u16[0], src.u16[1]),
            pack(src.u16[2], src.u16[3]),
            pack(src.u16[4], src.u16[5]),
            pack(src.u16[6], src.u16[7]),
        )
    }
    else {
        (0, 0, 0, 0)
    };
    write_xmm128(r, d0, d1, d2, d3);
}
/// Logical right shift of each of the four 32-bit lanes of XMM register `r`.
///
/// Shift counts above 31 clear the whole register.
pub unsafe fn psrld_r128(r: i32, shift: u64) {
    // psrld xmm, {shift}
    let src = read_xmm128s(r);
    let (d0, d1, d2, d3) = if shift <= 31 {
        (
            (src.u32[0] >> shift) as i32,
            (src.u32[1] >> shift) as i32,
            (src.u32[2] >> shift) as i32,
            (src.u32[3] >> shift) as i32,
        )
    }
    else {
        (0, 0, 0, 0)
    };
    write_xmm128(r, d0, d1, d2, d3);
}
/// Arithmetic right shift of each of the four 32-bit lanes of XMM register `r`.
///
/// Shift counts above 31 are clamped to 31, which fills each lane with its
/// sign bit.
pub unsafe fn psrad_r128(r: i32, shift: u64) {
    // psrad xmm, {shift}
    let src = read_xmm128s(r);
    let count = shift.min(31) as i32;
    write_xmm128(
        r,
        src.i32[0] >> count,
        src.i32[1] >> count,
        src.i32[2] >> count,
        src.i32[3] >> count,
    );
}
/// Left shift of each of the four 32-bit lanes of XMM register `r`.
///
/// Shift counts above 31 clear the whole register.
pub unsafe fn pslld_r128(r: i32, shift: u64) {
    // pslld xmm, {shift}
    let src = read_xmm128s(r);
    let (d0, d1, d2, d3) = if shift <= 31 {
        (
            src.i32[0] << shift,
            src.i32[1] << shift,
            src.i32[2] << shift,
            src.i32[3] << shift,
        )
    }
    else {
        (0, 0, 0, 0)
    };
    write_xmm128(r, d0, d1, d2, d3);
}
/// Logical right shift of both 64-bit lanes of XMM register `r`.
///
/// Shift counts above 63 clear the whole register.
pub unsafe fn psrlq_r128(r: i32, shift: u64) {
    // psrlq xmm, {shift}
    let src = read_xmm128s(r);
    let shifted = if shift <= 63 {
        reg128 {
            u64: [src.u64[0] >> shift, src.u64[1] >> shift],
        }
    }
    else {
        reg128 { i8: [0; 16] }
    };
    write_xmm_reg128(r, shifted);
}
/// Left shift of both 64-bit lanes of XMM register `r`.
///
/// Shift counts above 63 clear the whole register.
pub unsafe fn psllq_r128(r: i32, shift: u64) {
    // psllq xmm, {shift}
    let src = read_xmm128s(r);
    let shifted = if shift <= 63 {
        reg128 {
            u64: [src.u64[0] << shift, src.u64[1] << shift],
        }
    }
    else {
        reg128 { i8: [0; 16] }
    };
    write_xmm_reg128(r, shifted);
}
/// Evaluates the comparison selected by the low three bits of `op` on `x`
/// and `y`.
///
/// Predicates 0-2 (equal, less-than, less-or-equal) and 7 (ordered) are false
/// when either operand is NaN; predicates 3-6 (unordered and the negated
/// forms) are true when either operand is NaN.
pub unsafe fn sse_comparison(op: i32, x: f64, y: f64) -> bool {
    // TODO: Signaling
    let unordered = x.is_nan() || y.is_nan();
    match op & 7 {
        0 => x == y,
        1 => x < y,
        2 => x <= y,
        3 => unordered,
        4 => x != y || unordered,
        5 => x >= y || unordered,
        6 => x > y || unordered,
        7 => !unordered,
        _ => {
            // `op & 7` cannot produce any other value.
            dbg_assert!(false);
            false
        },
    }
}
/// Returns `x` only when `x < y` holds; in every other case `y` is returned.
///
/// Consequently `y` is the result when the operands compare equal (including
/// zeros of either sign) and when either operand is NaN.
pub unsafe fn sse_min(x: f64, y: f64) -> f64 {
    if x < y {
        x
    }
    else {
        y
    }
}
/// Returns `x` only when `x > y` holds; in every other case `y` is returned.
///
/// Consequently `y` is the result when the operands compare equal (including
/// zeros of either sign) and when either operand is NaN.
pub unsafe fn sse_max(x: f64, y: f64) -> f64 {
    if x > y {
        x
    }
    else {
        y
    }
}
/// Converts `x` to `i32`, discarding the fractional part.
///
/// Returns `i32::MIN` (0x80000000) when the truncated value is NaN or lies
/// outside the `i32` range.
#[no_mangle]
pub unsafe fn sse_convert_with_truncation_f32_to_i32(x: f32) -> i32 {
    let truncated = x.trunc();
    // Written as a positive range test so that NaN falls into the `else` arm.
    let fits = truncated >= -2147483648.0 && truncated < 2147483648.0;
    if fits {
        truncated as i64 as i32
    }
    else {
        // TODO: Signal
        i32::MIN
    }
}
/// Converts `x` to `i32` after rounding it with `sse_integer_round`.
///
/// Returns `i32::MIN` (0x80000000) when the rounded value is NaN or lies
/// outside the `i32` range.
#[no_mangle]
pub unsafe fn sse_convert_f32_to_i32(x: f32) -> i32 {
    let rounded = sse_integer_round(x as f64);
    // Written as a positive range test so that NaN falls into the `else` arm.
    let fits = rounded >= -2147483648.0 && rounded < 2147483648.0;
    if fits {
        rounded as i64 as i32
    }
    else {
        // TODO: Signal
        i32::MIN
    }
}
/// Converts `x` to `i32`, discarding the fractional part.
///
/// Returns `i32::MIN` (0x80000000) when the truncated value is NaN or lies
/// outside the `i32` range.
#[no_mangle]
pub unsafe fn sse_convert_with_truncation_f64_to_i32(x: f64) -> i32 {
    let truncated = x.trunc();
    // Written as a positive range test so that NaN falls into the `else` arm.
    let fits = truncated >= -2147483648.0 && truncated < 2147483648.0;
    if fits {
        truncated as i64 as i32
    }
    else {
        // TODO: Signal
        i32::MIN
    }
}
/// Converts `x` to `i32` after rounding it with `sse_integer_round`.
///
/// Returns `i32::MIN` (0x80000000) when the rounded value is NaN or lies
/// outside the `i32` range.
#[no_mangle]
pub unsafe fn sse_convert_f64_to_i32(x: f64) -> i32 {
    let rounded = sse_integer_round(x);
    // Written as a positive range test so that NaN falls into the `else` arm.
    let fits = rounded >= -2147483648.0 && rounded < 2147483648.0;
    if fits {
        rounded as i64 as i32
    }
    else {
        // TODO: Signal
        i32::MIN
    }
}
/// Rounds `f` to an integral value according to the two-bit rounding-control
/// field read from `mxcsr` (bits selected by `MXCSR_RC_SHIFT`).
///
/// * 0: round to nearest, ties to even
/// * 1: round down (`floor`)
/// * 3: truncate, implemented as `floor` for positive values and `ceil`
///   otherwise
/// * remaining value (2): round up (`ceil`)
pub unsafe fn sse_integer_round(f: f64) -> f64 {
    // see fpu_integer_round
    let rounding_control = (*mxcsr >> MXCSR_RC_SHIFT) & 3;
    match rounding_control {
        0 => {
            // Round to nearest, or even if equidistant
            let nearest = f.round();
            if (nearest - f).abs() == 0.5 {
                // `round` resolves ties away from zero; redo the tie so the
                // result lands on the even neighbour.
                2.0 * (f * 0.5).round()
            }
            else {
                nearest
            }
        },
        1 => f.floor(),
        // rc=3 is truncate -> floor for positive numbers
        3 if f > 0.0 => f.floor(),
        _ => f.ceil(),
    }
}