v86/src/rust/cpu/sse_instr.rs

use cpu::cpu::*;
use cpu::global_pointers::mxcsr;

pub unsafe fn mov_r_m64(addr: i32, r: i32) {
    // mov* m64, mm
    let data = read_mmx64s(r);
    return_on_pagefault!(safe_write64(addr, data));
    transition_fpu_to_mmx();
}
pub unsafe fn movl_r128_m64(addr: i32, r: i32) {
    // mov* m64, xmm
    let data = read_xmm64s(r);
    return_on_pagefault!(safe_write64(addr, data));
}
pub unsafe fn mov_r_r128(r1: i32, r2: i32) {
    // mov* xmm, xmm
    let data = read_xmm128s(r2);
    write_xmm_reg128(r1, data);
}
pub unsafe fn mov_r_m128(addr: i32, r: i32) {
    // mov* m128, xmm
    let data = read_xmm128s(r);
    return_on_pagefault!(safe_write128(addr, data));
}
pub unsafe fn mov_rm_r128(source: reg128, r: i32) {
    // mov* xmm, xmm/m128
    write_xmm_reg128(r, source);
}
pub unsafe fn movh_r128_m64(addr: i32, r: i32) {
    // movhp* m64, xmm
    let data = read_xmm128s(r);
    return_on_pagefault!(safe_write64(addr, data.u64[1]));
}

pub unsafe fn pand_r128(source: reg128, r: i32) {
    // pand xmm, xmm/m128
    // XXX: Aligned access or #gp
    let destination = read_xmm128s(r);
    let mut result = reg128 { i8: [0; 16] };
    result.u64[0] = source.u64[0] & destination.u64[0];
    result.u64[1] = source.u64[1] & destination.u64[1];
    write_xmm_reg128(r, result);
}
pub unsafe fn pandn_r128(source: reg128, r: i32) {
    // pandn xmm, xmm/m128
    // XXX: Aligned access or #gp
    let destination = read_xmm128s(r);
    let mut result = reg128 { i8: [0; 16] };
    result.u64[0] = source.u64[0] & !destination.u64[0];
    result.u64[1] = source.u64[1] & !destination.u64[1];
    write_xmm_reg128(r, result);
}
pub unsafe fn pxor_r128(source: reg128, r: i32) {
    // pxor xmm, xmm/m128
    // XXX: Aligned access or #gp
    let destination = read_xmm128s(r);
    let mut result = reg128 { i8: [0; 16] };
    result.u64[0] = source.u64[0] ^ destination.u64[0];
    result.u64[1] = source.u64[1] ^ destination.u64[1];
    write_xmm_reg128(r, result);
}
pub unsafe fn por_r128(source: reg128, r: i32) {
    // por xmm, xmm/m128
    // XXX: Aligned access or #gp
    let destination = read_xmm128s(r);
    let mut result = reg128 { i8: [0; 16] };
    result.u64[0] = source.u64[0] | destination.u64[0];
    result.u64[1] = source.u64[1] | destination.u64[1];
    write_xmm_reg128(r, result);
}

pub unsafe fn psrlw_r64(r: i32, shift: u64) {
    // psrlw mm, {shift}
    let destination: [u16; 4] = std::mem::transmute(read_mmx64s(r));
    let shift = if shift > 15 { 16 } else { shift };
    let mut result = [0; 4];
    for i in 0..4 {
        result[i] = ((destination[i] as u32) >> shift) as u16
    }
    write_mmx_reg64(r, std::mem::transmute(result));
    transition_fpu_to_mmx();
}
pub unsafe fn psraw_r64(r: i32, shift: u64) {
    // psraw mm, {shift}
    let destination: [i16; 4] = std::mem::transmute(read_mmx64s(r));
    let shift = if shift > 15 { 16 } else { shift };
    let mut result = [0; 4];
    for i in 0..4 {
        result[i] = (destination[i] as i32 >> shift) as i16
    }
    write_mmx_reg64(r, std::mem::transmute(result));
    transition_fpu_to_mmx();
}
pub unsafe fn psllw_r64(r: i32, shift: u64) {
    // psllw mm, {shift}
    let destination: [i16; 4] = std::mem::transmute(read_mmx64s(r));
    let mut result = [0; 4];
    if shift <= 15 {
        for i in 0..4 {
            result[i] = destination[i] << shift
        }
    }
    write_mmx_reg64(r, std::mem::transmute(result));
    transition_fpu_to_mmx();
}
pub unsafe fn psrld_r64(r: i32, shift: u64) {
    // psrld mm, {shift}
    let destination: [u32; 2] = std::mem::transmute(read_mmx64s(r));
    let mut result = [0; 2];
    if shift <= 31 {
        for i in 0..2 {
            result[i] = destination[i] >> shift;
        }
    }
    write_mmx_reg64(r, std::mem::transmute(result));
    transition_fpu_to_mmx();
}
pub unsafe fn psrad_r64(r: i32, shift: u64) {
    // psrad mm, {shift}
    let destination: [i32; 2] = std::mem::transmute(read_mmx64s(r));
    let shift = if shift > 31 { 31 } else { shift };
    let mut result = [0; 2];
    for i in 0..2 {
        result[i] = destination[i] >> shift;
    }
    write_mmx_reg64(r, std::mem::transmute(result));
    transition_fpu_to_mmx();
}
pub unsafe fn pslld_r64(r: i32, shift: u64) {
    // pslld mm, {shift}
    let destination: [i32; 2] = std::mem::transmute(read_mmx64s(r));
    let mut result = [0; 2];
    if shift <= 31 {
        for i in 0..2 {
            result[i] = destination[i] << shift;
        }
    }
    write_mmx_reg64(r, std::mem::transmute(result));
    transition_fpu_to_mmx();
}
pub unsafe fn psrlq_r64(r: i32, shift: u64) {
    // psrlq mm, {shift}
    let destination = read_mmx64s(r);
    let mut result = 0;
    if shift <= 63 {
        result = destination >> shift
    }
    write_mmx_reg64(r, result);
    transition_fpu_to_mmx();
}
pub unsafe fn psllq_r64(r: i32, shift: u64) {
    // psllq mm, {shift}
    let destination = read_mmx64s(r);
    let mut result = 0;
    if shift <= 63 {
        result = destination << shift
    }
    write_mmx_reg64(r, result);
    transition_fpu_to_mmx();
}
pub unsafe fn psrlw_r128(r: i32, shift: u64) {
    // psrlw xmm, {shift}
    let destination = read_xmm128s(r);
    let mut dword0: i32 = 0;
    let mut dword1: i32 = 0;
    let mut dword2: i32 = 0;
    let mut dword3: i32 = 0;
    if shift <= 15 {
        dword0 = destination.u16[0] as i32 >> shift | destination.u16[1] as i32 >> shift << 16;
        dword1 = destination.u16[2] as i32 >> shift | destination.u16[3] as i32 >> shift << 16;
        dword2 = destination.u16[4] as i32 >> shift | destination.u16[5] as i32 >> shift << 16;
        dword3 = destination.u16[6] as i32 >> shift | destination.u16[7] as i32 >> shift << 16
    }
    write_xmm128(r, dword0, dword1, dword2, dword3);
}
pub unsafe fn psraw_r128(r: i32, shift: u64) {
    // psraw xmm, {shift}
    let destination = read_xmm128s(r);
    let shift_clamped = (if shift > 15 { 16 } else { shift as u32 }) as i32;
    let dword0 = destination.i16[0] as i32 >> shift_clamped & 0xFFFF
        | destination.i16[1] as i32 >> shift_clamped << 16;
    let dword1 = destination.i16[2] as i32 >> shift_clamped & 0xFFFF
        | destination.i16[3] as i32 >> shift_clamped << 16;
    let dword2 = destination.i16[4] as i32 >> shift_clamped & 0xFFFF
        | destination.i16[5] as i32 >> shift_clamped << 16;
    let dword3 = destination.i16[6] as i32 >> shift_clamped & 0xFFFF
        | destination.i16[7] as i32 >> shift_clamped << 16;
    write_xmm128(r, dword0, dword1, dword2, dword3);
}
pub unsafe fn psllw_r128(r: i32, shift: u64) {
    // psllw xmm, {shift}
    let destination = read_xmm128s(r);
    let mut dword0: i32 = 0;
    let mut dword1: i32 = 0;
    let mut dword2: i32 = 0;
    let mut dword3: i32 = 0;
    if shift <= 15 {
        dword0 = (destination.u16[0] as i32) << shift & 0xFFFF
            | (destination.u16[1] as i32) << shift << 16;
        dword1 = (destination.u16[2] as i32) << shift & 0xFFFF
            | (destination.u16[3] as i32) << shift << 16;
        dword2 = (destination.u16[4] as i32) << shift & 0xFFFF
            | (destination.u16[5] as i32) << shift << 16;
        dword3 = (destination.u16[6] as i32) << shift & 0xFFFF
            | (destination.u16[7] as i32) << shift << 16
    }
    write_xmm128(r, dword0, dword1, dword2, dword3);
}
pub unsafe fn psrld_r128(r: i32, shift: u64) {
    // psrld xmm, {shift}
    let destination = read_xmm128s(r);
    let mut dword0: i32 = 0;
    let mut dword1: i32 = 0;
    let mut dword2: i32 = 0;
    let mut dword3: i32 = 0;
    if shift <= 31 {
        dword0 = (destination.u32[0] >> shift) as i32;
        dword1 = (destination.u32[1] >> shift) as i32;
        dword2 = (destination.u32[2] >> shift) as i32;
        dword3 = (destination.u32[3] >> shift) as i32
    }
    write_xmm128(r, dword0, dword1, dword2, dword3);
}
pub unsafe fn psrad_r128(r: i32, shift: u64) {
    // psrad xmm, {shift}
    let destination = read_xmm128s(r);
    let shift_clamped = (if shift > 31 { 31 } else { shift }) as i32;
    let dword0 = destination.i32[0] >> shift_clamped;
    let dword1 = destination.i32[1] >> shift_clamped;
    let dword2 = destination.i32[2] >> shift_clamped;
    let dword3 = destination.i32[3] >> shift_clamped;
    write_xmm128(r, dword0, dword1, dword2, dword3);
}
pub unsafe fn pslld_r128(r: i32, shift: u64) {
    // pslld xmm, {shift}
    let destination = read_xmm128s(r);
    let mut dword0: i32 = 0;
    let mut dword1: i32 = 0;
    let mut dword2: i32 = 0;
    let mut dword3: i32 = 0;
    if shift <= 31 {
        dword0 = destination.i32[0] << shift;
        dword1 = destination.i32[1] << shift;
        dword2 = destination.i32[2] << shift;
        dword3 = destination.i32[3] << shift
    }
    write_xmm128(r, dword0, dword1, dword2, dword3);
}
pub unsafe fn psrlq_r128(r: i32, shift: u64) {
    // psrlq xmm, {shift}
    let destination = read_xmm128s(r);
    let mut result = reg128 { i8: [0; 16] };
    if shift <= 63 {
        result.u64[0] = destination.u64[0] >> shift;
        result.u64[1] = destination.u64[1] >> shift
    }
    write_xmm_reg128(r, result);
}
pub unsafe fn psllq_r128(r: i32, shift: u64) {
    // psllq xmm, {shift}
    let destination = read_xmm128s(r);
    let mut result = reg128 { i8: [0; 16] };
    if shift <= 63 {
        result.u64[0] = destination.u64[0] << shift;
        result.u64[1] = destination.u64[1] << shift
    }
    write_xmm_reg128(r, result);
}

pub unsafe fn sse_comparison(op: i32, x: f64, y: f64) -> bool {
    // TODO: Signaling
    match op & 7 {
        0 => return x == y,
        1 => return x < y,
        2 => return x <= y,
        3 => return x.is_nan() || y.is_nan(),
        4 => return x != y || x.is_nan() || y.is_nan(),
        5 => return x >= y || x.is_nan() || y.is_nan(),
        6 => return x > y || x.is_nan() || y.is_nan(),
        7 => return !x.is_nan() && !y.is_nan(),
        _ => {
            dbg_assert!(false);
            return false;
        },
    };
}
pub unsafe fn sse_min(x: f64, y: f64) -> f64 {
    // if both x and y are 0 or x is nan, y is returned
    return if x < y { x } else { y };
}
pub unsafe fn sse_max(x: f64, y: f64) -> f64 {
    // if both x and y are 0 or x is nan, y is returned
    return if x > y { x } else { y };
}

#[no_mangle]
pub unsafe fn sse_convert_with_truncation_f32_to_i32(x: f32) -> i32 {
    let x = x.trunc();
    if x >= -2147483648.0 && x < 2147483648.0 {
        return x as i64 as i32;
    }
    else {
        // TODO: Signal
        return -0x80000000;
    };
}
#[no_mangle]
pub unsafe fn sse_convert_f32_to_i32(x: f32) -> i32 {
    let x = sse_integer_round(x as f64);
    if x >= -2147483648.0 && x < 2147483648.0 {
        return x as i64 as i32;
    }
    else {
        // TODO: Signal
        return -0x80000000;
    };
}

#[no_mangle]
pub unsafe fn sse_convert_with_truncation_f64_to_i32(x: f64) -> i32 {
    let x = x.trunc();
    if x >= -2147483648.0 && x < 2147483648.0 {
        return x as i64 as i32;
    }
    else {
        // TODO: Signal
        return -0x80000000;
    };
}
#[no_mangle]
pub unsafe fn sse_convert_f64_to_i32(x: f64) -> i32 {
    let x = sse_integer_round(x);
    if x >= -2147483648.0 && x < 2147483648.0 {
        return x as i64 as i32;
    }
    else {
        // TODO: Signal
        return -0x80000000;
    };
}

pub unsafe fn sse_integer_round(f: f64) -> f64 {
    // see fpu_integer_round
    let rc = *mxcsr >> MXCSR_RC_SHIFT & 3;
    if rc == 0 {
        // Round to nearest, or even if equidistant
        let mut rounded = f.round();
        let diff = rounded - f;
        if diff == 0.5 || diff == -0.5 {
            rounded = 2.0 * (f * 0.5).round()
        }
        return rounded;
    }
    else if rc == 1 || rc == 3 && f > 0.0 {
        // rc=3 is truncate -> floor for positive numbers
        return f.floor();
    }
    else {
        return f.ceil();
    };
}