Implement some floating point sse1/sse2 instructions (#57)
This commit is contained in:
parent
6f28d8b9c9
commit
7e574dde52
|
@ -494,7 +494,10 @@ const encodings = [
|
|||
{ sse: 1, opcode: 0x660F28, e: 1 },
|
||||
{ sse: 1, opcode: 0x0F29, e: 1 },
|
||||
{ sse: 1, opcode: 0x660F29, e: 1 },
|
||||
{ sse: 1, opcode: 0x0F2A, skip: 1 },
|
||||
{ sse: 1, opcode: 0x0F2A, e: 1, },
|
||||
{ sse: 1, opcode: 0x660F2A, e: 1, },
|
||||
{ sse: 1, opcode: 0xF20F2A, e: 1, },
|
||||
{ sse: 1, opcode: 0xF30F2A, e: 1, },
|
||||
{ sse: 1, opcode: 0x0F2B, only_mem: 1, e: 1 },
|
||||
{ sse: 1, opcode: 0x660F2B, only_mem: 1, e: 1 },
|
||||
|
||||
|
@ -510,7 +513,11 @@ const encodings = [
|
|||
{ sse: 1, opcode: 0x660F50, only_reg: 1, e: 1 },
|
||||
{ sse: 1, opcode: 0x0F51, skip: 1 },
|
||||
{ sse: 1, opcode: 0x0F52, skip: 1 },
|
||||
{ sse: 1, opcode: 0x0F53, skip: 1 },
|
||||
|
||||
// reciprocal: approximation of 1/x. Skipped because our approximation doesn't match Intel's
|
||||
{ sse: 1, opcode: 0x0F53, e: 1, skip: 1, },
|
||||
{ sse: 1, opcode: 0xF30F53, e: 1, skip: 1, },
|
||||
|
||||
{ sse: 1, opcode: 0x0F54, e: 1 },
|
||||
{ sse: 1, opcode: 0x660F54, e: 1 },
|
||||
{ sse: 1, opcode: 0x0F55, e: 1 },
|
||||
|
@ -520,14 +527,26 @@ const encodings = [
|
|||
{ sse: 1, opcode: 0x0F57, e: 1 },
|
||||
{ sse: 1, opcode: 0x660F57, e: 1 },
|
||||
|
||||
{ sse: 1, opcode: 0x0F58, skip: 1 },
|
||||
{ sse: 1, opcode: 0x0F59, skip: 1 },
|
||||
{ sse: 1, opcode: 0x0F58, e: 1, },
|
||||
{ sse: 1, opcode: 0x660F58, e: 1, },
|
||||
{ sse: 1, opcode: 0xF20F58, e: 1, },
|
||||
{ sse: 1, opcode: 0xF30F58, e: 1, },
|
||||
|
||||
{ sse: 1, opcode: 0x0F59, e: 1, },
|
||||
{ sse: 1, opcode: 0x660F59, e: 1, },
|
||||
{ sse: 1, opcode: 0xF20F59, e: 1, },
|
||||
{ sse: 1, opcode: 0xF30F59, e: 1, },
|
||||
|
||||
{ sse: 1, opcode: 0x0F5A, skip: 1 },
|
||||
{ sse: 1, opcode: 0x0F5B, skip: 1 },
|
||||
{ sse: 1, opcode: 0x0F5C, skip: 1 },
|
||||
{ sse: 1, opcode: 0x0F5D, skip: 1 },
|
||||
{ sse: 1, opcode: 0x0F5E, skip: 1 },
|
||||
{ sse: 1, opcode: 0x0F5F, skip: 1 },
|
||||
|
||||
{ sse: 1, opcode: 0x0F5C, e: 1, },
|
||||
{ sse: 1, opcode: 0x660F5C, e: 1, },
|
||||
{ sse: 1, opcode: 0xF20F5C, e: 1, },
|
||||
{ sse: 1, opcode: 0xF30F5C, e: 1, },
|
||||
{ sse: 1, opcode: 0x0F5D, skip: 1, },
|
||||
{ sse: 1, opcode: 0x0F5E, skip: 1, },
|
||||
{ sse: 1, opcode: 0x0F5F, skip: 1, },
|
||||
|
||||
{ sse: 1, opcode: 0x660F60, e: 1 },
|
||||
{ sse: 1, opcode: 0x0F60, e: 1 },
|
||||
|
@ -614,7 +633,10 @@ const encodings = [
|
|||
{ sse: 1, opcode: 0x660F7F, e: 1 },
|
||||
{ sse: 1, opcode: 0xF30F7F, e: 1 },
|
||||
|
||||
{ sse: 1, opcode: 0x0FC2, skip: 1, },
|
||||
{ sse: 1, opcode: 0x0FC2, e: 1, imm8: 1 },
|
||||
{ sse: 1, opcode: 0x660FC2, e: 1, imm8: 1 },
|
||||
{ sse: 1, opcode: 0xF20FC2, e: 1, imm8: 1 },
|
||||
{ sse: 1, opcode: 0xF30FC2, e: 1, imm8: 1 },
|
||||
|
||||
{ opcode: 0x0FC3, e: 1, only_mem: 1, }, // movnti: Uses normal registers, hence not marked as sse
|
||||
|
||||
|
|
|
@ -1368,6 +1368,16 @@ void write_mmx_reg64(int32_t r, union reg64 data)
|
|||
reg_mmx[r].u64[0] = data.u64[0];
|
||||
}
|
||||
|
||||
float_t read_xmm_f32(int32_t r)
|
||||
{
|
||||
return reg_xmm[r].f32[0];
|
||||
}
|
||||
|
||||
int32_t read_xmm32(int32_t r)
|
||||
{
|
||||
return reg_xmm[r].u32[0];
|
||||
}
|
||||
|
||||
union reg64 read_xmm64s(int32_t r)
|
||||
{
|
||||
union reg64 x;
|
||||
|
@ -1380,6 +1390,16 @@ union reg128 read_xmm128s(int32_t r)
|
|||
return reg_xmm[r];
|
||||
}
|
||||
|
||||
void write_xmm_f32(int32_t r, float_t data)
|
||||
{
|
||||
reg_xmm[r].f32[0] = data;
|
||||
}
|
||||
|
||||
void write_xmm32(int32_t r, int32_t data)
|
||||
{
|
||||
reg_xmm[r].i32[0] = data;
|
||||
}
|
||||
|
||||
void write_xmm64(int32_t r, union reg64 data)
|
||||
{
|
||||
reg_xmm[r].u64[0] = data.u64[0];
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
|
@ -17,6 +18,8 @@ union reg128 {
|
|||
uint16_t u16[8];
|
||||
uint32_t u32[4];
|
||||
uint64_t u64[2];
|
||||
float_t f32[4];
|
||||
double_t f64[2];
|
||||
};
|
||||
_Static_assert(sizeof(union reg128) == 16, "reg128 is 16 bytes");
|
||||
|
||||
|
@ -29,6 +32,7 @@ union reg64 {
|
|||
uint16_t u16[4];
|
||||
uint32_t u32[2];
|
||||
uint64_t u64[1];
|
||||
float_t f32[2];
|
||||
double f64[1];
|
||||
};
|
||||
_Static_assert(sizeof(union reg64) == 8, "reg64 is 8 bytes");
|
||||
|
@ -141,8 +145,12 @@ int32_t read_mmx32s(int32_t r);
|
|||
union reg64 read_mmx64s(int32_t r);
|
||||
void write_mmx64(int32_t r, int32_t low, int32_t high);
|
||||
void write_mmx_reg64(int32_t r, union reg64 data);
|
||||
float_t read_xmm_f32(int32_t r);
|
||||
int32_t read_xmm32(int32_t r);
|
||||
union reg64 read_xmm64s(int32_t r);
|
||||
union reg128 read_xmm128s(int32_t r);
|
||||
void write_xmm_f32(int32_t r, float_t data);
|
||||
void write_xmm32(int32_t r, int32_t);
|
||||
void write_xmm64(int32_t r, union reg64 data);
|
||||
void write_xmm128(int32_t r, int32_t i0, int32_t i1, int32_t i2, int32_t i3);
|
||||
void write_xmm_reg128(int32_t r, union reg128 data);
|
||||
|
|
|
@ -621,7 +621,48 @@ void instr_660F29_reg(int32_t r1, int32_t r2) {
|
|||
mov_r_r128(r1, r2);
|
||||
}
|
||||
|
||||
// Stub for opcode 0F 2A: not implemented, delegates to unimplemented_sse().
// Declared with (void) so the definition is a real prototype and matches the
// header declaration `void instr_0F2A(void);`.
void instr_0F2A(void) { unimplemented_sse(); }
|
||||
void instr_0F2A(union reg64 source, int32_t r) {
|
||||
// cvtpi2ps xmm, mm/m64
|
||||
// XXX: The non-memory variant causes a transition from x87 FPU to MMX technology operation
|
||||
union reg64 result = {
|
||||
.f32 = {
|
||||
// Note: Casts here can fail
|
||||
source.i32[0],
|
||||
source.i32[1],
|
||||
}
|
||||
};
|
||||
write_xmm64(r, result);
|
||||
}
|
||||
DEFINE_SSE_SPLIT(instr_0F2A, safe_read64s, read_mmx64s)
|
||||
void instr_660F2A(union reg64 source, int32_t r) {
|
||||
// cvtpi2pd xmm, xmm/m64
|
||||
// XXX: The non-memory variant causes a transition from x87 FPU to MMX technology operation
|
||||
union reg128 result = {
|
||||
.f64 = {
|
||||
// These casts can't fail
|
||||
source.i32[0],
|
||||
source.i32[1],
|
||||
}
|
||||
};
|
||||
write_xmm_reg128(r, result);
|
||||
}
|
||||
DEFINE_SSE_SPLIT(instr_660F2A, safe_read64s, read_mmx64s)
|
||||
void instr_F20F2A(int32_t source, int32_t r) {
|
||||
// cvtsi2sd xmm, r32/m32
|
||||
union reg64 result = {
|
||||
// This cast can't fail
|
||||
.f64 = { source }
|
||||
};
|
||||
write_xmm64(r, result);
|
||||
}
|
||||
DEFINE_SSE_SPLIT(instr_F20F2A, safe_read32s, read_reg32)
|
||||
void instr_F30F2A(int32_t source, int32_t r) {
|
||||
// cvtsi2ss xmm, r/m32
|
||||
// Note: This cast can fail
|
||||
float_t result = source;
|
||||
write_xmm_f32(r, result);
|
||||
}
|
||||
DEFINE_SSE_SPLIT(instr_F30F2A, safe_read32s, read_reg32)
|
||||
|
||||
// Register-register form of opcode 0F 2B: unconditionally calls trigger_ud().
// Both register operands are ignored.
void instr_0F2B_reg(int32_t r1, int32_t r2)
{
    trigger_ud();
}
|
||||
void instr_0F2B_mem(int32_t addr, int32_t r) {
|
||||
|
@ -1005,7 +1046,26 @@ void instr_660F50_mem(int32_t addr, int32_t r1) { trigger_ud(); }
|
|||
|
||||
// Stubs for opcodes 0F 51, 0F 52 and 0F 53: not implemented, each delegates
// to unimplemented_sse(). Declared with (void) so the definitions are real
// prototypes and match the header declarations.
void instr_0F51(void) { unimplemented_sse(); }
void instr_0F52(void) { unimplemented_sse(); }
void instr_0F53(void) { unimplemented_sse(); }
|
||||
|
||||
void instr_0F53(union reg128 source, int32_t r) {
|
||||
// rcpps xmm, xmm/m128
|
||||
union reg128 result = {
|
||||
.f32 = {
|
||||
1 / source.f32[0],
|
||||
1 / source.f32[1],
|
||||
1 / source.f32[2],
|
||||
1 / source.f32[3],
|
||||
}
|
||||
};
|
||||
write_xmm_reg128(r, result);
|
||||
}
|
||||
DEFINE_SSE_SPLIT(instr_0F53, safe_read128s, read_xmm128s)
|
||||
|
||||
void instr_F30F53(float_t source, int32_t r) {
|
||||
// rcpss xmm, xmm/m32
|
||||
write_xmm_f32(r, 1 / source);
|
||||
}
|
||||
DEFINE_SSE_SPLIT(instr_F30F53, fpu_load_m32, read_xmm_f32)
|
||||
|
||||
void instr_0F54(union reg128 source, int32_t r) {
|
||||
// andps xmm, xmm/mem128
|
||||
|
@ -1063,11 +1123,138 @@ void instr_660F57(union reg128 source, int32_t r) {
|
|||
}
|
||||
DEFINE_SSE_SPLIT(instr_660F57, safe_read128s, read_xmm128s)
|
||||
|
||||
// Stubs for opcodes 0F 58 and 0F 59: not implemented, each delegates to
// unimplemented_sse(). Declared with (void) so the definitions are real
// prototypes and match the header declarations.
void instr_0F58(void) { unimplemented_sse(); }
void instr_0F59(void) { unimplemented_sse(); }
|
||||
void instr_0F58(union reg128 source, int32_t r) {
|
||||
// addps xmm, xmm/mem128
|
||||
union reg128 destination = read_xmm128s(r);
|
||||
union reg128 result = {
|
||||
.f32 = {
|
||||
source.f32[0] + destination.f32[0],
|
||||
source.f32[1] + destination.f32[1],
|
||||
source.f32[2] + destination.f32[2],
|
||||
source.f32[3] + destination.f32[3],
|
||||
}
|
||||
};
|
||||
write_xmm_reg128(r, result);
|
||||
}
|
||||
DEFINE_SSE_SPLIT(instr_0F58, safe_read128s, read_xmm128s)
|
||||
void instr_660F58(union reg128 source, int32_t r) {
|
||||
// addpd xmm, xmm/mem128
|
||||
union reg128 destination = read_xmm128s(r);
|
||||
union reg128 result = {
|
||||
.f64 = {
|
||||
source.f64[0] + destination.f64[0],
|
||||
source.f64[1] + destination.f64[1],
|
||||
}
|
||||
};
|
||||
write_xmm_reg128(r, result);
|
||||
}
|
||||
DEFINE_SSE_SPLIT(instr_660F58, safe_read128s, read_xmm128s)
|
||||
void instr_F20F58(union reg64 source, int32_t r) {
|
||||
// addsd xmm, xmm/mem64
|
||||
union reg64 destination = read_xmm64s(r);
|
||||
union reg64 result = {
|
||||
.f64 = { source.f64[0] + destination.f64[0], }
|
||||
};
|
||||
write_xmm64(r, result);
|
||||
}
|
||||
DEFINE_SSE_SPLIT(instr_F20F58, safe_read64s, read_xmm64s)
|
||||
void instr_F30F58(float_t source, int32_t r) {
|
||||
// addss xmm, xmm/mem32
|
||||
float_t destination = read_xmm_f32(r);
|
||||
float result = source + destination;
|
||||
write_xmm_f32(r, result);
|
||||
}
|
||||
DEFINE_SSE_SPLIT(instr_F30F58, fpu_load_m32, read_xmm_f32)
|
||||
|
||||
void instr_0F59(union reg128 source, int32_t r) {
|
||||
// mulps xmm, xmm/mem128
|
||||
union reg128 destination = read_xmm128s(r);
|
||||
union reg128 result = {
|
||||
.f32 = {
|
||||
source.f32[0] * destination.f32[0],
|
||||
source.f32[1] * destination.f32[1],
|
||||
source.f32[2] * destination.f32[2],
|
||||
source.f32[3] * destination.f32[3],
|
||||
}
|
||||
};
|
||||
write_xmm_reg128(r, result);
|
||||
}
|
||||
DEFINE_SSE_SPLIT(instr_0F59, safe_read128s, read_xmm128s)
|
||||
void instr_660F59(union reg128 source, int32_t r) {
|
||||
// mulpd xmm, xmm/mem128
|
||||
union reg128 destination = read_xmm128s(r);
|
||||
union reg128 result = {
|
||||
.f64 = {
|
||||
source.f64[0] * destination.f64[0],
|
||||
source.f64[1] * destination.f64[1],
|
||||
}
|
||||
};
|
||||
write_xmm_reg128(r, result);
|
||||
}
|
||||
DEFINE_SSE_SPLIT(instr_660F59, safe_read128s, read_xmm128s)
|
||||
void instr_F20F59(union reg64 source, int32_t r) {
|
||||
// mulsd xmm, xmm/mem64
|
||||
union reg64 destination = read_xmm64s(r);
|
||||
union reg64 result = {
|
||||
.f64 = { source.f64[0] * destination.f64[0], }
|
||||
};
|
||||
write_xmm64(r, result);
|
||||
}
|
||||
DEFINE_SSE_SPLIT(instr_F20F59, safe_read64s, read_xmm64s)
|
||||
void instr_F30F59(float_t source, int32_t r) {
|
||||
// mulss xmm, xmm/mem32
|
||||
float_t destination = read_xmm_f32(r);
|
||||
float result = source * destination;
|
||||
write_xmm_f32(r, result);
|
||||
}
|
||||
DEFINE_SSE_SPLIT(instr_F30F59, fpu_load_m32, read_xmm_f32)
|
||||
|
||||
// Stubs for opcodes 0F 5A, 0F 5B and 0F 5C: not implemented, each delegates
// to unimplemented_sse(). Declared with (void) so the definitions are real
// prototypes and match the header declarations.
void instr_0F5A(void) { unimplemented_sse(); }
void instr_0F5B(void) { unimplemented_sse(); }
void instr_0F5C(void) { unimplemented_sse(); }
|
||||
|
||||
void instr_0F5C(union reg128 source, int32_t r) {
|
||||
// subps xmm, xmm/mem128
|
||||
union reg128 destination = read_xmm128s(r);
|
||||
union reg128 result = {
|
||||
.f32 = {
|
||||
destination.f32[0] - source.f32[0],
|
||||
destination.f32[1] - source.f32[1],
|
||||
destination.f32[2] - source.f32[2],
|
||||
destination.f32[3] - source.f32[3],
|
||||
}
|
||||
};
|
||||
write_xmm_reg128(r, result);
|
||||
}
|
||||
DEFINE_SSE_SPLIT(instr_0F5C, safe_read128s, read_xmm128s)
|
||||
void instr_660F5C(union reg128 source, int32_t r) {
|
||||
// subpd xmm, xmm/mem128
|
||||
union reg128 destination = read_xmm128s(r);
|
||||
union reg128 result = {
|
||||
.f64 = {
|
||||
destination.f64[0] - source.f64[0],
|
||||
destination.f64[1] - source.f64[1],
|
||||
}
|
||||
};
|
||||
write_xmm_reg128(r, result);
|
||||
}
|
||||
DEFINE_SSE_SPLIT(instr_660F5C, safe_read128s, read_xmm128s)
|
||||
void instr_F20F5C(union reg64 source, int32_t r) {
|
||||
// subsd xmm, xmm/mem64
|
||||
union reg64 destination = read_xmm64s(r);
|
||||
union reg64 result = {
|
||||
.f64 = { destination.f64[0] - source.f64[0], }
|
||||
};
|
||||
write_xmm64(r, result);
|
||||
}
|
||||
DEFINE_SSE_SPLIT(instr_F20F5C, safe_read64s, read_xmm64s)
|
||||
void instr_F30F5C(float_t source, int32_t r) {
|
||||
// subss xmm, xmm/mem32
|
||||
float_t destination = read_xmm_f32(r);
|
||||
float result = destination - source;
|
||||
write_xmm_f32(r, result);
|
||||
}
|
||||
DEFINE_SSE_SPLIT(instr_F30F5C, fpu_load_m32, read_xmm_f32)
|
||||
|
||||
// Stubs for opcodes 0F 5D, 0F 5E and 0F 5F: not implemented, each delegates
// to unimplemented_sse(). Declared with (void) so the definitions are real
// prototypes and match the header declarations.
void instr_0F5D(void) { unimplemented_sse(); }
void instr_0F5E(void) { unimplemented_sse(); }
void instr_0F5F(void) { unimplemented_sse(); }
|
||||
|
@ -2261,7 +2448,49 @@ DEFINE_MODRM_INSTR_READ_WRITE_8(instr_0FC0, xadd8(___, get_reg8_index(r)))
|
|||
DEFINE_MODRM_INSTR_READ_WRITE_16(instr16_0FC1, xadd16(___, get_reg16_index(r)))
|
||||
DEFINE_MODRM_INSTR_READ_WRITE_32(instr32_0FC1, xadd32(___, r))
|
||||
|
||||
// Stub for opcode 0F C2: not implemented, delegates to unimplemented_sse().
// Declared with (void) so the definition is a real prototype and matches the
// header declaration `void instr_0FC2(void);`.
void instr_0FC2(void) { unimplemented_sse(); }
|
||||
void instr_0FC2(union reg128 source, int32_t r, int32_t imm8) {
|
||||
// cmpps xmm, xmm/m128
|
||||
union reg128 destination = read_xmm128s(r);
|
||||
union reg128 result = {
|
||||
.i32 = {
|
||||
sse_comparison(imm8, destination.f32[0], source.f32[0]) ? -1 : 0,
|
||||
sse_comparison(imm8, destination.f32[1], source.f32[1]) ? -1 : 0,
|
||||
sse_comparison(imm8, destination.f32[2], source.f32[2]) ? -1 : 0,
|
||||
sse_comparison(imm8, destination.f32[3], source.f32[3]) ? -1 : 0,
|
||||
}
|
||||
};
|
||||
write_xmm_reg128(r, result);
|
||||
}
|
||||
DEFINE_SSE_SPLIT_IMM(instr_0FC2, safe_read128s, read_xmm128s)
|
||||
|
||||
void instr_660FC2(union reg128 source, int32_t r, int32_t imm8) {
|
||||
// cmppd xmm, xmm/m128
|
||||
union reg128 destination = read_xmm128s(r);
|
||||
union reg128 result = {
|
||||
.i64 = {
|
||||
sse_comparison(imm8, destination.f64[0], source.f64[0]) ? -1 : 0,
|
||||
sse_comparison(imm8, destination.f64[1], source.f64[1]) ? -1 : 0,
|
||||
}
|
||||
};
|
||||
write_xmm_reg128(r, result);
|
||||
}
|
||||
DEFINE_SSE_SPLIT_IMM(instr_660FC2, safe_read128s, read_xmm128s)
|
||||
void instr_F20FC2(union reg64 source, int32_t r, int32_t imm8) {
|
||||
// cmpsd xmm, xmm/m64
|
||||
union reg64 destination = read_xmm64s(r);
|
||||
union reg64 result = {
|
||||
.i64 = { sse_comparison(imm8, destination.f64[0], source.f64[0]) ? -1 : 0, }
|
||||
};
|
||||
write_xmm64(r, result);
|
||||
}
|
||||
DEFINE_SSE_SPLIT_IMM(instr_F20FC2, safe_read64s, read_xmm64s)
|
||||
void instr_F30FC2(float_t source, int32_t r, int32_t imm8) {
|
||||
// cmpss xmm, xmm/m32
|
||||
float_t destination = read_xmm_f32(r);
|
||||
int32_t result = sse_comparison(imm8, destination, source) ? -1 : 0;
|
||||
write_xmm32(r, result);
|
||||
}
|
||||
DEFINE_SSE_SPLIT_IMM(instr_F30FC2, fpu_load_m32, read_xmm_f32)
|
||||
|
||||
// Register-register form of opcode 0F C3: unconditionally calls trigger_ud().
// Both register operands are ignored.
void instr_0FC3_reg(int32_t r1, int32_t r2)
{
    trigger_ud();
}
|
||||
void instr_0FC3_mem(int32_t addr, int32_t r) {
|
||||
|
|
|
@ -174,7 +174,6 @@ void instr_0F29_mem(int32_t addr, int32_t r);
|
|||
void instr_0F29_reg(int32_t r1, int32_t r2);
|
||||
void instr_660F29_mem(int32_t addr, int32_t r);
|
||||
void instr_660F29_reg(int32_t r1, int32_t r2);
|
||||
void instr_0F2A(void);
|
||||
void instr_0F2B_reg(int32_t r1, int32_t r2);
|
||||
void instr_0F2B_mem(int32_t addr, int32_t r);
|
||||
void instr_660F2B_reg(int32_t r1, int32_t r2);
|
||||
|
@ -275,9 +274,6 @@ void instr_0F50_reg(int32_t r1, int32_t r2);
|
|||
void instr_0F50_mem(int32_t addr, int32_t r1);
|
||||
void instr_660F50_reg(int32_t r1, int32_t r2);
|
||||
void instr_660F50_mem(int32_t addr, int32_t r1);
|
||||
void instr_0F51(void);
|
||||
void instr_0F52(void);
|
||||
void instr_0F53(void);
|
||||
void instr_0F54(union reg128 source, int32_t r);
|
||||
void instr_0F54_reg(int32_t r1, int32_t r2);
|
||||
void instr_0F54_mem(int32_t addr, int32_t r);
|
||||
|
@ -302,14 +298,7 @@ void instr_0F57_mem(int32_t addr, int32_t r);
|
|||
void instr_660F57(union reg128 source, int32_t r);
|
||||
void instr_660F57_reg(int32_t r1, int32_t r2);
|
||||
void instr_660F57_mem(int32_t addr, int32_t r);
|
||||
void instr_0F58(void);
|
||||
void instr_0F59(void);
|
||||
void instr_0F5A(void);
|
||||
void instr_0F5B(void);
|
||||
void instr_0F5C(void);
|
||||
void instr_0F5D(void);
|
||||
void instr_0F5E(void);
|
||||
void instr_0F5F(void);
|
||||
|
||||
void instr_0F60(int32_t source, int32_t r);
|
||||
void instr_0F60_reg(int32_t r1, int32_t r2);
|
||||
void instr_0F60_mem(int32_t addr, int32_t r);
|
||||
|
@ -699,7 +688,6 @@ void instr16_0FC1_mem(int32_t addr, int32_t r);
|
|||
void instr16_0FC1_reg(int32_t r1, int32_t r);
|
||||
void instr32_0FC1_mem(int32_t addr, int32_t r);
|
||||
void instr32_0FC1_reg(int32_t r1, int32_t r);
|
||||
void instr_0FC2(void);
|
||||
void instr_0FC3_reg(int32_t r1, int32_t r2);
|
||||
void instr_0FC3_mem(int32_t addr, int32_t r);
|
||||
void instr_0FC4(int32_t source, int32_t r, int32_t imm8);
|
||||
|
|
|
@ -398,3 +398,22 @@ void psllq_r128(int32_t r, uint32_t shift)
|
|||
|
||||
write_xmm_reg128(r, result);
|
||||
}
|
||||
|
||||
// Evaluates an SSE compare predicate on (x, y).
//
// Only the low three bits of `op` are used:
//   0: equal                    4: not equal (true if unordered)
//   1: less than                5: not less than (true if unordered)
//   2: less than or equal       6: not less than or equal (true if unordered)
//   3: unordered (either NaN)   7: ordered (neither NaN)
//
// Returns true when the selected predicate holds.
bool sse_comparison(int32_t op, double_t x, double_t y)
{
    // TODO: Signaling

    switch(op & 7)
    {
        case 0: return x == y;
        case 1: return x < y;
        case 2: return x <= y;
        case 3: return isnan(x) || isnan(y);
        case 4: return x != y || isnan(x) || isnan(y);
        case 5: return x >= y || isnan(x) || isnan(y);
        case 6: return x > y || isnan(x) || isnan(y);
        case 7: return !isnan(x) && !isnan(y);
        default: break;
    }

    // Unreachable: (op & 7) is always in 0..7. The explicit return keeps
    // control from running off the end of a non-void function when assert
    // is compiled out (NDEBUG).
    assert(false);
    return false;
}
|
||||
|
|
|
@ -31,3 +31,5 @@ void psrad_r128(int32_t r, uint32_t shift);
|
|||
void pslld_r128(int32_t r, uint32_t shift);
|
||||
void psrlq_r128(int32_t r, uint32_t shift);
|
||||
void psllq_r128(int32_t r, uint32_t shift);
|
||||
|
||||
// Evaluates the SSE compare predicate selected by the low three bits of `op`
// on the operands (x, y) and returns whether it holds.
bool sse_comparison(int32_t op, double_t x, double_t y);
|
||||
|
|
|
@ -2791,7 +2791,6 @@ void test_sse(void)
|
|||
MOVMSK(movmskpd);
|
||||
|
||||
/* FPU specific ops */
|
||||
/*
|
||||
{
|
||||
uint32_t mxcsr;
|
||||
asm volatile("stmxcsr %0" : "=m" (mxcsr));
|
||||
|
@ -2822,10 +2821,10 @@ void test_sse(void)
|
|||
SSE_OPS(add);
|
||||
SSE_OPS(mul);
|
||||
SSE_OPS(sub);
|
||||
SSE_OPS(min);
|
||||
SSE_OPS(div);
|
||||
SSE_OPS(max);
|
||||
SSE_OPS(sqrt);
|
||||
//SSE_OPS(min);
|
||||
//SSE_OPS(div);
|
||||
//SSE_OPS(max);
|
||||
//SSE_OPS(sqrt);
|
||||
SSE_OPS(cmpeq);
|
||||
SSE_OPS(cmplt);
|
||||
SSE_OPS(cmple);
|
||||
|
@ -2847,10 +2846,10 @@ void test_sse(void)
|
|||
SSE_OPD(add);
|
||||
SSE_OPD(mul);
|
||||
SSE_OPD(sub);
|
||||
SSE_OPD(min);
|
||||
SSE_OPD(div);
|
||||
SSE_OPD(max);
|
||||
SSE_OPD(sqrt);
|
||||
//SSE_OPD(min);
|
||||
//SSE_OPD(div);
|
||||
//SSE_OPD(max);
|
||||
//SSE_OPD(sqrt);
|
||||
SSE_OPD(cmpeq);
|
||||
SSE_OPD(cmplt);
|
||||
SSE_OPD(cmple);
|
||||
|
@ -2860,7 +2859,6 @@ void test_sse(void)
|
|||
SSE_OPD(cmpnle);
|
||||
SSE_OPD(cmpord);
|
||||
}
|
||||
*/
|
||||
|
||||
/* float to float/int */
|
||||
/*
|
||||
|
|
Loading…
Reference in a new issue