Implement {min,max,div}{p,s}{s,d} sse instructions (#57)

This commit is contained in:
Fabian 2018-07-30 18:00:34 -05:00
parent c10bbca85e
commit 6fa702c8aa
5 changed files with 168 additions and 20 deletions

View file

@ -531,25 +531,34 @@ const encodings = [
{ sse: 1, opcode: 0x660F57, e: 1 },
{ sse: 1, opcode: 0x0F58, e: 1, },
{ sse: 1, opcode: 0x660F58, e: 1, },
{ sse: 1, opcode: 0x660F58, e: 1, },
{ sse: 1, opcode: 0xF20F58, e: 1, },
{ sse: 1, opcode: 0xF30F58, e: 1, },
{ sse: 1, opcode: 0x0F59, e: 1, },
{ sse: 1, opcode: 0x660F59, e: 1, },
{ sse: 1, opcode: 0x660F59, e: 1, },
{ sse: 1, opcode: 0xF20F59, e: 1, },
{ sse: 1, opcode: 0xF30F59, e: 1, },
{ sse: 1, opcode: 0x0F5A, skip: 1 },
{ sse: 1, opcode: 0x0F5B, skip: 1 },
{ sse: 1, opcode: 0x0F5A, skip: 1, },
{ sse: 1, opcode: 0x0F5B, skip: 1, },
{ sse: 1, opcode: 0x0F5C, e: 1, },
{ sse: 1, opcode: 0x660F5C, e: 1, },
{ sse: 1, opcode: 0x660F5C, e: 1, },
{ sse: 1, opcode: 0xF20F5C, e: 1, },
{ sse: 1, opcode: 0xF30F5C, e: 1, },
{ sse: 1, opcode: 0x0F5D, skip: 1, },
{ sse: 1, opcode: 0x0F5E, skip: 1, },
{ sse: 1, opcode: 0x0F5F, skip: 1, },
{ sse: 1, opcode: 0x0F5D, e: 1, },
{ sse: 1, opcode: 0x660F5D, e: 1, },
{ sse: 1, opcode: 0xF20F5D, e: 1, },
{ sse: 1, opcode: 0xF30F5D, e: 1, },
{ sse: 1, opcode: 0x0F5E, e: 1, },
{ sse: 1, opcode: 0x660F5E, e: 1, },
{ sse: 1, opcode: 0xF20F5E, e: 1, },
{ sse: 1, opcode: 0xF30F5E, e: 1, },
{ sse: 1, opcode: 0x0F5F, e: 1, },
{ sse: 1, opcode: 0x660F5F, e: 1, },
{ sse: 1, opcode: 0xF20F5F, e: 1, },
{ sse: 1, opcode: 0xF30F5F, e: 1, },
{ sse: 1, opcode: 0x660F60, e: 1 },
{ sse: 1, opcode: 0x0F60, e: 1 },

View file

@ -1294,9 +1294,134 @@ void instr_F30F5C(float_t source, int32_t r) {
}
DEFINE_SSE_SPLIT(instr_F30F5C, fpu_load_m32, read_xmm_f32)
void instr_0F5D() {
    // Stub handler for opcode 0F 5D: does nothing except defer to
    // unimplemented_sse().
    unimplemented_sse();
}
void instr_0F5E() {
    // Stub handler for opcode 0F 5E: does nothing except defer to
    // unimplemented_sse().
    unimplemented_sse();
}
void instr_0F5F() {
    // Stub handler for opcode 0F 5F: does nothing except defer to
    // unimplemented_sse().
    unimplemented_sse();
}
void instr_0F5D(union reg128 source, int32_t r) {
// minps xmm, xmm/mem128
union reg128 destination = read_xmm128s(r);
union reg128 result = {
.f32 = {
sse_min(destination.f32[0], source.f32[0]),
sse_min(destination.f32[1], source.f32[1]),
sse_min(destination.f32[2], source.f32[2]),
sse_min(destination.f32[3], source.f32[3]),
}
};
write_xmm_reg128(r, result);
}
DEFINE_SSE_SPLIT(instr_0F5D, safe_read128s, read_xmm128s)
void instr_660F5D(union reg128 source, int32_t r) {
// minpd xmm, xmm/mem128
union reg128 destination = read_xmm128s(r);
union reg128 result = {
.f64 = {
sse_min(destination.f64[0], source.f64[0]),
sse_min(destination.f64[1], source.f64[1]),
}
};
write_xmm_reg128(r, result);
}
DEFINE_SSE_SPLIT(instr_660F5D, safe_read128s, read_xmm128s)
void instr_F20F5D(union reg64 source, int32_t r) {
// minsd xmm, xmm/mem64
union reg64 destination = read_xmm64s(r);
union reg64 result = {
.f64 = { sse_min(destination.f64[0], source.f64[0]), }
};
write_xmm64(r, result);
}
DEFINE_SSE_SPLIT(instr_F20F5D, safe_read64s, read_xmm64s)
void instr_F30F5D(float_t source, int32_t r) {
    // minss xmm, xmm/mem32
    // Scalar single minimum: the f32 read from xmm register r is the first
    // sse_min operand, `source` the second.
    const float_t dst = read_xmm_f32(r);
    // sse_min computes in double_t; narrow to float before storing.
    write_xmm_f32(r, (float) sse_min(dst, source));
}
DEFINE_SSE_SPLIT(instr_F30F5D, fpu_load_m32, read_xmm_f32)
void instr_0F5E(union reg128 source, int32_t r) {
// divps xmm, xmm/mem128
union reg128 destination = read_xmm128s(r);
union reg128 result = {
.f32 = {
destination.f32[0] / source.f32[0],
destination.f32[1] / source.f32[1],
destination.f32[2] / source.f32[2],
destination.f32[3] / source.f32[3],
}
};
write_xmm_reg128(r, result);
}
DEFINE_SSE_SPLIT(instr_0F5E, safe_read128s, read_xmm128s)
void instr_660F5E(union reg128 source, int32_t r) {
// divpd xmm, xmm/mem128
union reg128 destination = read_xmm128s(r);
union reg128 result = {
.f64 = {
destination.f64[0] / source.f64[0],
destination.f64[1] / source.f64[1],
}
};
write_xmm_reg128(r, result);
}
DEFINE_SSE_SPLIT(instr_660F5E, safe_read128s, read_xmm128s)
void instr_F20F5E(union reg64 source, int32_t r) {
// divsd xmm, xmm/mem64
union reg64 destination = read_xmm64s(r);
union reg64 result = {
.f64 = { destination.f64[0] / source.f64[0], }
};
write_xmm64(r, result);
}
DEFINE_SSE_SPLIT(instr_F20F5E, safe_read64s, read_xmm64s)
void instr_F30F5E(float_t source, int32_t r) {
    // divss xmm, xmm/mem32
    // Scalar single division: the f32 read from xmm register r is the
    // dividend and `source` the divisor.
    const float_t dividend = read_xmm_f32(r);
    // Narrow the float_t quotient to float before storing.
    write_xmm_f32(r, (float) (dividend / source));
}
DEFINE_SSE_SPLIT(instr_F30F5E, fpu_load_m32, read_xmm_f32)
void instr_0F5F(union reg128 source, int32_t r) {
// maxps xmm, xmm/mem128
union reg128 destination = read_xmm128s(r);
union reg128 result = {
.f32 = {
sse_max(destination.f32[0], source.f32[0]),
sse_max(destination.f32[1], source.f32[1]),
sse_max(destination.f32[2], source.f32[2]),
sse_max(destination.f32[3], source.f32[3]),
}
};
write_xmm_reg128(r, result);
}
DEFINE_SSE_SPLIT(instr_0F5F, safe_read128s, read_xmm128s)
void instr_660F5F(union reg128 source, int32_t r) {
// maxpd xmm, xmm/mem128
union reg128 destination = read_xmm128s(r);
union reg128 result = {
.f64 = {
sse_max(destination.f64[0], source.f64[0]),
sse_max(destination.f64[1], source.f64[1]),
}
};
write_xmm_reg128(r, result);
}
DEFINE_SSE_SPLIT(instr_660F5F, safe_read128s, read_xmm128s)
void instr_F20F5F(union reg64 source, int32_t r) {
// maxsd xmm, xmm/mem64
union reg64 destination = read_xmm64s(r);
union reg64 result = {
.f64 = { sse_max(destination.f64[0], source.f64[0]), }
};
write_xmm64(r, result);
}
DEFINE_SSE_SPLIT(instr_F20F5F, safe_read64s, read_xmm64s)
void instr_F30F5F(float_t source, int32_t r) {
    // maxss xmm, xmm/mem32
    // Scalar single maximum: the f32 read from xmm register r is the first
    // sse_max operand, `source` the second.
    const float_t dst = read_xmm_f32(r);
    // sse_max computes in double_t; narrow to float before storing.
    write_xmm_f32(r, (float) sse_max(dst, source));
}
DEFINE_SSE_SPLIT(instr_F30F5F, fpu_load_m32, read_xmm_f32)
void instr_0F60(int32_t source, int32_t r) {

View file

@ -417,3 +417,15 @@ bool sse_comparison(int32_t op, double_t x, double_t y)
assert(false);
}
double_t sse_min(double_t x, double_t y)
{
    // Asymmetric minimum: x is returned only when it compares strictly less
    // than y. In every other case y is returned -- equal operands (including
    // a +0/-0 pair) and comparisons involving a NaN on either side, since any
    // ordered comparison with NaN is false.
    if(x < y)
    {
        return x;
    }
    return y;
}
double_t sse_max(double_t x, double_t y)
{
    // Asymmetric maximum: x is returned only when it compares strictly
    // greater than y. In every other case y is returned -- equal operands
    // (including a +0/-0 pair) and comparisons involving a NaN on either
    // side, since any ordered comparison with NaN is false.
    if(x > y)
    {
        return x;
    }
    return y;
}

View file

@ -33,3 +33,5 @@ void psrlq_r128(int32_t r, uint32_t shift);
void psllq_r128(int32_t r, uint32_t shift);
bool sse_comparison(int32_t op, double_t x, double_t y);
// Returns x if x < y, otherwise y. y is therefore the result for equal
// operands (including +0/-0 pairs) and whenever either operand is NaN.
double_t sse_min(double_t x, double_t y);
// Returns x if x > y, otherwise y. y is therefore the result for equal
// operands (including +0/-0 pairs) and whenever either operand is NaN.
double_t sse_max(double_t x, double_t y);

View file

@ -2821,10 +2821,10 @@ void test_sse(void)
SSE_OPS(add);
SSE_OPS(mul);
SSE_OPS(sub);
//SSE_OPS(min);
//SSE_OPS(div);
//SSE_OPS(max);
//SSE_OPS(sqrt);
SSE_OPS(min);
SSE_OPS(div);
SSE_OPS(max);
SSE_OPS(sqrt);
SSE_OPS(cmpeq);
SSE_OPS(cmplt);
SSE_OPS(cmple);
@ -2846,10 +2846,10 @@ void test_sse(void)
SSE_OPD(add);
SSE_OPD(mul);
SSE_OPD(sub);
//SSE_OPD(min);
//SSE_OPD(div);
//SSE_OPD(max);
//SSE_OPD(sqrt);
SSE_OPD(min);
SSE_OPD(div);
SSE_OPD(max);
SSE_OPD(sqrt);
SSE_OPD(cmpeq);
SSE_OPD(cmplt);
SSE_OPD(cmple);