Much improved performance for String functions; prevent possible infinite loop in malicious guest

This commit is contained in:
copy 2013-12-06 01:40:25 +01:00
commit 7165e4080e

View file

@ -4,119 +4,222 @@
* string operations
*
* cmp si di
* movs 0 1 1 A4
* cmps 1 1 1 A6
* stos 0 0 1 AA
* lods 0 1 0 AC
* scas 1 0 1 AE
* ins 0 0 1
* movs 0 1 1/w A4
* cmps 1 1 1/r A6
* stos 0 0 1/w AA
* lods 0 1 0 AC
* scas 1 0 1/r AE
* ins 0 0 1/w
* outs 0 1 0
*/
#define string_instruction(s, use_cmp, use_di, use_si, fn, aligned_fn)\
var src, dest, data_src, data_dest;\
var size = flags & flag_direction ? -(s >> 3) : s >> 3;\
var ds, es;\
if(use_cmp && !use_si) data_src = reg ## s[reg_eax];\
if(use_di) es = get_seg(reg_es), dest = es + regv[reg_vdi];\
if(use_si) ds = get_seg_prefix(reg_ds), src = ds + regv[reg_vsi];\
if(repeat_string_prefix) {\
if(regv[reg_vcx] === 0) return;\
var aligned = s > 8 && (!use_di || (dest & (s >> 3) - 1) === 0) && (!use_si || (src & (s >> 3) - 1) === 0);\
do {\
if(aligned) {\
aligned_fn;\
} else {\
fn;\
}\
if(use_di) dest += size, regv[reg_vdi] += size;\
if(use_si) src += size, regv[reg_vsi] += size;\
} while(--regv[reg_vcx] && (!use_cmp || (data_src === data_dest) === repeat_string_type));\
} else {\
/*
 * Repeat body that works on the linear addresses `dest` / `src`.
 *
 * Runs `fn` once per element and advances `dest` / `src` by `size` (each only
 * when use_di / use_si is set). After every element `cont` becomes truthy if
 * `--count` is still non-zero and, when use_cmp is set, the comparison of
 * data_src and data_dest still equals repeat_string_type. The loop ends when
 * `cont` is falsy or when `next_cycle` has reached zero; in the latter case
 * `cont` stays truthy.
 *
 * The `s` parameter is not referenced by this macro. All other names (count,
 * size, cont, next_cycle, dest, src, data_src, data_dest) are expected to be
 * declared where the macro is expanded.
 */
#define loop(s, fn)\
do {\
fn;\
if(use_di) dest += size;\
if(use_si) src += size;\
cont = --count && (!use_cmp || (data_src === data_dest) === repeat_string_type);\
} while(cont && next_cycle--)
/*
 * Repeat body that works on the already translated addresses
 * `phys_dest` / `phys_src`.
 *
 * `single_size` is -1 when `size` is negative and 1 otherwise. For 32-bit and
 * 16-bit operands phys_dest / phys_src are first shifted right by 2 or 1, so
 * that adding `single_size` moves exactly one element; `fn` therefore sees
 * element indices rather than byte addresses for those sizes.
 *
 * After every element `cont` becomes truthy if `--count` is still non-zero
 * and, when use_cmp is set, the comparison of data_src and data_dest still
 * equals repeat_string_type. The loop ends when `cont` is falsy or when
 * `next_cycle` has reached zero.
 */
#define aligned_loop(s, fn)\
var single_size = size >> 31 | 1;\
if(s === 32) { if(use_di) phys_dest >>>= 2; if(use_si) phys_src >>>= 2; }\
else if(s === 16) { if(use_di) phys_dest >>>= 1; if(use_si) phys_src >>>= 1; }\
do {\
fn;\
if(use_di) phys_dest += single_size;\
if(use_si) phys_src += single_size;\
cont = --count && (!use_cmp || (data_src === data_dest) === repeat_string_type);\
} while(cont && next_cycle--)
#define string_instruction(s, fn, aligned_fn)\
var src, dest, data_src, data_dest, phys_dest, phys_src;\
var size = flags & flag_direction ? -(s >> 3) : s >> 3;\
var cont = false;\
if(use_cmp && !use_si) data_src = s === 32 ? reg32s[reg_eax] : reg ## s[reg_al];\
if(use_di) dest = get_seg(reg_es) + regv[reg_vdi] | 0;\
if(use_si) src = get_seg_prefix(reg_ds) + regv[reg_vsi] | 0;\
if(repeat_string_prefix) {\
var count = regv[reg_vcx],\
start_count = count;\
if(count === 0) return;\
var next_cycle = 0x4000;\
var aligned = s === 8 ||\
((!use_di || !(dest & (s >> 3) - 1)) && (!use_si || !(src & (s >> 3) - 1)));\
if(aligned) {\
if(paging) {\
if(use_di) {\
next_cycle = ~dest & 0xFFF;\
phys_dest = use_cmp ? translate_address_read(dest) : translate_address_write(dest);\
}\
if(use_si) {\
next_cycle = Math.min(next_cycle, ~src & 0xFFF);\
phys_src = translate_address_read(src);\
}\
if(s === 32) next_cycle >>= 2;\
else if(s === 16) next_cycle >>= 1;\
} else { \
if(use_di) phys_dest = dest;\
if(use_si) phys_src = src;\
}\
aligned_loop(s, aligned_fn);\
} else { \
loop(s, fn);\
}\
var diff = size * (start_count - count) | 0;\
if(use_di) regv[reg_vdi] += diff;\
if(use_si) regv[reg_vsi] += diff;\
regv[reg_vcx] = count;\
} else {\
if(s === 8) { \
if(use_si) phys_src = translate_address_read(src);\
if(use_di) phys_dest = use_cmp ? translate_address_read(dest) : translate_address_write(dest);\
aligned_fn; \
} else { fn; }\
if(use_di) regv[reg_vdi] += size;\
if(use_si) regv[reg_vsi] += size;\
}\
if(use_cmp) cmp ## s(data_src, data_dest);\
if(use_cmp) {\
if(s === 32) cmp32(data_src >>> 0, data_dest >>> 0);\
else cmp ## s(data_src, data_dest);\
}\
if(cont) {\
instruction_pointer = previous_ip;\
}
// movs: no compare step; uses both the source (si) and the destination (di) address
#define use_cmp false
#define use_si true
#define use_di true
// movsb: copies one byte per element from `src` to `dest`. The first block
// goes through safe_read8 / safe_write8 on the linear addresses, the last
// block through memory.read8 / memory.write8 on phys_src / phys_dest.
// NOTE(review): two string_instruction( openers and two closing argument
// lists appear back to back below (four-argument and one-argument form) --
// this looks like diff residue; confirm which form is intended.
function movsb()
{
string_instruction(8, false, true, true,
string_instruction(8,
{
safe_write8(dest, safe_read8(src));
}, {});
// no unaligned fn, bytewise is always aligned
}, {
memory.write8(phys_dest, memory.read8(phys_src));
});
}
// movsw: copies one 16-bit value per element from `src` to `dest`. The first
// block uses safe_read16 / safe_write16 on the linear addresses, the second
// block uses memory.read_aligned16 / memory.write_aligned16.
// NOTE(review): two string_instruction( openers appear back to back, and the
// second block re-declares phys_src / phys_dest although the three-argument
// string_instruction macro already declares them -- this looks like diff
// residue; confirm which lines are intended.
function movsw()
{
string_instruction(16, false, true, true,
string_instruction(16,
{
safe_write16(dest, safe_read16(src));
}, {
var phys_src = translate_address_read(src);
var phys_dest = translate_address_write(dest);
memory.write_aligned16(phys_dest, memory.read_aligned16(phys_src));
});
}
function movsd()
{
string_instruction(32, false, true, true,
// TODO: paging
// For now use standard method
if(repeat_string_prefix && !paging)
{
// often used by memcpy, well worth optimizing
// using memory.mem32s.set
var ds = get_seg_prefix(reg_ds),
src = ds + regv[reg_vsi],
es = get_seg(reg_es),
dest = es + regv[reg_vdi],
count = regv[reg_vcx];
if(!(dest & 3) && !(src & 3) && dest + count < memory_size)
{
dest >>= 2;
src >>= 2;
if(flags & flag_direction)
{
dest -= count - 1;
src -= count - 1;
}
if(paging)
{
// TODO
}
else
{
var diff = flags & flag_direction ? -count << 2 : count << 2;
regv[reg_vcx] = 0;
regv[reg_vdi] += diff;
regv[reg_vsi] += diff;
memory.mem32s.set(memory.mem32s.subarray(src, src + count), dest);
return;
}
}
}
string_instruction(32,
{
safe_write32(dest, safe_read32s(src));
}, {
var phys_src = translate_address_read(src);
var phys_dest = translate_address_write(dest);
memory.write_aligned32(phys_dest, memory.read_aligned32(phys_src));
});
}
#undef use_cmp
#undef use_si
#undef use_di
// cmps: compare step enabled; reads through both the source (si) and the destination (di) address
#define use_cmp true
#define use_si true
#define use_di true
// cmpsb: loads one byte from `dest` into data_dest and one byte from `src`
// into data_src; the comparison itself is done by string_instruction
// (use_cmp is set). The last block is the variant reading through
// phys_dest / phys_src.
// NOTE(review): two string_instruction( openers and two closing argument
// lists appear back to back below -- this looks like diff residue; confirm
// which form is intended.
function cmpsb()
{
string_instruction(8, true, true, true,
string_instruction(8,
{
data_dest = safe_read8(dest);
data_src = safe_read8(src);
}, {});
}, {
data_dest = memory.read8(phys_dest);
data_src = memory.read8(phys_src);
});
}
// cmpsw: loads one 16-bit value from `dest` into data_dest and one from `src`
// into data_src; the comparison itself is done by string_instruction
// (use_cmp is set).
// NOTE(review): the second block assigns data_dest / data_src twice, once via
// translate_address_read and once via phys_dest / phys_src, and two
// string_instruction( openers appear back to back -- this looks like diff
// residue; confirm which lines are intended.
function cmpsw()
{
string_instruction(16, true, true, true,
string_instruction(16,
{
data_dest = safe_read16(dest);
data_src = safe_read16(src);
}, {
data_dest = memory.read_aligned16(translate_address_read(dest));
data_src = memory.read_aligned16(translate_address_read(src));
data_dest = memory.read_aligned16(phys_dest);
data_src = memory.read_aligned16(phys_src);
});
}
// cmpsd: loads one 32-bit value from `dest` into data_dest and one from `src`
// into data_src; the comparison itself is done by string_instruction
// (use_cmp is set), which converts both values with >>> 0 before cmp32.
// NOTE(review): both blocks assign data_dest / data_src twice (safe_read32
// and safe_read32s; translate_address_read and phys_dest / phys_src), and two
// string_instruction( openers appear back to back -- this looks like diff
// residue; confirm which lines are intended.
function cmpsd()
{
string_instruction(32, true, true, true,
string_instruction(32,
{
data_dest = safe_read32(dest);
data_src = safe_read32(src);
data_dest = safe_read32s(dest);
data_src = safe_read32s(src);
}, {
data_dest = memory.read_aligned32(translate_address_read(dest)) >>> 0;
data_src = memory.read_aligned32(translate_address_read(src)) >>> 0;
data_dest = memory.read_aligned32(phys_dest);
data_src = memory.read_aligned32(phys_src);
});
}
#undef use_cmp
#undef use_si
#undef use_di
// stos: no compare step; uses only the destination (di) address
#define use_cmp false
#define use_si false
#define use_di true
// stosb: writes the byte held in reg8[reg_al] to `dest` for every element.
// The register is read once, before string_instruction runs. The last block
// is the variant writing through phys_dest.
// NOTE(review): two string_instruction( openers and two closing argument
// lists appear back to back below -- this looks like diff residue; confirm
// which form is intended.
function stosb()
{
var data = reg8[reg_al];
string_instruction(8, false, true, false,
string_instruction(8,
{
safe_write8(dest, data);
}, {});
}, {
memory.write8(phys_dest, data);
});
}
@ -124,11 +227,11 @@ function stosw()
{
var data = reg16[reg_ax];
string_instruction(16, false, true, false,
string_instruction(16,
{
safe_write16(dest, data);
}, {
memory.write_aligned16(translate_address_write(dest), data);
memory.write_aligned16(phys_dest, data);
});
}
@ -136,87 +239,107 @@ function stosw()
// stosd: writes the 32-bit value of the eax register to `dest` for every
// element. The register is read once, before string_instruction runs.
// NOTE(review): `data` is declared twice (from reg32 and from reg32s), the
// second block writes twice, and two string_instruction( openers appear back
// to back -- this looks like diff residue; confirm which lines are intended.
function stosd()
{
//dbg_log("stosd " + ((reg32[reg_edi] & 3) ? "mis" : "") + "aligned", LOG_CPU);
var data = reg32[reg_eax];
var data = reg32s[reg_eax];
string_instruction(32, false, true, false,
string_instruction(32,
{
safe_write32(dest, data);
}, {
memory.write_aligned32(translate_address_write(dest), data);
memory.write_aligned32(phys_dest, data);
});
}
#undef use_cmp
#undef use_si
#undef use_di
// lods: no compare step; uses only the source (si) address
#define use_cmp false
#define use_si true
#define use_di false
// lodsb: loads one byte from `src` into reg8[reg_al] for every element. The
// last block is the variant reading through phys_src.
// NOTE(review): two string_instruction( openers and two closing argument
// lists appear back to back below -- this looks like diff residue; confirm
// which form is intended.
function lodsb()
{
string_instruction(8, false, false, true,
string_instruction(8,
{
reg8[reg_al] = safe_read8(src);
}, {});
}, {
reg8[reg_al] = memory.read8(phys_src);
});
}
// lodsw: loads one 16-bit value from `src` into reg16[reg_ax] for every
// element.
// NOTE(review): the second block assigns reg16[reg_ax] twice (safe_read16 and
// memory.read_aligned16), and two string_instruction( openers appear back to
// back -- this looks like diff residue; confirm which lines are intended.
function lodsw()
{
string_instruction(16, false, false, true,
string_instruction(16,
{
reg16[reg_ax] = safe_read16(src);
}, {
reg16[reg_ax] = safe_read16(src);
reg16[reg_ax] = memory.read_aligned16(phys_src);
});
}
// lodsd: loads one 32-bit value from `src` into reg32[reg_eax] for every
// element.
// NOTE(review): the second block assigns reg32[reg_eax] twice (safe_read32s
// and memory.read_aligned32), and two string_instruction( openers appear back
// to back -- this looks like diff residue; confirm which lines are intended.
function lodsd()
{
string_instruction(32, false, false, true,
string_instruction(32,
{
reg32[reg_eax] = safe_read32s(src);
}, {
reg32[reg_eax] = safe_read32s(src);
reg32[reg_eax] = memory.read_aligned32(phys_src);
});
}
#undef use_cmp
#undef use_si
#undef use_di
// scas: compare step enabled; uses only the destination (di) address
#define use_cmp true
#define use_si false
#define use_di true
// scasb: loads one byte from `dest` into data_dest for every element.
// data_src is not set here: with use_cmp set and use_si unset,
// string_instruction fills it from the accumulator register and performs the
// comparison. The last block is the variant reading through phys_dest.
// NOTE(review): two string_instruction( openers and two closing argument
// lists appear back to back below -- this looks like diff residue; confirm
// which form is intended.
function scasb()
{
string_instruction(8, true, true, false,
string_instruction(8,
{
data_dest = safe_read8(dest);
}, {});
}, {
data_dest = memory.read8(phys_dest);
});
}
// scasw: loads one 16-bit value from `dest` into data_dest for every element.
// data_src is not set here: with use_cmp set and use_si unset,
// string_instruction fills it from the accumulator register and performs the
// comparison.
// NOTE(review): the second block assigns data_dest twice
// (translate_address_read and phys_dest), and two string_instruction( openers
// appear back to back -- this looks like diff residue; confirm which lines
// are intended.
function scasw()
{
string_instruction(16, true, true, false,
string_instruction(16,
{
data_dest = safe_read16(dest);
}, {
data_dest = memory.read_aligned16(translate_address_read(dest));
data_dest = memory.read_aligned16(phys_dest);
});
}
// scasd: loads one 32-bit value from `dest` into data_dest for every element.
// data_src is not set here: with use_cmp set and use_si unset,
// string_instruction fills it from reg32s[reg_eax] and performs the
// comparison after converting both values with >>> 0.
// NOTE(review): both blocks assign data_dest twice (safe_read32 and
// safe_read32s; translate_address_read and phys_dest), and two
// string_instruction( openers appear back to back -- this looks like diff
// residue; confirm which lines are intended.
function scasd()
{
string_instruction(32, true, true, false,
string_instruction(32,
{
data_dest = safe_read32(dest);
data_dest = safe_read32s(dest);
}, {
data_dest = memory.read_aligned32(translate_address_read(dest)) >>> 0;
data_dest = memory.read_aligned32(phys_dest);
});
}
#undef use_cmp
#undef use_si
#undef use_di
// ins: no compare step; uses only the destination (di) address
#define use_cmp false
#define use_si false
#define use_di true
// insb: for every element reads one byte from the I/O port given by
// reg16[reg_dx] and writes it to `dest`. test_privileges_for_io() is called
// first, before the port number is read. The second block is the variant
// writing through phys_dest.
// NOTE(review): two string_instruction( openers appear back to back below
// (four-argument and one-argument form) -- this looks like diff residue;
// confirm which form is intended.
function insb()
{
test_privileges_for_io();
var port = reg16[reg_dx];
string_instruction(8, false, true, false,
string_instruction(8,
{
safe_write8(dest, io.port_read8(port));
}, {
memory.write8(phys_dest, io.port_read8(port));
});
}
@ -226,11 +349,10 @@ function insw()
var port = reg16[reg_dx];
string_instruction(16, false, true, false,
string_instruction(16,
{
safe_write16(dest, io.port_read16(port));
}, {
var phys_dest = translate_address_write(dest);
memory.write_aligned16(phys_dest, io.port_read16(port));
});
}
@ -241,25 +363,31 @@ function insd()
var port = reg16[reg_dx];
string_instruction(32, false, true, false,
string_instruction(32,
{
safe_write32(dest, io.port_read32(port));
}, {
var phys_dest = translate_address_write(dest);
memory.write_aligned32(phys_dest, io.port_read32(port));
});
}
#undef use_cmp
#undef use_si
#undef use_di
// outs: no compare step; uses only the source (si) address
#define use_cmp false
#define use_si true
#define use_di false
// outsb: for every element reads one byte from `src` and writes it to the I/O
// port given by reg16[reg_dx]. test_privileges_for_io() is called first,
// before the port number is read. The second block is the variant reading
// through phys_src.
// NOTE(review): two string_instruction( openers appear back to back below
// (four-argument and one-argument form) -- this looks like diff residue;
// confirm which form is intended.
function outsb()
{
test_privileges_for_io();
var port = reg16[reg_dx];
string_instruction(8, false, false, true,
string_instruction(8,
{
io.port_write8(port, safe_read8(src));
}, {
io.port_write8(port, memory.read8(phys_src));
});
}
@ -269,11 +397,11 @@ function outsw()
var port = reg16[reg_dx];
string_instruction(16, false, false, true,
string_instruction(16,
{
io.port_write16(port, safe_read16(src));
}, {
io.port_write16(port, safe_read16(src));
io.port_write16(port, memory.read_aligned16(phys_src));
});
}
@ -283,10 +411,17 @@ function outsd()
var port = reg16[reg_dx];
string_instruction(32, false, false, true,
string_instruction(32,
{
io.port_write32(port, safe_read32s(src));
}, {
io.port_write32(port, safe_read32s(src));
io.port_write32(port, memory.read_aligned32(phys_src));
});
}
#undef use_cmp
#undef use_si
#undef use_di
#undef loop
#undef string_instruction