movs: fast path for unaligned src/dst

This commit is contained in:
Fabian 2022-11-10 16:40:13 -06:00
parent 685c39177f
commit 756ce66888
2 changed files with 50 additions and 24 deletions

View file

@ -228,8 +228,9 @@ pub unsafe fn memset_no_mmap_or_dirty_check(addr: u32, value: u8, count: u32) {
} }
pub unsafe fn memcpy_no_mmap_or_dirty_check(src_addr: u32, dst_addr: u32, count: u32) { pub unsafe fn memcpy_no_mmap_or_dirty_check(src_addr: u32, dst_addr: u32, count: u32) {
dbg_assert!(u32::max(src_addr, dst_addr) - u32::min(src_addr, dst_addr) >= count); dbg_assert!(src_addr < *memory_size);
ptr::copy_nonoverlapping( dbg_assert!(dst_addr < *memory_size);
ptr::copy(
mem8.offset(src_addr as isize), mem8.offset(src_addr as isize),
mem8.offset(dst_addr as isize), mem8.offset(dst_addr as isize),
count as usize, count as usize,

View file

@ -34,7 +34,7 @@ fn count_until_end_of_page(direction: i32, size: i32, addr: u32) -> u32 {
}) as u32 }) as u32
} }
#[derive(Copy, Clone)] #[derive(Copy, Clone, PartialEq)]
enum Instruction { enum Instruction {
Movs, Movs,
Lods, Lods,
@ -135,7 +135,9 @@ unsafe fn string_instruction(
}; };
let is_aligned = (ds + src) & (size_bytes - 1) == 0 && (es + dst) & (size_bytes - 1) == 0; let is_aligned = (ds + src) & (size_bytes - 1) == 0 && (es + dst) & (size_bytes - 1) == 0;
let mut rep_fast = is_aligned
// unaligned movs is properly handled in the fast path
let mut rep_fast = (instruction == Instruction::Movs || is_aligned)
&& is_asize_32 // 16-bit address wraparound && is_asize_32 // 16-bit address wraparound
&& match rep { && match rep {
Rep::NZ | Rep::Z => true, Rep::NZ | Rep::Z => true,
@ -145,7 +147,9 @@ unsafe fn string_instruction(
let mut phys_dst = 0; let mut phys_dst = 0;
let mut phys_src = 0; let mut phys_src = 0;
let mut skip_dirty_page = false; let mut skip_dirty_page = false;
let mut movs_into_svga_lfb = false; let mut movs_into_svga_lfb = false;
let mut movs_reenter_fast_path = false;
if rep_fast { if rep_fast {
match instruction { match instruction {
@ -184,10 +188,27 @@ unsafe fn string_instruction(
match instruction { match instruction {
Instruction::Movs => { Instruction::Movs => {
// note: This check is also valid for both direction == 1 and direction == -1 let c = count * size_bytes as u32;
let overlap = u32::max(phys_src, phys_dst) - u32::min(phys_src, phys_dst)
< count * size_bytes as u32; let overlap_interferes = if phys_src < phys_dst {
rep_fast = rep_fast && !overlap; // src below dst: an overlapping backward move is safe; only a forward move would overwrite source bytes before they are read
phys_dst - phys_src < c && direction == 1
}
else if phys_src > phys_dst {
// src above dst: an overlapping forward move is safe; only a backward move would overwrite source bytes before they are read
phys_src - phys_dst < c && direction == -1
}
else {
false
};
rep_fast = rep_fast && !overlap_interferes;
// In case the following page-boundary check fails, re-enter instruction after
// one iteration of the slow path
movs_reenter_fast_path = rep_fast;
rep_fast = rep_fast
&& (phys_src & 0xFFF <= 0x1000 - size_bytes as u32)
&& (phys_dst & 0xFFF <= 0x1000 - size_bytes as u32);
}, },
_ => {}, _ => {},
} }
@ -419,23 +440,27 @@ unsafe fn string_instruction(
_ => {}, _ => {},
}; };
count -= 1;
let finished = match rep { let finished = match rep {
Rep::Z | Rep::NZ => { Rep::Z | Rep::NZ => match (rep, instruction) {
let rep_cmp = match (rep, instruction) { (Rep::Z, Instruction::Cmps) => src_val != dst_val || count == 0,
(Rep::Z, Instruction::Cmps) => src_val == dst_val, (Rep::Z, Instruction::Scas) => src_val != dst_val || count == 0,
(Rep::Z, Instruction::Scas) => src_val == dst_val, (Rep::NZ, Instruction::Cmps) => src_val == dst_val || count == 0,
(Rep::NZ, Instruction::Cmps) => src_val != dst_val, (Rep::NZ, Instruction::Scas) => src_val == dst_val || count == 0,
(Rep::NZ, Instruction::Scas) => src_val != dst_val, (Rep::NZ | Rep::Z, Instruction::Movs) => {
_ => true, if count == 0 {
}; true
count -= 1; }
if count != 0 && rep_cmp { else if movs_reenter_fast_path {
//*instruction_pointer = *previous_ip *instruction_pointer = *previous_ip;
false true
} }
else { else {
true false
} }
},
_ => count == 0,
}, },
Rep::None => true, Rep::None => true,
}; };