movs: fast path for unaligned src/dst
This commit is contained in:
parent
685c39177f
commit
756ce66888
|
@ -228,8 +228,9 @@ pub unsafe fn memset_no_mmap_or_dirty_check(addr: u32, value: u8, count: u32) {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub unsafe fn memcpy_no_mmap_or_dirty_check(src_addr: u32, dst_addr: u32, count: u32) {
|
pub unsafe fn memcpy_no_mmap_or_dirty_check(src_addr: u32, dst_addr: u32, count: u32) {
|
||||||
dbg_assert!(u32::max(src_addr, dst_addr) - u32::min(src_addr, dst_addr) >= count);
|
dbg_assert!(src_addr < *memory_size);
|
||||||
ptr::copy_nonoverlapping(
|
dbg_assert!(dst_addr < *memory_size);
|
||||||
|
ptr::copy(
|
||||||
mem8.offset(src_addr as isize),
|
mem8.offset(src_addr as isize),
|
||||||
mem8.offset(dst_addr as isize),
|
mem8.offset(dst_addr as isize),
|
||||||
count as usize,
|
count as usize,
|
||||||
|
|
|
@ -34,7 +34,7 @@ fn count_until_end_of_page(direction: i32, size: i32, addr: u32) -> u32 {
|
||||||
}) as u32
|
}) as u32
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Copy, Clone)]
|
#[derive(Copy, Clone, PartialEq)]
|
||||||
enum Instruction {
|
enum Instruction {
|
||||||
Movs,
|
Movs,
|
||||||
Lods,
|
Lods,
|
||||||
|
@ -135,7 +135,9 @@ unsafe fn string_instruction(
|
||||||
};
|
};
|
||||||
|
|
||||||
let is_aligned = (ds + src) & (size_bytes - 1) == 0 && (es + dst) & (size_bytes - 1) == 0;
|
let is_aligned = (ds + src) & (size_bytes - 1) == 0 && (es + dst) & (size_bytes - 1) == 0;
|
||||||
let mut rep_fast = is_aligned
|
|
||||||
|
// unaligned movs is properly handled in the fast path
|
||||||
|
let mut rep_fast = (instruction == Instruction::Movs || is_aligned)
|
||||||
&& is_asize_32 // 16-bit address wraparound
|
&& is_asize_32 // 16-bit address wraparound
|
||||||
&& match rep {
|
&& match rep {
|
||||||
Rep::NZ | Rep::Z => true,
|
Rep::NZ | Rep::Z => true,
|
||||||
|
@ -145,7 +147,9 @@ unsafe fn string_instruction(
|
||||||
let mut phys_dst = 0;
|
let mut phys_dst = 0;
|
||||||
let mut phys_src = 0;
|
let mut phys_src = 0;
|
||||||
let mut skip_dirty_page = false;
|
let mut skip_dirty_page = false;
|
||||||
|
|
||||||
let mut movs_into_svga_lfb = false;
|
let mut movs_into_svga_lfb = false;
|
||||||
|
let mut movs_reenter_fast_path = false;
|
||||||
|
|
||||||
if rep_fast {
|
if rep_fast {
|
||||||
match instruction {
|
match instruction {
|
||||||
|
@ -184,10 +188,27 @@ unsafe fn string_instruction(
|
||||||
|
|
||||||
match instruction {
|
match instruction {
|
||||||
Instruction::Movs => {
|
Instruction::Movs => {
|
||||||
// note: This check is also valid for both direction == 1 and direction == -1
|
let c = count * size_bytes as u32;
|
||||||
let overlap = u32::max(phys_src, phys_dst) - u32::min(phys_src, phys_dst)
|
|
||||||
< count * size_bytes as u32;
|
let overlap_interferes = if phys_src < phys_dst {
|
||||||
rep_fast = rep_fast && !overlap;
|
// src below dst: a backward move (direction == -1) may safely overlap the front of the destination string; only a forward move interferes
|
||||||
|
phys_dst - phys_src < c && direction == 1
|
||||||
|
}
|
||||||
|
else if phys_src > phys_dst {
|
||||||
|
// src above dst: a forward move (direction == 1) may safely overlap the front of the source string; only a backward move interferes
|
||||||
|
phys_src - phys_dst < c && direction == -1
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
false
|
||||||
|
};
|
||||||
|
rep_fast = rep_fast && !overlap_interferes;
|
||||||
|
|
||||||
|
// In case the following page-boundary check fails, re-enter instruction after
|
||||||
|
// one iteration of the slow path
|
||||||
|
movs_reenter_fast_path = rep_fast;
|
||||||
|
rep_fast = rep_fast
|
||||||
|
&& (phys_src & 0xFFF <= 0x1000 - size_bytes as u32)
|
||||||
|
&& (phys_dst & 0xFFF <= 0x1000 - size_bytes as u32);
|
||||||
},
|
},
|
||||||
_ => {},
|
_ => {},
|
||||||
}
|
}
|
||||||
|
@ -419,23 +440,27 @@ unsafe fn string_instruction(
|
||||||
_ => {},
|
_ => {},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
count -= 1;
|
||||||
|
|
||||||
let finished = match rep {
|
let finished = match rep {
|
||||||
Rep::Z | Rep::NZ => {
|
Rep::Z | Rep::NZ => match (rep, instruction) {
|
||||||
let rep_cmp = match (rep, instruction) {
|
(Rep::Z, Instruction::Cmps) => src_val != dst_val || count == 0,
|
||||||
(Rep::Z, Instruction::Cmps) => src_val == dst_val,
|
(Rep::Z, Instruction::Scas) => src_val != dst_val || count == 0,
|
||||||
(Rep::Z, Instruction::Scas) => src_val == dst_val,
|
(Rep::NZ, Instruction::Cmps) => src_val == dst_val || count == 0,
|
||||||
(Rep::NZ, Instruction::Cmps) => src_val != dst_val,
|
(Rep::NZ, Instruction::Scas) => src_val == dst_val || count == 0,
|
||||||
(Rep::NZ, Instruction::Scas) => src_val != dst_val,
|
(Rep::NZ | Rep::Z, Instruction::Movs) => {
|
||||||
_ => true,
|
if count == 0 {
|
||||||
};
|
true
|
||||||
count -= 1;
|
}
|
||||||
if count != 0 && rep_cmp {
|
else if movs_reenter_fast_path {
|
||||||
//*instruction_pointer = *previous_ip
|
*instruction_pointer = *previous_ip;
|
||||||
false
|
true
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
true
|
false
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
_ => count == 0,
|
||||||
},
|
},
|
||||||
Rep::None => true,
|
Rep::None => true,
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in a new issue