Add support for PAE (#599)
Physical memory is still limited to 32-bit addresses, but systems that enable PAE should work now.
This commit is contained in:
parent
6b9d1f74d5
commit
f72d0a9ca0
|
@ -1762,7 +1762,7 @@
|
|||
$("dump_gdt").onclick = debug.dump_gdt_ldt.bind(debug);
|
||||
$("dump_idt").onclick = debug.dump_idt.bind(debug);
|
||||
$("dump_regs").onclick = debug.dump_regs.bind(debug);
|
||||
$("dump_pt").onclick = debug.dump_page_directory.bind(debug);
|
||||
$("dump_pt").onclick = debug.dump_page_structures.bind(debug);
|
||||
|
||||
$("dump_log").onclick = function()
|
||||
{
|
||||
|
|
|
@ -120,6 +120,7 @@ var
|
|||
|
||||
/** @const */
|
||||
var CR0_PG = 1 << 31;
|
||||
var CR4_PAE = 1 << 5;
|
||||
|
||||
|
||||
// https://github.com/qemu/seabios/blob/14221cd86eadba82255fdc55ed174d401c7a0a04/src/fw/paravirt.c#L205-L219
|
||||
|
|
60
src/debug.js
60
src/debug.js
|
@ -39,7 +39,7 @@ CPU.prototype.debug_init = function()
|
|||
debug.dump_state = dump_state;
|
||||
debug.dump_stack = dump_stack;
|
||||
|
||||
debug.dump_page_directory = dump_page_directory;
|
||||
debug.dump_page_structures = dump_page_structures;
|
||||
debug.dump_gdt_ldt = dump_gdt_ldt;
|
||||
debug.dump_idt = dump_idt;
|
||||
|
||||
|
@ -115,6 +115,7 @@ CPU.prototype.debug_init = function()
|
|||
}
|
||||
|
||||
return ("mode=" + mode + "/" + op_size + " paging=" + (+((cpu.cr[0] & CR0_PG) !== 0)) +
|
||||
" pae=" + (+((cpu.cr[4] & CR4_PAE) !== 0)) +
|
||||
" iopl=" + iopl + " cpl=" + cpl + " if=" + if_ + " cs:eip=" + cs_eip +
|
||||
" cs_off=" + h(cpu.get_seg_cs() >>> 0, 8) +
|
||||
" flgs=" + h(cpu.get_eflags() >>> 0, 6) + " (" + flag_string + ")" +
|
||||
|
@ -297,7 +298,7 @@ CPU.prototype.debug_init = function()
|
|||
}
|
||||
}
|
||||
|
||||
function load_page_entry(dword_entry, is_directory)
|
||||
function load_page_entry(dword_entry, pae, is_directory)
|
||||
{
|
||||
if(!DEBUG) return;
|
||||
|
||||
|
@ -312,7 +313,7 @@ CPU.prototype.debug_init = function()
|
|||
|
||||
if(size && !is_directory)
|
||||
{
|
||||
address = dword_entry & 0xFFC00000;
|
||||
address = dword_entry & (pae ? 0xFFE00000 : 0xFFC00000);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -331,19 +332,47 @@ CPU.prototype.debug_init = function()
|
|||
};
|
||||
}
|
||||
|
||||
function dump_page_directory()
|
||||
var dbg_log = console.log.bind(console);
|
||||
|
||||
/**
 * Log the paging structures reachable from CR3.
 *
 * Without PAE, CR3 is handed to dump_page_directory as the single page
 * directory. With PAE, the four 8-byte entries starting at CR3 are
 * scanned and every present one is dumped as a page directory covering
 * the 1 GiB region starting at (index << 30). Only the low dword of
 * each 8-byte entry is read.
 */
function dump_page_structures()
{
    var pae_enabled = (cpu.cr[4] & CR4_PAE) !== 0;

    if(!pae_enabled)
    {
        dbg_log("PAE disabled");
        dump_page_directory(cpu.cr[3], false, 0);
        return;
    }

    dbg_log("PAE enabled");

    for(var pdpt_index = 0; pdpt_index < 4; pdpt_index++)
    {
        var pdpt_entry_low = cpu.read32s(cpu.cr[3] + 8 * pdpt_index);
        var is_present = (pdpt_entry_low & 1) !== 0;

        if(!is_present)
        {
            continue;
        }

        dump_page_directory(pdpt_entry_low & 0xFFFFF000, true, pdpt_index << 30);
    }
}
|
||||
|
||||
/* NOTE: PAE entries are 64 bits wide; only the low 32 bits are read here. */
|
||||
function dump_page_directory(pd_addr, pae, start)
|
||||
{
|
||||
if(!DEBUG) return;
|
||||
|
||||
for(var i = 0; i < 1024; i++)
|
||||
var n = pae ? 512 : 1024;
|
||||
var entry_size = pae ? 8 : 4;
|
||||
var pd_shift = pae ? 21 : 22;
|
||||
|
||||
for(var i = 0; i < n; i++)
|
||||
{
|
||||
var addr = cpu.cr[3] + 4 * i;
|
||||
var dword = cpu.read32s(addr),
|
||||
entry = load_page_entry(dword, true);
|
||||
var addr = pd_addr + i * entry_size,
|
||||
dword = cpu.read32s(addr),
|
||||
entry = load_page_entry(dword, pae, true);
|
||||
|
||||
if(!entry)
|
||||
{
|
||||
dbg_log("Not present: " + h((i << 22) >>> 0, 8));
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -357,20 +386,21 @@ CPU.prototype.debug_init = function()
|
|||
|
||||
if(entry.size)
|
||||
{
|
||||
dbg_log("=== " + h((i << 22) >>> 0, 8) + " -> " + h(entry.address >>> 0, 8) + " | " + flags);
|
||||
dbg_log("=== " + h(start + (i << pd_shift) >>> 0, 8) + " -> " +
|
||||
h(entry.address >>> 0, 8) + " | " + flags);
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
dbg_log("=== " + h((i << 22) >>> 0, 8) + " | " + flags);
|
||||
dbg_log("=== " + h(start + (i << pd_shift) >>> 0, 8) + " | " + flags);
|
||||
}
|
||||
|
||||
for(var j = 0; j < 1024; j++)
|
||||
for(var j = 0; j < n; j++)
|
||||
{
|
||||
var sub_addr = entry.address + 4 * j;
|
||||
var sub_addr = entry.address + j * entry_size;
|
||||
dword = cpu.read32s(sub_addr);
|
||||
|
||||
var subentry = load_page_entry(dword, false);
|
||||
var subentry = load_page_entry(dword, pae, false);
|
||||
|
||||
if(subentry)
|
||||
{
|
||||
|
@ -383,7 +413,7 @@ CPU.prototype.debug_init = function()
|
|||
flags += subentry.accessed ? "A " : " ";
|
||||
flags += subentry.dirty ? "Di " : " ";
|
||||
|
||||
dbg_log("# " + h((i << 22 | j << 12) >>> 0, 8) + " -> " +
|
||||
dbg_log("# " + h(start + (i << pd_shift | j << 12) >>> 0, 8) + " -> " +
|
||||
h(subentry.address, 8) + " | " + flags + " (at " + h(sub_addr, 8) + ")");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,8 +20,8 @@ use cpu::global_pointers::*;
|
|||
use cpu::memory;
|
||||
use cpu::memory::mem8;
|
||||
use cpu::memory::{
|
||||
in_mapped_range, read8, read16, read32s, read64s, read128, read_aligned32, write8,
|
||||
write_aligned32,
|
||||
in_mapped_range, read8, read16, read32s, read64s, read128, read_aligned32,
|
||||
read_aligned64, write8, write_aligned32,
|
||||
};
|
||||
use cpu::misc_instr::{
|
||||
adjust_stack_reg, get_stack_pointer, getaf, getcf, getof, getpf, getsf, getzf, pop16, pop32s,
|
||||
|
@ -1797,6 +1797,20 @@ pub unsafe fn do_page_translation(addr: i32, for_writing: bool, user: bool) -> O
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* 32-bit paging:
|
||||
* - 10 bits PD | 10 bits PT | 12 bits offset
|
||||
* - 10 bits PD | 22 bits offset (4MB huge page)
|
||||
*
|
||||
* PAE paging:
|
||||
* - 2 bits PDPT | 9 bits PD | 9 bits PT | 12 bits offset
|
||||
* - 2 bits PDPT | 9 bits PD | 21 bits offset (2MB huge page)
|
||||
*
|
||||
* Note that PAE entries are 64-bit, and can describe physical addresses over 32
|
||||
* bits. However, since we support only 32-bit physical addresses, we require
|
||||
* the high half of the entry to be 0 (except for the execute-disable bit in
|
||||
* PDE and PTE).
|
||||
*/
|
||||
pub unsafe fn do_page_walk(
|
||||
addr: i32,
|
||||
for_writing: bool,
|
||||
|
@ -1816,16 +1830,25 @@ pub unsafe fn do_page_walk(
|
|||
else {
|
||||
profiler::stat_increment(TLB_MISS);
|
||||
|
||||
let page_dir_addr = (*cr.offset(3) as u32 >> 2).wrapping_add((page >> 10) as u32) as i32;
|
||||
let page_dir_entry = read_aligned32(page_dir_addr as u32);
|
||||
// XXX
|
||||
let kernel_write_override = !user && 0 == *cr & CR0_WP;
|
||||
if 0 == page_dir_entry & PAGE_TABLE_PRESENT_MASK {
|
||||
// to do at this place:
|
||||
//
|
||||
// - set cr2 = addr (which caused the page fault)
|
||||
// - call_interrupt_vector with id 14, error code 0-7 (requires information if read or write)
|
||||
// - prevent execution of the function that triggered this call
|
||||
let pae = *cr.offset(4) & CR4_PAE != 0;
|
||||
|
||||
let (page_dir_addr, page_dir_entry) =
|
||||
match walk_page_directory(pae, addr) {
|
||||
Some((a, e)) => (a, e),
|
||||
// to do at this place:
|
||||
//
|
||||
// - set cr2 = addr (which caused the page fault)
|
||||
// - call_interrupt_vector with id 14, error code 0-7 (requires information if read or write)
|
||||
// - prevent execution of the function that triggered this call
|
||||
None => return Err(PageFault {
|
||||
addr,
|
||||
for_writing,
|
||||
user,
|
||||
present: false,
|
||||
}),
|
||||
};
|
||||
|
||||
if page_dir_entry & PAGE_TABLE_PRESENT_MASK == 0 {
|
||||
return Err(PageFault {
|
||||
addr,
|
||||
for_writing,
|
||||
|
@ -1833,6 +1856,9 @@ pub unsafe fn do_page_walk(
|
|||
present: false,
|
||||
});
|
||||
}
|
||||
|
||||
// XXX
|
||||
let kernel_write_override = !user && 0 == *cr & CR0_WP;
|
||||
if page_dir_entry & PAGE_TABLE_RW_MASK == 0 && !kernel_write_override {
|
||||
can_write = false;
|
||||
if for_writing {
|
||||
|
@ -1868,13 +1894,17 @@ pub unsafe fn do_page_walk(
|
|||
write_aligned32(page_dir_addr as u32, new_page_dir_entry);
|
||||
}
|
||||
|
||||
high = (page_dir_entry as u32 & 0xFFC00000 | (addr & 0x3FF000) as u32) as i32;
|
||||
high = if pae {
|
||||
(page_dir_entry as u32 & 0xFFE00000 | (addr & 0x1FF000) as u32) as i32
|
||||
} else {
|
||||
(page_dir_entry as u32 & 0xFFC00000 | (addr & 0x3FF000) as u32) as i32
|
||||
};
|
||||
global = page_dir_entry & PAGE_TABLE_GLOBAL_MASK == PAGE_TABLE_GLOBAL_MASK
|
||||
}
|
||||
else {
|
||||
let page_table_addr = ((page_dir_entry as u32 & 0xFFFFF000) >> 2)
|
||||
.wrapping_add((page & 1023) as u32) as i32;
|
||||
let page_table_entry = read_aligned32(page_table_addr as u32);
|
||||
let (page_table_addr, page_table_entry) =
|
||||
walk_page_table(pae, addr, page_dir_entry);
|
||||
|
||||
if page_table_entry & PAGE_TABLE_PRESENT_MASK == 0 {
|
||||
return Err(PageFault {
|
||||
addr,
|
||||
|
@ -1883,6 +1913,7 @@ pub unsafe fn do_page_walk(
|
|||
present: false,
|
||||
});
|
||||
}
|
||||
|
||||
if page_table_entry & PAGE_TABLE_RW_MASK == 0 && !kernel_write_override {
|
||||
can_write = false;
|
||||
if for_writing {
|
||||
|
@ -1967,6 +1998,65 @@ pub unsafe fn do_page_walk(
|
|||
return Ok(high);
|
||||
}
|
||||
|
||||
unsafe fn walk_page_directory(pae: bool, addr: i32) -> Option<(i32, i32)> {
|
||||
if pae {
|
||||
let pdpt_idx = (addr as u32) >> 30;
|
||||
let page_dir_idx = ((addr as u32) >> 21) & 0x1FF;
|
||||
|
||||
let pdpt_addr = (*cr.offset(3) as u32 >> 2).wrapping_add(pdpt_idx << 1);
|
||||
let pdpt_entry = read_aligned64(pdpt_addr);
|
||||
if pdpt_entry as i32 & PAGE_TABLE_PRESENT_MASK == 0 {
|
||||
return None;
|
||||
}
|
||||
dbg_assert!(
|
||||
pdpt_entry as u64 & 0xFFFF_FFFF_0000_0000 == 0,
|
||||
"Unsupported: PDPT entry larger than 32 bits"
|
||||
);
|
||||
|
||||
let page_dir_addr = ((pdpt_entry as u32 & 0xFFFFF000)>> 2).wrapping_add(page_dir_idx << 1);
|
||||
let page_dir_entry = read_aligned64(page_dir_addr);
|
||||
// Note that the highest bit of PDE specifies execute-disable, and can
|
||||
// be set (we'll ignore it anyway).
|
||||
dbg_assert!(
|
||||
page_dir_entry as u64 & 0x7FFF_FFFF_0000_0000 == 0,
|
||||
"Unsupported: Page directory entry larger than 32 bits"
|
||||
);
|
||||
|
||||
return Some((page_dir_addr as i32, page_dir_entry as i32));
|
||||
}
|
||||
|
||||
let page_dir_idx = (addr as u32) >> 22;
|
||||
let page_dir_addr = (*cr.offset(3) as u32 >> 2).wrapping_add(page_dir_idx);
|
||||
let page_dir_entry = read_aligned32(page_dir_addr);
|
||||
return Some((page_dir_addr as i32, page_dir_entry));
|
||||
}
|
||||
|
||||
unsafe fn walk_page_table(
|
||||
pae: bool,
|
||||
addr: i32,
|
||||
page_dir_entry: i32
|
||||
) -> (i32, i32) {
|
||||
let page_table = (page_dir_entry as u32 & 0xFFFFF000) >> 2;
|
||||
if pae {
|
||||
let page_table_idx = (addr as u32 >> 12) & 0x1FF;
|
||||
let page_table_addr = page_table.wrapping_add(page_table_idx << 1);
|
||||
let page_table_entry = read_aligned64(page_table_addr);
|
||||
// Note that the highest bit of PTE specifies execute-disable, and can
|
||||
// be set (we'll ignore it anyway).
|
||||
dbg_assert!(
|
||||
page_table_entry as u64 & 0x7FFF_FFFF_0000_0000 == 0,
|
||||
"Unsupported: Page table entry larger than 32 bits"
|
||||
);
|
||||
|
||||
return (page_table_addr as i32, page_table_entry as i32);
|
||||
}
|
||||
|
||||
let page_table_idx = (addr as u32 >> 12) & 0x3FF;
|
||||
let page_table_addr = page_table.wrapping_add(page_table_idx);
|
||||
let page_table_entry = read_aligned32(page_table_addr);
|
||||
return (page_table_addr as i32, page_table_entry);
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe fn full_clear_tlb() {
|
||||
profiler::stat_increment(FULL_CLEAR_TLB);
|
||||
|
|
|
@ -798,13 +798,10 @@ pub unsafe fn instr_0F22(r: i32, creg: i32) {
|
|||
return;
|
||||
}
|
||||
else {
|
||||
if 0 != (*cr.offset(4) ^ data) & (CR4_PGE | CR4_PSE) {
|
||||
if 0 != (*cr.offset(4) ^ data) & (CR4_PGE | CR4_PSE | CR4_PAE) {
|
||||
full_clear_tlb();
|
||||
}
|
||||
*cr.offset(4) = data;
|
||||
if 0 != *cr.offset(4) & CR4_PAE {
|
||||
dbg_assert!(false, "PAE is not supported");
|
||||
}
|
||||
}
|
||||
},
|
||||
_ => {
|
||||
|
@ -3177,7 +3174,7 @@ pub unsafe fn instr_0FA2() {
|
|||
ecx |= 1 << 31
|
||||
}; // hypervisor
|
||||
edx = (if true /* have fpu */ { 1 } else { 0 }) | // fpu
|
||||
vme | 1 << 3 | 1 << 4 | 1 << 5 | // vme, pse, tsc, msr
|
||||
vme | 1 << 3 | 1 << 4 | 1 << 5 | 1 << 6 | // vme, pse, tsc, msr, pae
|
||||
1 << 8 | 1 << 11 | 1 << 13 | 1 << 15 | // cx8, sep, pge, cmov
|
||||
1 << 23 | 1 << 24 | 1 << 25 | 1 << 26; // mmx, fxsr, sse1, sse2
|
||||
|
||||
|
|
|
@ -97,6 +97,19 @@ pub unsafe fn read_aligned32(addr: u32) -> i32 {
|
|||
};
|
||||
}
|
||||
|
||||
pub unsafe fn read_aligned64(addr: u32) -> i64 {
|
||||
dbg_assert!(addr < 0x40000000 as u32);
|
||||
dbg_assert!(addr & 1 == 0);
|
||||
if in_mapped_range(addr << 2) {
|
||||
let lo = mmap_read32(addr << 2);
|
||||
let hi = mmap_read32(addr + 1 << 2);
|
||||
return lo as i64 | (hi as i64) << 32;
|
||||
}
|
||||
else {
|
||||
return *(mem8 as *mut i64).offset((addr >> 1) as isize);
|
||||
}
|
||||
}
|
||||
|
||||
pub unsafe fn read128(addr: u32) -> reg128 {
|
||||
let mut value: reg128 = reg128 {
|
||||
i8_0: [0 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
|
|
|
@ -12,6 +12,7 @@ make -C ../../build/libv86.js
|
|||
./run.js x86/sieve.flat
|
||||
./run.js x86/ioapic.flat
|
||||
./run.js x86/apic.flat
|
||||
./run.js x86/pae.flat
|
||||
```
|
||||
|
||||
Tests can also be run in browser by going to `?profile=test-$name` (for
|
||||
|
|
|
@ -51,6 +51,7 @@ tests-common = $(TEST_DIR)/vmexit.flat $(TEST_DIR)/tsc.flat \
|
|||
$(TEST_DIR)/init.flat $(TEST_DIR)/smap.flat \
|
||||
$(TEST_DIR)/hyperv_synic.flat $(TEST_DIR)/hyperv_stimer.flat \
|
||||
$(TEST_DIR)/hyperv_connections.flat \
|
||||
$(TEST_DIR)/pae.flat \
|
||||
|
||||
ifdef API
|
||||
tests-api = api/api-sample api/dirty-log api/dirty-log-perf
|
||||
|
|
101
tests/kvm-unit-tests/x86/pae.c
Normal file
101
tests/kvm-unit-tests/x86/pae.c
Normal file
|
@ -0,0 +1,101 @@
|
|||
/* Simple PAE paging test. See lib/x86/vm.c for similar code which sets up
|
||||
* non-PAE paging. */
|
||||
|
||||
#include "fwcfg.h"
|
||||
#include "asm/page.h"
|
||||
#include "processor.h"
|
||||
|
||||
#ifdef __x86_64__
|
||||
#error This test is 32-bit only.
|
||||
#endif
|
||||
|
||||
#define HUGE_PAGE_SIZE (1UL << 21)
|
||||
|
||||
uint64_t pdpt[4] __attribute__((aligned(0x20)));
|
||||
uint64_t page_dirs[4 * 512] __attribute__((aligned(0x1000)));
|
||||
uint64_t page_tables[512 * 512] __attribute__((aligned(0x1000)));
|
||||
|
||||
static bool is_pae_supported(void) {
|
||||
struct cpuid c = cpuid(1);
|
||||
return c.d & (1 << 6);
|
||||
}
|
||||
|
||||
/* Fill page directory at `pd` with huge page entries. */
|
||||
static void setup_pd_huge_pages(uint64_t *pd, uint64_t start, uint64_t end) {
|
||||
uint64_t phys = start;
|
||||
for (unsigned int i = 0; i < 512; i++) {
|
||||
*pd++ = phys | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK |
|
||||
PT_PAGE_SIZE_MASK;
|
||||
|
||||
phys += HUGE_PAGE_SIZE;
|
||||
if (phys >= end)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* Fill page directory at `pd` with page table entries, and use memory at `pt`
|
||||
* to create page tables. */
|
||||
static void setup_pd(uint64_t *pd, uint64_t *pt, uint64_t start, uint64_t end) {
|
||||
uint64_t phys = start;
|
||||
for (unsigned int i = 0; i < 512; i++) {
|
||||
*pd++ = (uint32_t)pt | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
|
||||
for (unsigned int j = 0; j < 512; j++) {
|
||||
*pt++ = phys | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
|
||||
phys += PAGE_SIZE;
|
||||
if (phys >= end)
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void setup_mmu(void) {
|
||||
uint64_t mem_size = fwcfg_get_u64(FW_CFG_RAM_SIZE);
|
||||
if (mem_size > (1ULL << 32))
|
||||
mem_size = 1ULL << 32;
|
||||
|
||||
/* Map physical memory at 0000_0000 using huge pages */
|
||||
pdpt[0] = (uint32_t)&page_dirs[0 * 512] | PT_PRESENT_MASK;
|
||||
setup_pd_huge_pages(&page_dirs[0 * 512], 0, mem_size);
|
||||
|
||||
/* Map physical memory at 4000_0000 using huge pages */
|
||||
pdpt[1] = (uint32_t)&page_dirs[1 * 512] | PT_PRESENT_MASK;
|
||||
setup_pd_huge_pages(&page_dirs[1 * 512], 0, mem_size);
|
||||
|
||||
/* Map physical memory at 8000_0000 using huge pages */
|
||||
pdpt[2] = (uint32_t)&page_dirs[2 * 512] | PT_PRESENT_MASK;
|
||||
setup_pd_huge_pages(&page_dirs[2 * 512], 0, mem_size);
|
||||
|
||||
/* Map physical memory at C000_0000 using normal tables */
|
||||
pdpt[3] = (uint32_t)&page_dirs[3 * 512] | PT_PRESENT_MASK;
|
||||
setup_pd(&page_dirs[3 * 512], &page_tables[0], 0, mem_size);
|
||||
|
||||
write_cr0(0);
|
||||
write_cr4(read_cr4() | X86_CR4_PAE);
|
||||
write_cr3((uint32_t)pdpt);
|
||||
write_cr0(X86_CR0_PG | X86_CR0_PE | X86_CR0_WP);
|
||||
|
||||
printf("paging enabled\n");
|
||||
}
|
||||
|
||||
/* Test entry point.
 *
 * Returns 1 if CPUID does not report PAE or if an aliased write is not
 * visible through the original address, and 0 otherwise. After paging is set
 * up, the stack variable `test` is written through its aliases at +1 GiB,
 * +2 GiB and +3 GiB and read back through its original address. */
int main(void)
{
    if (!is_pae_supported()) {
        printf("PAE not supported\n");
        return 1;
    }
    printf("PAE supported\n");
    setup_mmu();

    volatile unsigned int test;
    for (int i = 1; i < 4; i++) {
        /* Same physical location, seen through the i-th 1 GiB region. */
        volatile unsigned int *ptr = (unsigned int*)((uint32_t)&test + (i << 30));
        printf("writing %u to %p, and reading from %p\n", i, ptr, &test);
        *ptr = i;
        if (test != i) {
            /* Report the value actually read back, not the expected one. */
            printf("error, got %u\n", test);
            return 1;
        }
    }
    printf("everything OK\n");
    return 0;
}
|
Loading…
Reference in a new issue