Add support for PAE (#599)

Physical memory is still limited to 32-bit addresses, but systems that enable PAE should work now.
This commit is contained in:
Paweł Marczewski 2022-01-14 21:19:54 +01:00 committed by GitHub
parent 6b9d1f74d5
commit f72d0a9ca0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 271 additions and 37 deletions

View file

@ -1762,7 +1762,7 @@
$("dump_gdt").onclick = debug.dump_gdt_ldt.bind(debug);
$("dump_idt").onclick = debug.dump_idt.bind(debug);
$("dump_regs").onclick = debug.dump_regs.bind(debug);
$("dump_pt").onclick = debug.dump_page_directory.bind(debug);
$("dump_pt").onclick = debug.dump_page_structures.bind(debug);
$("dump_log").onclick = function()
{

View file

@ -120,6 +120,7 @@ var
/** @const */
var CR0_PG = 1 << 31;
var CR4_PAE = 1 << 5;
// https://github.com/qemu/seabios/blob/14221cd86eadba82255fdc55ed174d401c7a0a04/src/fw/paravirt.c#L205-L219

View file

@ -39,7 +39,7 @@ CPU.prototype.debug_init = function()
debug.dump_state = dump_state;
debug.dump_stack = dump_stack;
debug.dump_page_directory = dump_page_directory;
debug.dump_page_structures = dump_page_structures;
debug.dump_gdt_ldt = dump_gdt_ldt;
debug.dump_idt = dump_idt;
@ -115,6 +115,7 @@ CPU.prototype.debug_init = function()
}
return ("mode=" + mode + "/" + op_size + " paging=" + (+((cpu.cr[0] & CR0_PG) !== 0)) +
" pae=" + (+((cpu.cr[4] & CR4_PAE) !== 0)) +
" iopl=" + iopl + " cpl=" + cpl + " if=" + if_ + " cs:eip=" + cs_eip +
" cs_off=" + h(cpu.get_seg_cs() >>> 0, 8) +
" flgs=" + h(cpu.get_eflags() >>> 0, 6) + " (" + flag_string + ")" +
@ -297,7 +298,7 @@ CPU.prototype.debug_init = function()
}
}
function load_page_entry(dword_entry, is_directory)
function load_page_entry(dword_entry, pae, is_directory)
{
if(!DEBUG) return;
@ -312,7 +313,7 @@ CPU.prototype.debug_init = function()
if(size && !is_directory)
{
address = dword_entry & 0xFFC00000;
address = dword_entry & (pae ? 0xFFE00000 : 0xFFC00000);
}
else
{
@ -331,19 +332,47 @@ CPU.prototype.debug_init = function()
};
}
function dump_page_directory()
var dbg_log = console.log.bind(console);
// Entry point for dumping the paging structures: detects whether PAE is
// active (CR4.PAE) and walks either the four PDPT slots or the single
// legacy page directory pointed to by CR3.
function dump_page_structures() {
    if(!(cpu.cr[4] & CR4_PAE))
    {
        dbg_log("PAE disabled");
        dump_page_directory(cpu.cr[3], false, 0);
        return;
    }

    dbg_log("PAE enabled");
    // Four page-directory-pointer-table entries of 8 bytes each; only the
    // low dword is inspected (physical addresses are limited to 32 bits in
    // this emulator).
    for(var slot = 0; slot < 4; slot++)
    {
        var entry_lo = cpu.read32s(cpu.cr[3] + slot * 8);
        // bit 0 is the present flag
        if(entry_lo & 1)
        {
            dump_page_directory(entry_lo & 0xFFFFF000, true, slot << 30);
        }
    }
}
/* NOTE: PAE entries are 64-bits, we ignore the high half here. */
function dump_page_directory(pd_addr, pae, start)
{
if(!DEBUG) return;
for(var i = 0; i < 1024; i++)
var n = pae ? 512 : 1024;
var entry_size = pae ? 8 : 4;
var pd_shift = pae ? 21 : 22;
for(var i = 0; i < n; i++)
{
var addr = cpu.cr[3] + 4 * i;
var dword = cpu.read32s(addr),
entry = load_page_entry(dword, true);
var addr = pd_addr + i * entry_size,
dword = cpu.read32s(addr),
entry = load_page_entry(dword, pae, true);
if(!entry)
{
dbg_log("Not present: " + h((i << 22) >>> 0, 8));
continue;
}
@ -357,20 +386,21 @@ CPU.prototype.debug_init = function()
if(entry.size)
{
dbg_log("=== " + h((i << 22) >>> 0, 8) + " -> " + h(entry.address >>> 0, 8) + " | " + flags);
dbg_log("=== " + h(start + (i << pd_shift) >>> 0, 8) + " -> " +
h(entry.address >>> 0, 8) + " | " + flags);
continue;
}
else
{
dbg_log("=== " + h((i << 22) >>> 0, 8) + " | " + flags);
dbg_log("=== " + h(start + (i << pd_shift) >>> 0, 8) + " | " + flags);
}
for(var j = 0; j < 1024; j++)
for(var j = 0; j < n; j++)
{
var sub_addr = entry.address + 4 * j;
var sub_addr = entry.address + j * entry_size;
dword = cpu.read32s(sub_addr);
var subentry = load_page_entry(dword, false);
var subentry = load_page_entry(dword, pae, false);
if(subentry)
{
@ -383,7 +413,7 @@ CPU.prototype.debug_init = function()
flags += subentry.accessed ? "A " : " ";
flags += subentry.dirty ? "Di " : " ";
dbg_log("# " + h((i << 22 | j << 12) >>> 0, 8) + " -> " +
dbg_log("# " + h(start + (i << pd_shift | j << 12) >>> 0, 8) + " -> " +
h(subentry.address, 8) + " | " + flags + " (at " + h(sub_addr, 8) + ")");
}
}

View file

@ -20,8 +20,8 @@ use cpu::global_pointers::*;
use cpu::memory;
use cpu::memory::mem8;
use cpu::memory::{
in_mapped_range, read8, read16, read32s, read64s, read128, read_aligned32, write8,
write_aligned32,
in_mapped_range, read8, read16, read32s, read64s, read128, read_aligned32,
read_aligned64, write8, write_aligned32,
};
use cpu::misc_instr::{
adjust_stack_reg, get_stack_pointer, getaf, getcf, getof, getpf, getsf, getzf, pop16, pop32s,
@ -1797,6 +1797,20 @@ pub unsafe fn do_page_translation(addr: i32, for_writing: bool, user: bool) -> O
}
}
/*
* 32-bit paging:
* - 10 bits PD | 10 bits PT | 12 bits offset
* - 10 bits PD | 22 bits offset (4MB huge page)
*
* PAE paging:
* - 2 bits PDPT | 9 bits PD | 9 bits PT | 12 bits offset
* - 2 bits PDPT | 9 bits PD | 21 bits offset (2MB huge page)
*
* Note that PAE entries are 64-bit, and can describe physical addresses over 32
* bits. However, since we support only 32-bit physical addresses, we require
* the high half of the entry to be 0 (except for the execute-disable bit in
* PDE and PTE).
*/
pub unsafe fn do_page_walk(
addr: i32,
for_writing: bool,
@ -1816,16 +1830,25 @@ pub unsafe fn do_page_walk(
else {
profiler::stat_increment(TLB_MISS);
let page_dir_addr = (*cr.offset(3) as u32 >> 2).wrapping_add((page >> 10) as u32) as i32;
let page_dir_entry = read_aligned32(page_dir_addr as u32);
// XXX
let kernel_write_override = !user && 0 == *cr & CR0_WP;
if 0 == page_dir_entry & PAGE_TABLE_PRESENT_MASK {
// to do at this place:
//
// - set cr2 = addr (which caused the page fault)
// - call_interrupt_vector with id 14, error code 0-7 (requires information if read or write)
// - prevent execution of the function that triggered this call
let pae = *cr.offset(4) & CR4_PAE != 0;
let (page_dir_addr, page_dir_entry) =
match walk_page_directory(pae, addr) {
Some((a, e)) => (a, e),
// to do at this place:
//
// - set cr2 = addr (which caused the page fault)
// - call_interrupt_vector with id 14, error code 0-7 (requires information if read or write)
// - prevent execution of the function that triggered this call
None => return Err(PageFault {
addr,
for_writing,
user,
present: false,
}),
};
if page_dir_entry & PAGE_TABLE_PRESENT_MASK == 0 {
return Err(PageFault {
addr,
for_writing,
@ -1833,6 +1856,9 @@ pub unsafe fn do_page_walk(
present: false,
});
}
// XXX
let kernel_write_override = !user && 0 == *cr & CR0_WP;
if page_dir_entry & PAGE_TABLE_RW_MASK == 0 && !kernel_write_override {
can_write = false;
if for_writing {
@ -1868,13 +1894,17 @@ pub unsafe fn do_page_walk(
write_aligned32(page_dir_addr as u32, new_page_dir_entry);
}
high = (page_dir_entry as u32 & 0xFFC00000 | (addr & 0x3FF000) as u32) as i32;
high = if pae {
(page_dir_entry as u32 & 0xFFE00000 | (addr & 0x1FF000) as u32) as i32
} else {
(page_dir_entry as u32 & 0xFFC00000 | (addr & 0x3FF000) as u32) as i32
};
global = page_dir_entry & PAGE_TABLE_GLOBAL_MASK == PAGE_TABLE_GLOBAL_MASK
}
else {
let page_table_addr = ((page_dir_entry as u32 & 0xFFFFF000) >> 2)
.wrapping_add((page & 1023) as u32) as i32;
let page_table_entry = read_aligned32(page_table_addr as u32);
let (page_table_addr, page_table_entry) =
walk_page_table(pae, addr, page_dir_entry);
if page_table_entry & PAGE_TABLE_PRESENT_MASK == 0 {
return Err(PageFault {
addr,
@ -1883,6 +1913,7 @@ pub unsafe fn do_page_walk(
present: false,
});
}
if page_table_entry & PAGE_TABLE_RW_MASK == 0 && !kernel_write_override {
can_write = false;
if for_writing {
@ -1967,6 +1998,65 @@ pub unsafe fn do_page_walk(
return Ok(high);
}
/// Locate the page-directory entry covering the linear address `addr`.
///
/// Returns `(entry_address, entry)` on success, where `entry_address` is
/// expressed in 32-bit-word units (byte address >> 2), matching the
/// convention used by `read_aligned32`. Returns `None` only in PAE mode,
/// when the PDPT entry for `addr` is not present.
unsafe fn walk_page_directory(pae: bool, addr: i32) -> Option<(i32, i32)> {
    if pae {
        // PAE: 2-bit PDPT index | 9-bit page-directory index | ...
        // (see the layout comment above do_page_walk).
        let pdpt_idx = (addr as u32) >> 30;
        let page_dir_idx = ((addr as u32) >> 21) & 0x1FF;
        // cr3 >> 2 converts the byte address to a 32-bit-word index;
        // PAE entries are 8 bytes, hence the `<< 1` on the index.
        // NOTE(review): cr3's low flag bits (PWT/PCD) are not masked off
        // here — presumably guests keep them clear; confirm.
        let pdpt_addr = (*cr.offset(3) as u32 >> 2).wrapping_add(pdpt_idx << 1);
        let pdpt_entry = read_aligned64(pdpt_addr);
        if pdpt_entry as i32 & PAGE_TABLE_PRESENT_MASK == 0 {
            return None;
        }
        // Only 32-bit physical addresses are supported, and a PDPT entry
        // has no execute-disable bit, so the entire high half must be 0.
        dbg_assert!(
            pdpt_entry as u64 & 0xFFFF_FFFF_0000_0000 == 0,
            "Unsupported: PDPT entry larger than 32 bits"
        );
        let page_dir_addr = ((pdpt_entry as u32 & 0xFFFFF000)>> 2).wrapping_add(page_dir_idx << 1);
        let page_dir_entry = read_aligned64(page_dir_addr);
        // Note that the highest bit of PDE specifies execute-disable, and can
        // be set (we'll ignore it anyway).
        dbg_assert!(
            page_dir_entry as u64 & 0x7FFF_FFFF_0000_0000 == 0,
            "Unsupported: Page directory entry larger than 32 bits"
        );
        return Some((page_dir_addr as i32, page_dir_entry as i32));
    }

    // Legacy 32-bit paging: 10-bit page-directory index, 4-byte entries.
    let page_dir_idx = (addr as u32) >> 22;
    let page_dir_addr = (*cr.offset(3) as u32 >> 2).wrapping_add(page_dir_idx);
    let page_dir_entry = read_aligned32(page_dir_addr);
    return Some((page_dir_addr as i32, page_dir_entry));
}
/// Locate the page-table entry covering the linear address `addr`, given
/// its (present, non-huge) page-directory entry.
///
/// Returns `(entry_address, entry)`, with `entry_address` in 32-bit-word
/// units (byte address >> 2), matching `read_aligned32`'s convention.
unsafe fn walk_page_table(
    pae: bool,
    addr: i32,
    page_dir_entry: i32
) -> (i32, i32) {
    // Page-table base from the PDE, converted to a 32-bit-word index.
    let table_base = (page_dir_entry as u32 & 0xFFFFF000) >> 2;

    if !pae {
        // Legacy paging: 1024 entries of 4 bytes each.
        let idx = (addr as u32 >> 12) & 0x3FF;
        let entry_addr = table_base.wrapping_add(idx);
        return (entry_addr as i32, read_aligned32(entry_addr));
    }

    // PAE: 512 entries of 8 bytes each (two 32-bit words, hence `<< 1`).
    let idx = (addr as u32 >> 12) & 0x1FF;
    let entry_addr = table_base.wrapping_add(idx << 1);
    let entry = read_aligned64(entry_addr);
    // Note that the highest bit of PTE specifies execute-disable, and can
    // be set (we'll ignore it anyway).
    dbg_assert!(
        entry as u64 & 0x7FFF_FFFF_0000_0000 == 0,
        "Unsupported: Page table entry larger than 32 bits"
    );
    return (entry_addr as i32, entry as i32);
}
#[no_mangle]
pub unsafe fn full_clear_tlb() {
profiler::stat_increment(FULL_CLEAR_TLB);

View file

@ -798,13 +798,10 @@ pub unsafe fn instr_0F22(r: i32, creg: i32) {
return;
}
else {
if 0 != (*cr.offset(4) ^ data) & (CR4_PGE | CR4_PSE) {
if 0 != (*cr.offset(4) ^ data) & (CR4_PGE | CR4_PSE | CR4_PAE) {
full_clear_tlb();
}
*cr.offset(4) = data;
if 0 != *cr.offset(4) & CR4_PAE {
dbg_assert!(false, "PAE is not supported");
}
}
},
_ => {
@ -3177,7 +3174,7 @@ pub unsafe fn instr_0FA2() {
ecx |= 1 << 31
}; // hypervisor
edx = (if true /* have fpu */ { 1 } else { 0 }) | // fpu
vme | 1 << 3 | 1 << 4 | 1 << 5 | // vme, pse, tsc, msr
vme | 1 << 3 | 1 << 4 | 1 << 5 | 1 << 6 | // vme, pse, tsc, msr, pae
1 << 8 | 1 << 11 | 1 << 13 | 1 << 15 | // cx8, sep, pge, cmov
1 << 23 | 1 << 24 | 1 << 25 | 1 << 26; // mmx, fxsr, sse1, sse2

View file

@ -97,6 +97,19 @@ pub unsafe fn read_aligned32(addr: u32) -> i32 {
};
}
/// Read an aligned 64-bit value from guest physical memory.
///
/// `addr` is given in 32-bit-word units (byte address >> 2), matching the
/// convention of `read_aligned32`; it must be even, so the byte address is
/// 8-byte aligned.
pub unsafe fn read_aligned64(addr: u32) -> i64 {
    dbg_assert!(addr < 0x40000000 as u32);
    dbg_assert!(addr & 1 == 0);
    let byte_addr = addr << 2;
    if in_mapped_range(byte_addr) {
        // Assemble the value from two 32-bit mmap reads. Mask each half to
        // unsigned 32 bits before widening: a plain `as i64` on a signed
        // 32-bit read would sign-extend and corrupt the high half whenever
        // bit 31 of the low word is set. (If mmap_read32 already returns an
        // unsigned value, the `as u32` is a no-op.)
        let lo = mmap_read32(byte_addr) as u32;
        let hi = mmap_read32(byte_addr + 4) as u32;
        return lo as i64 | (hi as i64) << 32;
    }
    else {
        // Plain RAM: reinterpret the backing store as i64. The alignment
        // assert above guarantees the offset lands on an 8-byte slot.
        return *(mem8 as *mut i64).offset((addr >> 1) as isize);
    }
}
pub unsafe fn read128(addr: u32) -> reg128 {
let mut value: reg128 = reg128 {
i8_0: [0 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],

View file

@ -12,6 +12,7 @@ make -C ../../build/libv86.js
./run.js x86/sieve.flat
./run.js x86/ioapic.flat
./run.js x86/apic.flat
./run.js x86/pae.flat
```
Tests can also be run in browser by going to `?profile=test-$name` (for

View file

@ -51,6 +51,7 @@ tests-common = $(TEST_DIR)/vmexit.flat $(TEST_DIR)/tsc.flat \
$(TEST_DIR)/init.flat $(TEST_DIR)/smap.flat \
$(TEST_DIR)/hyperv_synic.flat $(TEST_DIR)/hyperv_stimer.flat \
$(TEST_DIR)/hyperv_connections.flat \
$(TEST_DIR)/pae.flat \
ifdef API
tests-api = api/api-sample api/dirty-log api/dirty-log-perf

View file

@ -0,0 +1,101 @@
/* Simple PAE paging test. See lib/x86/vm.c for similar code which sets up
* non-PAE paging. */
#include "fwcfg.h"
#include "asm/page.h"
#include "processor.h"
#ifdef __x86_64__
#error This test is 32-bit only.
#endif
#define HUGE_PAGE_SIZE (1UL << 21)
uint64_t pdpt[4] __attribute__((aligned(0x20)));
uint64_t page_dirs[4 * 512] __attribute__((aligned(0x1000)));
uint64_t page_tables[512 * 512] __attribute__((aligned(0x1000)));
static bool is_pae_supported(void) {
struct cpuid c = cpuid(1);
return c.d & (1 << 6);
}
/* Fill page directory at `pd` with huge page entries. */
/* Fill the page directory at `pd` with 2MB huge-page entries mapping
 * physical memory from `start`, stopping after the entry that reaches
 * `end` (at most 512 entries). */
static void setup_pd_huge_pages(uint64_t *pd, uint64_t start, uint64_t end) {
    uint64_t phys = start;
    for (unsigned int i = 0; i < 512; i++) {
        pd[i] = phys | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK |
            PT_PAGE_SIZE_MASK;
        phys += HUGE_PAGE_SIZE;
        if (phys >= end)
            break;
    }
}
/* Fill page directory at `pd` with page table entries, and use memory at `pt`
* to create page tables. */
/* Fill the page directory at `pd` with 4KB page-table entries, using the
 * memory at `pt` (512 tables of 512 entries) for the tables themselves.
 * Maps physical memory from `start`, stopping once `end` is reached. */
static void setup_pd(uint64_t *pd, uint64_t *pt, uint64_t start, uint64_t end) {
    uint64_t phys = start;
    for (unsigned int i = 0; i < 512; i++) {
        /* Each PDE points at its own 512-entry page table inside `pt`. */
        pd[i] = (uint32_t)&pt[i * 512] | PT_PRESENT_MASK | PT_WRITABLE_MASK |
            PT_USER_MASK;
        for (unsigned int j = 0; j < 512; j++) {
            pt[i * 512 + j] = phys | PT_PRESENT_MASK | PT_WRITABLE_MASK |
                PT_USER_MASK;
            phys += PAGE_SIZE;
            if (phys >= end)
                return;
        }
    }
}
/* Build a PAE paging hierarchy and enable paging.
 *
 * Physical memory (capped at 4GB) is mapped four times, once per 1GB PDPT
 * slot: slots 0-2 (0000_0000, 4000_0000, 8000_0000) via 2MB huge pages,
 * slot 3 (C000_0000) via normal 4KB page tables. */
static void setup_mmu(void) {
    uint64_t mem_size = fwcfg_get_u64(FW_CFG_RAM_SIZE);

    if (mem_size > (1ULL << 32))
        mem_size = 1ULL << 32;

    /* Slots 0-2: identity-style maps of physical memory via huge pages. */
    for (int slot = 0; slot < 3; slot++) {
        pdpt[slot] = (uint32_t)&page_dirs[slot * 512] | PT_PRESENT_MASK;
        setup_pd_huge_pages(&page_dirs[slot * 512], 0, mem_size);
    }

    /* Slot 3: the same physical memory via 4KB page tables. */
    pdpt[3] = (uint32_t)&page_dirs[3 * 512] | PT_PRESENT_MASK;
    setup_pd(&page_dirs[3 * 512], &page_tables[0], 0, mem_size);

    /* Paging is switched off while CR4.PAE and CR3 are changed, then
     * re-enabled together with protection and write-protect. */
    write_cr0(0);
    write_cr4(read_cr4() | X86_CR4_PAE);
    write_cr3((uint32_t)pdpt);
    write_cr0(X86_CR0_PG | X86_CR0_PE | X86_CR0_WP);
    printf("paging enabled\n");
}
/* Test driver: enable PAE paging, then verify that a write through each
 * of the three alias mappings (1GB, 2GB, 3GB offsets) is visible through
 * the variable's canonical address. Returns 0 on success, 1 on failure. */
int main(void)
{
    if (!is_pae_supported()) {
        printf("PAE not supported\n");
        return 1;
    }
    printf("PAE supported\n");

    setup_mmu();

    volatile unsigned int test;
    for (int i = 1; i < 4; i++) {
        /* All four PDPT slots map the same physical memory, so `test`
         * is also reachable at &test + i*1GB. */
        volatile unsigned int *ptr = (unsigned int*)((uint32_t)&test + (i << 30));
        printf("writing %u to %p, and reading from %p\n", i, ptr, &test);
        *ptr = i;
        if (test != i) {
            /* Report the value actually read back, not the value that was
             * written: printing `i` here would always show the expected
             * value and hide the failure detail. */
            printf("error, got %u\n", test);
            return 1;
        }
    }
    printf("everything OK\n");
    return 0;
}