From d50101d20c76341527673e69d0ad48c70acf414e Mon Sep 17 00:00:00 2001 From: copy Date: Sun, 2 Apr 2017 15:17:34 -0500 Subject: [PATCH] Import kvm-unit-test into this repo --- tests/kvm-unit-tests/.gitignore | 19 + tests/kvm-unit-tests/COPYRIGHT | 4 + tests/kvm-unit-tests/MAINTAINERS | 84 + tests/kvm-unit-tests/Makefile | 109 + tests/kvm-unit-tests/README.md | 131 ++ tests/kvm-unit-tests/configure | 167 ++ tests/kvm-unit-tests/lib/abort.c | 20 + tests/kvm-unit-tests/lib/alloc.c | 179 ++ tests/kvm-unit-tests/lib/alloc.h | 116 ++ tests/kvm-unit-tests/lib/argv.c | 141 ++ tests/kvm-unit-tests/lib/asm-generic/atomic.h | 21 + .../kvm-unit-tests/lib/asm-generic/barrier.h | 35 + tests/kvm-unit-tests/lib/asm-generic/io.h | 213 ++ tests/kvm-unit-tests/lib/asm-generic/page.h | 29 + .../lib/asm-generic/pci-host-bridge.h | 28 + tests/kvm-unit-tests/lib/asm-generic/pci.h | 4 + .../kvm-unit-tests/lib/asm-generic/spinlock.h | 4 + tests/kvm-unit-tests/lib/auxinfo.c | 2 + tests/kvm-unit-tests/lib/auxinfo.h | 9 + tests/kvm-unit-tests/lib/bitops.h | 36 + tests/kvm-unit-tests/lib/chr-testdev.c | 72 + tests/kvm-unit-tests/lib/chr-testdev.h | 14 + tests/kvm-unit-tests/lib/devicetree.c | 334 +++ tests/kvm-unit-tests/lib/devicetree.h | 251 +++ tests/kvm-unit-tests/lib/errata.h | 24 + tests/kvm-unit-tests/lib/kbuild.h | 8 + tests/kvm-unit-tests/lib/libcflat.h | 132 ++ .../kvm-unit-tests/lib/libfdt/Makefile.libfdt | 10 + tests/kvm-unit-tests/lib/libfdt/README | 4 + tests/kvm-unit-tests/lib/libfdt/fdt.c | 250 +++ tests/kvm-unit-tests/lib/libfdt/fdt.h | 111 + .../lib/libfdt/fdt_empty_tree.c | 84 + tests/kvm-unit-tests/lib/libfdt/fdt_ro.c | 573 +++++ tests/kvm-unit-tests/lib/libfdt/fdt_rw.c | 492 +++++ .../kvm-unit-tests/lib/libfdt/fdt_strerror.c | 96 + tests/kvm-unit-tests/lib/libfdt/fdt_sw.c | 256 +++ tests/kvm-unit-tests/lib/libfdt/fdt_wip.c | 118 ++ tests/kvm-unit-tests/lib/libfdt/libfdt.h | 1514 ++++++++++++++ tests/kvm-unit-tests/lib/libfdt/libfdt_env.h | 111 + 
.../lib/libfdt/libfdt_internal.h | 95 + tests/kvm-unit-tests/lib/libfdt/version.lds | 60 + tests/kvm-unit-tests/lib/linux/const.h | 27 + tests/kvm-unit-tests/lib/linux/pci_regs.h | 949 +++++++++ tests/kvm-unit-tests/lib/linux/psci.h | 108 + tests/kvm-unit-tests/lib/pci-edu.c | 73 + tests/kvm-unit-tests/lib/pci-edu.h | 86 + tests/kvm-unit-tests/lib/pci-host-generic.c | 320 +++ tests/kvm-unit-tests/lib/pci-host-generic.h | 46 + tests/kvm-unit-tests/lib/pci-testdev.c | 194 ++ tests/kvm-unit-tests/lib/pci.c | 374 ++++ tests/kvm-unit-tests/lib/pci.h | 102 + tests/kvm-unit-tests/lib/printf.c | 261 +++ tests/kvm-unit-tests/lib/report.c | 145 ++ tests/kvm-unit-tests/lib/setjmp.h | 12 + tests/kvm-unit-tests/lib/stack.c | 96 + tests/kvm-unit-tests/lib/stack.h | 21 + tests/kvm-unit-tests/lib/string.c | 175 ++ tests/kvm-unit-tests/lib/string.h | 17 + tests/kvm-unit-tests/lib/util.c | 18 + tests/kvm-unit-tests/lib/util.h | 23 + tests/kvm-unit-tests/lib/virtio-mmio.c | 177 ++ tests/kvm-unit-tests/lib/virtio-mmio.h | 65 + tests/kvm-unit-tests/lib/virtio.c | 130 ++ tests/kvm-unit-tests/lib/virtio.h | 150 ++ tests/kvm-unit-tests/lib/x86/acpi.c | 52 + tests/kvm-unit-tests/lib/x86/acpi.h | 104 + tests/kvm-unit-tests/lib/x86/apic-defs.h | 138 ++ tests/kvm-unit-tests/lib/x86/apic.c | 208 ++ tests/kvm-unit-tests/lib/x86/apic.h | 50 + tests/kvm-unit-tests/lib/x86/asm/barrier.h | 27 + tests/kvm-unit-tests/lib/x86/asm/bitops.h | 14 + tests/kvm-unit-tests/lib/x86/asm/io.h | 65 + tests/kvm-unit-tests/lib/x86/asm/page.h | 48 + tests/kvm-unit-tests/lib/x86/asm/pci.h | 59 + tests/kvm-unit-tests/lib/x86/asm/spinlock.h | 11 + tests/kvm-unit-tests/lib/x86/asm/stack.h | 11 + tests/kvm-unit-tests/lib/x86/atomic.c | 37 + tests/kvm-unit-tests/lib/x86/atomic.h | 166 ++ tests/kvm-unit-tests/lib/x86/desc.c | 407 ++++ tests/kvm-unit-tests/lib/x86/desc.h | 164 ++ tests/kvm-unit-tests/lib/x86/fake-apic.h | 14 + tests/kvm-unit-tests/lib/x86/fwcfg.c | 45 + tests/kvm-unit-tests/lib/x86/fwcfg.h | 44 + 
tests/kvm-unit-tests/lib/x86/intel-iommu.c | 372 ++++ tests/kvm-unit-tests/lib/x86/intel-iommu.h | 149 ++ tests/kvm-unit-tests/lib/x86/io.c | 99 + tests/kvm-unit-tests/lib/x86/isr.c | 124 ++ tests/kvm-unit-tests/lib/x86/isr.h | 14 + tests/kvm-unit-tests/lib/x86/msr.h | 412 ++++ tests/kvm-unit-tests/lib/x86/processor.h | 433 ++++ tests/kvm-unit-tests/lib/x86/setjmp32.S | 25 + tests/kvm-unit-tests/lib/x86/setjmp64.S | 27 + tests/kvm-unit-tests/lib/x86/setup.c | 47 + tests/kvm-unit-tests/lib/x86/smp.c | 125 ++ tests/kvm-unit-tests/lib/x86/smp.h | 12 + tests/kvm-unit-tests/lib/x86/stack.c | 31 + tests/kvm-unit-tests/lib/x86/vm.c | 224 ++ tests/kvm-unit-tests/lib/x86/vm.h | 31 + tests/kvm-unit-tests/run.js | 38 + tests/kvm-unit-tests/x86/Makefile | 1 + tests/kvm-unit-tests/x86/Makefile.common | 88 + tests/kvm-unit-tests/x86/Makefile.i386 | 33 + tests/kvm-unit-tests/x86/Makefile.x86_64 | 24 + tests/kvm-unit-tests/x86/README | 49 + tests/kvm-unit-tests/x86/access.c | 991 +++++++++ tests/kvm-unit-tests/x86/apic.c | 486 +++++ tests/kvm-unit-tests/x86/asyncpf.c | 109 + tests/kvm-unit-tests/x86/cmpxchg8b.c | 27 + tests/kvm-unit-tests/x86/cstart.S | 212 ++ tests/kvm-unit-tests/x86/cstart64.S | 256 +++ tests/kvm-unit-tests/x86/debug.c | 181 ++ tests/kvm-unit-tests/x86/emulator.c | 1168 +++++++++++ tests/kvm-unit-tests/x86/eventinj.c | 421 ++++ tests/kvm-unit-tests/x86/flat.lds | 21 + tests/kvm-unit-tests/x86/hypercall.c | 80 + tests/kvm-unit-tests/x86/hyperv.c | 25 + tests/kvm-unit-tests/x86/hyperv.h | 191 ++ tests/kvm-unit-tests/x86/hyperv_clock.c | 209 ++ tests/kvm-unit-tests/x86/hyperv_stimer.c | 372 ++++ tests/kvm-unit-tests/x86/hyperv_synic.c | 207 ++ tests/kvm-unit-tests/x86/idt_test.c | 41 + tests/kvm-unit-tests/x86/init.c | 130 ++ tests/kvm-unit-tests/x86/intel-iommu.c | 163 ++ tests/kvm-unit-tests/x86/ioapic.c | 438 ++++ tests/kvm-unit-tests/x86/ioram.h | 7 + tests/kvm-unit-tests/x86/kvmclock.c | 288 +++ tests/kvm-unit-tests/x86/kvmclock.h | 44 + 
tests/kvm-unit-tests/x86/kvmclock_test.c | 168 ++ tests/kvm-unit-tests/x86/memory.c | 88 + tests/kvm-unit-tests/x86/msr.c | 131 ++ tests/kvm-unit-tests/x86/pcid.c | 167 ++ tests/kvm-unit-tests/x86/pku.c | 138 ++ tests/kvm-unit-tests/x86/pmu.c | 413 ++++ tests/kvm-unit-tests/x86/port80.c | 12 + tests/kvm-unit-tests/x86/realmode.c | 1787 ++++++++++++++++ tests/kvm-unit-tests/x86/realmode.lds | 12 + tests/kvm-unit-tests/x86/rmap_chain.c | 45 + tests/kvm-unit-tests/x86/run | 51 + tests/kvm-unit-tests/x86/s3.c | 89 + tests/kvm-unit-tests/x86/setjmp.c | 19 + tests/kvm-unit-tests/x86/sieve.c | 51 + tests/kvm-unit-tests/x86/smap.c | 187 ++ tests/kvm-unit-tests/x86/smptest.c | 31 + tests/kvm-unit-tests/x86/svm.c | 1081 ++++++++++ tests/kvm-unit-tests/x86/svm.h | 328 +++ tests/kvm-unit-tests/x86/taskswitch.c | 50 + tests/kvm-unit-tests/x86/taskswitch2.c | 294 +++ tests/kvm-unit-tests/x86/tsc.c | 47 + tests/kvm-unit-tests/x86/tsc_adjust.c | 42 + .../kvm-unit-tests/x86/tscdeadline_latency.c | 132 ++ tests/kvm-unit-tests/x86/types.h | 20 + tests/kvm-unit-tests/x86/unittests.cfg | 226 ++ tests/kvm-unit-tests/x86/vmexit.c | 536 +++++ tests/kvm-unit-tests/x86/vmx.c | 1098 ++++++++++ tests/kvm-unit-tests/x86/vmx.h | 622 ++++++ tests/kvm-unit-tests/x86/vmx_tests.c | 1849 +++++++++++++++++ tests/kvm-unit-tests/x86/xsave.c | 178 ++ 157 files changed, 29510 insertions(+) create mode 100644 tests/kvm-unit-tests/.gitignore create mode 100644 tests/kvm-unit-tests/COPYRIGHT create mode 100644 tests/kvm-unit-tests/MAINTAINERS create mode 100644 tests/kvm-unit-tests/Makefile create mode 100644 tests/kvm-unit-tests/README.md create mode 100755 tests/kvm-unit-tests/configure create mode 100644 tests/kvm-unit-tests/lib/abort.c create mode 100644 tests/kvm-unit-tests/lib/alloc.c create mode 100644 tests/kvm-unit-tests/lib/alloc.h create mode 100644 tests/kvm-unit-tests/lib/argv.c create mode 100644 tests/kvm-unit-tests/lib/asm-generic/atomic.h create mode 100644 
tests/kvm-unit-tests/lib/asm-generic/barrier.h create mode 100644 tests/kvm-unit-tests/lib/asm-generic/io.h create mode 100644 tests/kvm-unit-tests/lib/asm-generic/page.h create mode 100644 tests/kvm-unit-tests/lib/asm-generic/pci-host-bridge.h create mode 100644 tests/kvm-unit-tests/lib/asm-generic/pci.h create mode 100644 tests/kvm-unit-tests/lib/asm-generic/spinlock.h create mode 100644 tests/kvm-unit-tests/lib/auxinfo.c create mode 100644 tests/kvm-unit-tests/lib/auxinfo.h create mode 100644 tests/kvm-unit-tests/lib/bitops.h create mode 100644 tests/kvm-unit-tests/lib/chr-testdev.c create mode 100644 tests/kvm-unit-tests/lib/chr-testdev.h create mode 100644 tests/kvm-unit-tests/lib/devicetree.c create mode 100644 tests/kvm-unit-tests/lib/devicetree.h create mode 100644 tests/kvm-unit-tests/lib/errata.h create mode 100644 tests/kvm-unit-tests/lib/kbuild.h create mode 100644 tests/kvm-unit-tests/lib/libcflat.h create mode 100644 tests/kvm-unit-tests/lib/libfdt/Makefile.libfdt create mode 100644 tests/kvm-unit-tests/lib/libfdt/README create mode 100644 tests/kvm-unit-tests/lib/libfdt/fdt.c create mode 100644 tests/kvm-unit-tests/lib/libfdt/fdt.h create mode 100644 tests/kvm-unit-tests/lib/libfdt/fdt_empty_tree.c create mode 100644 tests/kvm-unit-tests/lib/libfdt/fdt_ro.c create mode 100644 tests/kvm-unit-tests/lib/libfdt/fdt_rw.c create mode 100644 tests/kvm-unit-tests/lib/libfdt/fdt_strerror.c create mode 100644 tests/kvm-unit-tests/lib/libfdt/fdt_sw.c create mode 100644 tests/kvm-unit-tests/lib/libfdt/fdt_wip.c create mode 100644 tests/kvm-unit-tests/lib/libfdt/libfdt.h create mode 100644 tests/kvm-unit-tests/lib/libfdt/libfdt_env.h create mode 100644 tests/kvm-unit-tests/lib/libfdt/libfdt_internal.h create mode 100644 tests/kvm-unit-tests/lib/libfdt/version.lds create mode 100644 tests/kvm-unit-tests/lib/linux/const.h create mode 100644 tests/kvm-unit-tests/lib/linux/pci_regs.h create mode 100644 tests/kvm-unit-tests/lib/linux/psci.h create mode 100644 
tests/kvm-unit-tests/lib/pci-edu.c create mode 100644 tests/kvm-unit-tests/lib/pci-edu.h create mode 100644 tests/kvm-unit-tests/lib/pci-host-generic.c create mode 100644 tests/kvm-unit-tests/lib/pci-host-generic.h create mode 100644 tests/kvm-unit-tests/lib/pci-testdev.c create mode 100644 tests/kvm-unit-tests/lib/pci.c create mode 100644 tests/kvm-unit-tests/lib/pci.h create mode 100644 tests/kvm-unit-tests/lib/printf.c create mode 100644 tests/kvm-unit-tests/lib/report.c create mode 100644 tests/kvm-unit-tests/lib/setjmp.h create mode 100644 tests/kvm-unit-tests/lib/stack.c create mode 100644 tests/kvm-unit-tests/lib/stack.h create mode 100644 tests/kvm-unit-tests/lib/string.c create mode 100644 tests/kvm-unit-tests/lib/string.h create mode 100644 tests/kvm-unit-tests/lib/util.c create mode 100644 tests/kvm-unit-tests/lib/util.h create mode 100644 tests/kvm-unit-tests/lib/virtio-mmio.c create mode 100644 tests/kvm-unit-tests/lib/virtio-mmio.h create mode 100644 tests/kvm-unit-tests/lib/virtio.c create mode 100644 tests/kvm-unit-tests/lib/virtio.h create mode 100644 tests/kvm-unit-tests/lib/x86/acpi.c create mode 100644 tests/kvm-unit-tests/lib/x86/acpi.h create mode 100644 tests/kvm-unit-tests/lib/x86/apic-defs.h create mode 100644 tests/kvm-unit-tests/lib/x86/apic.c create mode 100644 tests/kvm-unit-tests/lib/x86/apic.h create mode 100644 tests/kvm-unit-tests/lib/x86/asm/barrier.h create mode 100644 tests/kvm-unit-tests/lib/x86/asm/bitops.h create mode 100644 tests/kvm-unit-tests/lib/x86/asm/io.h create mode 100644 tests/kvm-unit-tests/lib/x86/asm/page.h create mode 100644 tests/kvm-unit-tests/lib/x86/asm/pci.h create mode 100644 tests/kvm-unit-tests/lib/x86/asm/spinlock.h create mode 100644 tests/kvm-unit-tests/lib/x86/asm/stack.h create mode 100644 tests/kvm-unit-tests/lib/x86/atomic.c create mode 100644 tests/kvm-unit-tests/lib/x86/atomic.h create mode 100644 tests/kvm-unit-tests/lib/x86/desc.c create mode 100644 tests/kvm-unit-tests/lib/x86/desc.h create 
mode 100644 tests/kvm-unit-tests/lib/x86/fake-apic.h create mode 100644 tests/kvm-unit-tests/lib/x86/fwcfg.c create mode 100644 tests/kvm-unit-tests/lib/x86/fwcfg.h create mode 100644 tests/kvm-unit-tests/lib/x86/intel-iommu.c create mode 100644 tests/kvm-unit-tests/lib/x86/intel-iommu.h create mode 100644 tests/kvm-unit-tests/lib/x86/io.c create mode 100644 tests/kvm-unit-tests/lib/x86/isr.c create mode 100644 tests/kvm-unit-tests/lib/x86/isr.h create mode 100644 tests/kvm-unit-tests/lib/x86/msr.h create mode 100644 tests/kvm-unit-tests/lib/x86/processor.h create mode 100644 tests/kvm-unit-tests/lib/x86/setjmp32.S create mode 100644 tests/kvm-unit-tests/lib/x86/setjmp64.S create mode 100644 tests/kvm-unit-tests/lib/x86/setup.c create mode 100644 tests/kvm-unit-tests/lib/x86/smp.c create mode 100644 tests/kvm-unit-tests/lib/x86/smp.h create mode 100644 tests/kvm-unit-tests/lib/x86/stack.c create mode 100644 tests/kvm-unit-tests/lib/x86/vm.c create mode 100644 tests/kvm-unit-tests/lib/x86/vm.h create mode 100755 tests/kvm-unit-tests/run.js create mode 100644 tests/kvm-unit-tests/x86/Makefile create mode 100644 tests/kvm-unit-tests/x86/Makefile.common create mode 100644 tests/kvm-unit-tests/x86/Makefile.i386 create mode 100644 tests/kvm-unit-tests/x86/Makefile.x86_64 create mode 100644 tests/kvm-unit-tests/x86/README create mode 100644 tests/kvm-unit-tests/x86/access.c create mode 100644 tests/kvm-unit-tests/x86/apic.c create mode 100644 tests/kvm-unit-tests/x86/asyncpf.c create mode 100644 tests/kvm-unit-tests/x86/cmpxchg8b.c create mode 100644 tests/kvm-unit-tests/x86/cstart.S create mode 100644 tests/kvm-unit-tests/x86/cstart64.S create mode 100644 tests/kvm-unit-tests/x86/debug.c create mode 100644 tests/kvm-unit-tests/x86/emulator.c create mode 100644 tests/kvm-unit-tests/x86/eventinj.c create mode 100644 tests/kvm-unit-tests/x86/flat.lds create mode 100644 tests/kvm-unit-tests/x86/hypercall.c create mode 100644 tests/kvm-unit-tests/x86/hyperv.c create mode 
100644 tests/kvm-unit-tests/x86/hyperv.h create mode 100644 tests/kvm-unit-tests/x86/hyperv_clock.c create mode 100644 tests/kvm-unit-tests/x86/hyperv_stimer.c create mode 100644 tests/kvm-unit-tests/x86/hyperv_synic.c create mode 100644 tests/kvm-unit-tests/x86/idt_test.c create mode 100644 tests/kvm-unit-tests/x86/init.c create mode 100644 tests/kvm-unit-tests/x86/intel-iommu.c create mode 100644 tests/kvm-unit-tests/x86/ioapic.c create mode 100644 tests/kvm-unit-tests/x86/ioram.h create mode 100644 tests/kvm-unit-tests/x86/kvmclock.c create mode 100644 tests/kvm-unit-tests/x86/kvmclock.h create mode 100644 tests/kvm-unit-tests/x86/kvmclock_test.c create mode 100644 tests/kvm-unit-tests/x86/memory.c create mode 100644 tests/kvm-unit-tests/x86/msr.c create mode 100644 tests/kvm-unit-tests/x86/pcid.c create mode 100644 tests/kvm-unit-tests/x86/pku.c create mode 100644 tests/kvm-unit-tests/x86/pmu.c create mode 100644 tests/kvm-unit-tests/x86/port80.c create mode 100644 tests/kvm-unit-tests/x86/realmode.c create mode 100644 tests/kvm-unit-tests/x86/realmode.lds create mode 100644 tests/kvm-unit-tests/x86/rmap_chain.c create mode 100755 tests/kvm-unit-tests/x86/run create mode 100644 tests/kvm-unit-tests/x86/s3.c create mode 100644 tests/kvm-unit-tests/x86/setjmp.c create mode 100644 tests/kvm-unit-tests/x86/sieve.c create mode 100644 tests/kvm-unit-tests/x86/smap.c create mode 100644 tests/kvm-unit-tests/x86/smptest.c create mode 100644 tests/kvm-unit-tests/x86/svm.c create mode 100644 tests/kvm-unit-tests/x86/svm.h create mode 100644 tests/kvm-unit-tests/x86/taskswitch.c create mode 100644 tests/kvm-unit-tests/x86/taskswitch2.c create mode 100644 tests/kvm-unit-tests/x86/tsc.c create mode 100644 tests/kvm-unit-tests/x86/tsc_adjust.c create mode 100644 tests/kvm-unit-tests/x86/tscdeadline_latency.c create mode 100644 tests/kvm-unit-tests/x86/types.h create mode 100644 tests/kvm-unit-tests/x86/unittests.cfg create mode 100644 tests/kvm-unit-tests/x86/vmexit.c create 
mode 100644 tests/kvm-unit-tests/x86/vmx.c create mode 100644 tests/kvm-unit-tests/x86/vmx.h create mode 100644 tests/kvm-unit-tests/x86/vmx_tests.c create mode 100644 tests/kvm-unit-tests/x86/xsave.c diff --git a/tests/kvm-unit-tests/.gitignore b/tests/kvm-unit-tests/.gitignore new file mode 100644 index 00000000..2213b9b1 --- /dev/null +++ b/tests/kvm-unit-tests/.gitignore @@ -0,0 +1,19 @@ +.gdbinit +*.a +*.d +*.o +*.flat +*.elf +.pc +patches +.stgit-* +cscope.* +*.swp +/lib/asm +/config.mak +/*-run +/msr.out +/tests +/build-head +/logs/ +/logs.old/ diff --git a/tests/kvm-unit-tests/COPYRIGHT b/tests/kvm-unit-tests/COPYRIGHT new file mode 100644 index 00000000..d35649cb --- /dev/null +++ b/tests/kvm-unit-tests/COPYRIGHT @@ -0,0 +1,4 @@ +Copyright (C) 2006 Qumranet. + +The files in this directory and its subdirectories are licensed under the +GNU LGPL, version 2. diff --git a/tests/kvm-unit-tests/MAINTAINERS b/tests/kvm-unit-tests/MAINTAINERS new file mode 100644 index 00000000..b86dea73 --- /dev/null +++ b/tests/kvm-unit-tests/MAINTAINERS @@ -0,0 +1,84 @@ +KVM Unit Tests Maintainers +========================== + +The intention of this file is not to establish who owns what portions of the +code base, but to provide a set of names that developers can consult when they +have a question about a particular subset and also to provide a set of names +to be CC'd when submitting a patch to obtain appropriate review. + +In general, if you have a question about inclusion of a patch, you +should consult the KVM mailing list and not any +specific individual privately. + +Descriptions of section entries: + + M: Mail patches to: FullName + L: Mailing list that is relevant to this area + W: Web-page with status/info + Q: Patchwork web based patch tracking system site + T: SCM tree type and location. Type is one of: git, hg, quilt, stgit. + S: Status, one of the following: + Supported: Someone is actually paid to look after this. + Maintained: Someone actually looks after it. 
+ Odd Fixes: It has a maintainer but they don't have time to do + much other than throw the odd patch in. See below. + Orphan: No current maintainer [but maybe you could take the + role as you write your new code]. + Obsolete: Old code. Something tagged obsolete generally means + it has been replaced by a better system and you + should be using that. + F: Files and directories with wildcard patterns. + A trailing slash includes all files and subdirectory files. + F: drivers/net/ all files in and below drivers/net + F: drivers/net/* all files in drivers/net, but not below + F: */net/* all files in "any top level directory"/net + One pattern per line. Multiple F: lines acceptable. + X: Files and directories that are NOT maintained, same rules as F: + Files exclusions are tested before file matches. + Can be useful for excluding a specific subdirectory, for instance: + F: net/ + X: net/ipv6/ + matches all files in and below net excluding net/ipv6/ + K: Keyword perl extended regex pattern to match content in a + patch or file. For instance: + K: of_get_profile + matches patches or files that contain "of_get_profile" + K: \b(printk|pr_(info|err))\b + matches patches or files that contain one or more of the words + printk, pr_info or pr_err + One regex pattern per line. Multiple K: lines acceptable. 
+ + +Maintainers +----------- +M: Paolo Bonzini +M: Radim Krčmář +L: kvm@vger.kernel.org +T: git://git.kernel.org/pub/scm/virt/kvm/kvm-unit-tests.git + +Architecture Specific Code: +--------------------------- + +ARM +M: Drew Jones +L: kvm@vger.kernel.org +L: kvmarm@lists.cs.columbia.edu +F: arm/* +F: lib/arm/* +F: lib/arm64/* + +POWERPC +M: Laurent Vivier +M: Thomas Huth +L: kvm@vger.kernel.org +L: kvm-ppc@vger.kernel.org +F: powerpc/* +F: lib/powerpc/* +F: lib/ppc64/* + +X86 +M: Paolo Bonzini +M: Radim Krčmář +L: kvm@vger.kernel.org +F: x86/* +F: lib/x86/* diff --git a/tests/kvm-unit-tests/Makefile b/tests/kvm-unit-tests/Makefile new file mode 100644 index 00000000..16ce2970 --- /dev/null +++ b/tests/kvm-unit-tests/Makefile @@ -0,0 +1,109 @@ + +SHELL := /bin/bash + +ifeq ($(wildcard config.mak),) +$(error run ./configure first. See ./configure -h) +endif + +include config.mak + +libdirs-get = $(shell [ -d "lib/$(1)" ] && echo "lib/$(1) lib/$(1)/asm") +ARCH_LIBDIRS := $(call libdirs-get,$(ARCH)) $(call libdirs-get,$(TEST_DIR)) + +DESTDIR := $(PREFIX)/share/kvm-unit-tests/ + +.PHONY: arch_clean clean distclean cscope + +#make sure env CFLAGS variable is not used +CFLAGS = + +libgcc := $(shell $(CC) --print-libgcc-file-name) + +libcflat := lib/libcflat.a +cflatobjs := \ + lib/argv.o \ + lib/printf.o \ + lib/string.o \ + lib/abort.o \ + lib/report.o \ + lib/stack.o + +# libfdt paths +LIBFDT_objdir = lib/libfdt +LIBFDT_srcdir = lib/libfdt +LIBFDT_archive = $(LIBFDT_objdir)/libfdt.a +LIBFDT_include = $(addprefix $(LIBFDT_srcdir)/,$(LIBFDT_INCLUDES)) +LIBFDT_version = $(addprefix $(LIBFDT_srcdir)/,$(LIBFDT_VERSION)) + +#include architecure specific make rules +include $(TEST_DIR)/Makefile + +# cc-option +# Usage: OP_CFLAGS+=$(call cc-option, -falign-functions=0, -malign-functions=0) + +cc-option = $(shell if $(CC) $(1) -S -o /dev/null -xc /dev/null \ + > /dev/null 2>&1; then echo "$(1)"; else echo "$(2)"; fi ;) + +CFLAGS += -g +CFLAGS += $(autodepend-flags) -Wall 
-Werror +frame-pointer-flag=-f$(if $(KEEP_FRAME_POINTER),no-,)omit-frame-pointer +fomit_frame_pointer := $(call cc-option, $(frame-pointer-flag), "") +fnostack_protector := $(call cc-option, -fno-stack-protector, "") +fnostack_protector_all := $(call cc-option, -fno-stack-protector-all, "") +wno_frame_address := $(call cc-option, -Wno-frame-address, "") +fno_pic := $(call cc-option, -fno-pic, "") +no_pie := $(call cc-option, -no-pie, "") +CFLAGS += $(fomit_frame_pointer) +CFLAGS += $(fno_stack_protector) +CFLAGS += $(fno_stack_protector_all) +CFLAGS += $(wno_frame_address) +CFLAGS += $(if $(U32_LONG_FMT),-D__U32_LONG_FMT__,) +CFLAGS += $(fno_pic) $(no_pie) + +CXXFLAGS += $(CFLAGS) + +autodepend-flags = -MMD -MF $(dir $*).$(notdir $*).d + +LDFLAGS += $(CFLAGS) +LDFLAGS += -pthread -lrt + +$(libcflat): $(cflatobjs) + $(AR) rcs $@ $^ + +include $(LIBFDT_srcdir)/Makefile.libfdt +$(LIBFDT_archive): CFLAGS += -ffreestanding -I lib -I lib/libfdt -Wno-sign-compare +$(LIBFDT_archive): $(addprefix $(LIBFDT_objdir)/,$(LIBFDT_OBJS)) + $(AR) rcs $@ $^ + +%.o: %.S + $(CC) $(CFLAGS) -c -nostdlib -o $@ $< + +-include */.*.d */*/.*.d + +all: $(shell git rev-parse --verify --short=8 HEAD >build-head 2>/dev/null) + +standalone: all + @scripts/mkstandalone.sh + +install: standalone + mkdir -p $(DESTDIR) + install tests/* $(DESTDIR) + +clean: arch_clean + $(RM) lib/.*.d $(libcflat) $(cflatobjs) + +libfdt_clean: + $(RM) $(LIBFDT_archive) \ + $(addprefix $(LIBFDT_objdir)/,$(LIBFDT_OBJS)) \ + $(LIBFDT_objdir)/.*.d + +distclean: clean libfdt_clean + $(RM) lib/asm config.mak $(TEST_DIR)-run msr.out cscope.* build-head + $(RM) -r tests logs logs.old + +cscope: cscope_dirs = lib lib/libfdt lib/linux $(TEST_DIR) $(ARCH_LIBDIRS) lib/asm-generic +cscope: + $(RM) ./cscope.* + find -L $(cscope_dirs) -maxdepth 1 \ + -name '*.[chsS]' -print | sed 's,^\./,,' | sort -u > ./cscope.files + cscope -bk diff --git a/tests/kvm-unit-tests/README.md b/tests/kvm-unit-tests/README.md new file mode 100644 index 
00000000..28a4ffd5 --- /dev/null +++ b/tests/kvm-unit-tests/README.md @@ -0,0 +1,131 @@ +# kvm-unit-tests for v86 + +Run the following to run this test: + +```sh +./configure +make +make -C ../../build/libv86.js +./run.js x86/realmode.flat +./run.js x86/setjmp.flat +./run.js x86/cmpxchg8b.flat +./run.js x86/sieve.flat +./run.js x86/ioapic.flat +./run.js x86/apic.flat +``` + +Tests can also be run in browser by going to `?profile=test-$name` (for +example, `?profile=test-realmode`). + + +# Welcome to kvm-unit-tests + +See http://www.linux-kvm.org/page/KVM-unit-tests for a high-level +description of this project, as well as running tests and adding +tests HOWTOs. + +# Building the tests + +This directory contains sources for a kvm test suite. + +To create the test images do: + + ./configure + make + +in this directory. Test images are created in .//*.flat + +## Standalone tests + +The tests can be built as standalone +To create and use standalone tests do: + + ./configure + make standalone + (send tests/some-test somewhere) + (go to somewhere) + ./some-test + +'make install' will install all tests in PREFIX/share/kvm-unit-tests/tests, +each as a standalone test. + + +# Running the tests + +Then use the runner script to detect the correct invocation and +invoke the test: + + ./x86-run ./x86/msr.flat +or: + + ./run_tests.sh + +to run them all. + +To select a specific qemu binary, specify the QEMU= +environment variable: + + QEMU=/tmp/qemu/x86_64-softmmu/qemu-system-x86_64 ./x86-run ./x86/msr.flat + +# Unit test inputs + +Unit tests use QEMU's '-append ' parameter for command line +inputs, i.e. all args will be available as argv strings in main(). +Additionally a file of the form + +KEY=VAL +KEY2=VAL +... + +may be passed with '-initrd ' to become the unit test's environ, +which can then be accessed in the usual ways, e.g. VAL = getenv("KEY") +Any key=val strings can be passed, but some have reserved meanings in +the framework. 
The list of reserved environment variables is below + + QEMU_ACCEL ... either kvm or tcg + QEMU_VERSION_STRING ... string of the form `qemu -h | head -1` + KERNEL_VERSION_STRING ... string of the form `uname -r` + +Additionally these self-explanatory variables are reserved + + QEMU_MAJOR, QEMU_MINOR, QEMU_MICRO, KERNEL_VERSION, KERNEL_PATCHLEVEL, + KERNEL_SUBLEVEL, KERNEL_EXTRAVERSION + +# Contributing + +## Directory structure + + .: configure script, top-level Makefile, and run_tests.sh + ./scripts: helper scripts for building and running tests + ./lib: general architecture neutral services for the tests + ./lib/: architecture dependent services for the tests + ./: the sources of the tests and the created objects/images + +See /README for architecture specific documentation. + +## Style + +Currently there is a mix of indentation styles so any changes to +existing files should be consistent with the existing style. For new +files: + + - C: please use standard linux-with-tabs + - Shell: use TABs for indentation + +## Patches + +Patches are welcome at the KVM mailing list . + +Please prefix messages with: [kvm-unit-tests PATCH] + +You can add the following to .git/config to do this automatically for you: + + [format] + subjectprefix = kvm-unit-tests PATCH + +Additionally it's helpful to have a common order of file types in patches. +Our chosen order attempts to place the more declarative files before +the code files. We also start with common code and finish with unit test +code. git-diff's orderFile feature allows us to specify the order in a +file. The orderFile we use is `scripts/git.difforder`. Adding the config +with `git config diff.orderFile scripts/git.difforder` enables it. 
diff --git a/tests/kvm-unit-tests/configure b/tests/kvm-unit-tests/configure new file mode 100755 index 00000000..b653b65f --- /dev/null +++ b/tests/kvm-unit-tests/configure @@ -0,0 +1,167 @@ +#!/bin/bash + +prefix=/usr/local +cc=gcc +ld=ld +objcopy=objcopy +objdump=objdump +ar=ar +addr2line=addr2line +arch=i386 +host=$arch +cross_prefix= +endian="" +pretty_print_stacks=yes +u32_long= + +usage() { + cat <<-EOF + Usage: $0 [options] + + Options include: + --arch=ARCH architecture to compile for ($arch) + --processor=PROCESSOR processor to compile for ($arch) + --cross-prefix=PREFIX cross compiler prefix + --cc=CC c compiler to use ($cc) + --ld=LD ld linker to use ($ld) + --prefix=PREFIX where to install things ($prefix) + --endian=ENDIAN endianness to compile for (little or big, ppc64 only) + --[enable|disable]-pretty-print-stacks + enable or disable pretty stack printing (enabled by default) +EOF + exit 1 +} + +while [[ "$1" = -* ]]; do + opt="$1"; shift + arg= + if [[ "$opt" = *=* ]]; then + arg="${opt#*=}" + opt="${opt%%=*}" + fi + case "$opt" in + --prefix) + prefix="$arg" + ;; + --arch) + arch="$arg" + ;; + --processor) + processor="$arg" + ;; + --cross-prefix) + cross_prefix="$arg" + ;; + --endian) + endian="$arg" + ;; + --cc) + cc="$arg" + ;; + --ld) + ld="$arg" + ;; + --enable-pretty-print-stacks) + pretty_print_stacks=yes + ;; + --disable-pretty-print-stacks) + pretty_print_stacks=no + ;; + --help) + usage + ;; + *) + usage + ;; + esac +done + +arch_name=$arch +[ "$arch" = "aarch64" ] && arch="arm64" +[ "$arch_name" = "arm64" ] && arch_name="aarch64" + +[ -z "$processor" ] && processor="$arch" + +if [ "$processor" = "arm64" ]; then + processor="cortex-a57" +elif [ "$processor" = "arm" ]; then + processor="cortex-a15" +fi + +if [ "$arch" = "i386" ] || [ "$arch" = "x86_64" ]; then + testdir=x86 +elif [ "$arch" = "arm" ] || [ "$arch" = "arm64" ]; then + testdir=arm +elif [ "$arch" = "ppc64" ]; then + testdir=powerpc + firmware="$testdir/boot_rom.bin" + if [ 
"$endian" != "little" ] && [ "$endian" != "big" ]; then + echo "You must provide endianness (big or little)!" + usage + fi +else + testdir=$arch +fi +if [ ! -d $testdir ]; then + echo "$testdir does not exist!" + exit 1 +fi +if [ -f $testdir/run ]; then + ln -fs $testdir/run $testdir-run +fi + +# check if uint32_t needs a long format modifier +cat << EOF > lib-test.c +__UINT32_TYPE__ +EOF +u32_long=$($cross_prefix$cc -E lib-test.c | grep -v '^#' | grep -q long && echo yes) +rm -f lib-test.c + +# check for dependent 32 bit libraries +if [ "$arch" != "arm" ]; then +cat << EOF > lib_test.c +#include +#include +#include + +int main () +{} +EOF +$cc -m32 -o /dev/null lib_test.c &> /dev/null +exit=$? +if [ $exit -eq 0 ]; then + api=true +fi +rm -f lib_test.c +fi + +# link lib/asm for the architecture +rm -f lib/asm +asm=asm-generic +if [ -d lib/$arch/asm ]; then + asm=$arch/asm +elif [ -d lib/$testdir/asm ]; then + asm=$testdir/asm +fi +ln -s $asm lib/asm + +# create the config +cat < config.mak +PREFIX=$prefix +HOST=$host +ARCH=$arch +ARCH_NAME=$arch_name +PROCESSOR=$processor +CC=$cross_prefix$cc +LD=$cross_prefix$ld +OBJCOPY=$cross_prefix$objcopy +OBJDUMP=$cross_prefix$objdump +AR=$cross_prefix$ar +ADDR2LINE=$cross_prefix$addr2line +API=$api +TEST_DIR=$testdir +FIRMWARE=$firmware +ENDIAN=$endian +PRETTY_PRINT_STACKS=$pretty_print_stacks +U32_LONG_FMT=$u32_long +EOF diff --git a/tests/kvm-unit-tests/lib/abort.c b/tests/kvm-unit-tests/lib/abort.c new file mode 100644 index 00000000..61f7f924 --- /dev/null +++ b/tests/kvm-unit-tests/lib/abort.c @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2014, Red Hat Inc, Andrew Jones + * + * This work is licensed under the terms of the GNU LGPL, version 2. + */ +#include "libcflat.h" + +/* + * When exit(code) is invoked, qemu will exit with ((code << 1) | 1), + * leaving us 128 exit status codes. To avoid confusion with signal + * status, we further limit exit codes to those resulting in qemu + * exiting with a status < 128. 
We give abort() the highest (127), + * leaving the lower status codes for unit tests. + */ +#define ABORT_EXIT_STATUS 63 /* 127 exit status from qemu */ + +void abort(void) +{ + exit(ABORT_EXIT_STATUS); +} diff --git a/tests/kvm-unit-tests/lib/alloc.c b/tests/kvm-unit-tests/lib/alloc.c new file mode 100644 index 00000000..58af52b3 --- /dev/null +++ b/tests/kvm-unit-tests/lib/alloc.c @@ -0,0 +1,179 @@ +/* + * Copyright (C) 2014, Red Hat Inc, Andrew Jones + * + * This work is licensed under the terms of the GNU LGPL, version 2. + */ +#include "alloc.h" +#include "asm/spinlock.h" +#include "asm/io.h" + +#define PHYS_ALLOC_NR_REGIONS 256 + +struct phys_alloc_region { + phys_addr_t base; + phys_addr_t size; +}; + +static struct phys_alloc_region regions[PHYS_ALLOC_NR_REGIONS]; +static int nr_regions; + +static struct spinlock lock; +static phys_addr_t base, top, align_min; + +void phys_alloc_show(void) +{ + int i; + + spin_lock(&lock); + printf("phys_alloc minimum alignment: 0x%" PRIx64 "\n", + (u64)align_min); + for (i = 0; i < nr_regions; ++i) + printf("%016" PRIx64 "-%016" PRIx64 " [%s]\n", + (u64)regions[i].base, + (u64)(regions[i].base + regions[i].size - 1), + "USED"); + printf("%016" PRIx64 "-%016" PRIx64 " [%s]\n", + (u64)base, (u64)(top - 1), "FREE"); + spin_unlock(&lock); +} + +void phys_alloc_init(phys_addr_t base_addr, phys_addr_t size) +{ + spin_lock(&lock); + base = base_addr; + top = base + size; + align_min = DEFAULT_MINIMUM_ALIGNMENT; + nr_regions = 0; + spin_unlock(&lock); +} + +void phys_alloc_set_minimum_alignment(phys_addr_t align) +{ + assert(align && !(align & (align - 1))); + spin_lock(&lock); + align_min = align; + spin_unlock(&lock); +} + +static phys_addr_t phys_alloc_aligned_safe(phys_addr_t size, + phys_addr_t align, bool safe) +{ + static bool warned = false; + phys_addr_t addr, size_orig = size; + u64 top_safe; + + spin_lock(&lock); + + top_safe = top; + + if (safe && sizeof(long) == 4) + top_safe = MIN(top_safe, 1ULL << 32); + + align = 
MAX(align, align_min); + + addr = ALIGN(base, align); + size += addr - base; + + if ((top_safe - base) < size) { + printf("phys_alloc: requested=0x%" PRIx64 + " (align=0x%" PRIx64 "), " + "need=0x%" PRIx64 ", but free=0x%" PRIx64 ". " + "top=0x%" PRIx64 ", top_safe=0x%" PRIx64 "\n", + (u64)size_orig, (u64)align, (u64)size, top_safe - base, + (u64)top, top_safe); + spin_unlock(&lock); + return INVALID_PHYS_ADDR; + } + + base += size; + + if (nr_regions < PHYS_ALLOC_NR_REGIONS) { + regions[nr_regions].base = addr; + regions[nr_regions].size = size_orig; + ++nr_regions; + } else if (!warned) { + printf("WARNING: phys_alloc: No free log entries, " + "can no longer log allocations...\n"); + warned = true; + } + + spin_unlock(&lock); + + return addr; +} + +static phys_addr_t phys_zalloc_aligned_safe(phys_addr_t size, + phys_addr_t align, bool safe) +{ + phys_addr_t addr = phys_alloc_aligned_safe(size, align, safe); + if (addr == INVALID_PHYS_ADDR) + return addr; + + memset(phys_to_virt(addr), 0, size); + return addr; +} + +phys_addr_t phys_alloc_aligned(phys_addr_t size, phys_addr_t align) +{ + return phys_alloc_aligned_safe(size, align, false); +} + +phys_addr_t phys_zalloc_aligned(phys_addr_t size, phys_addr_t align) +{ + return phys_zalloc_aligned_safe(size, align, false); +} + +phys_addr_t phys_alloc(phys_addr_t size) +{ + return phys_alloc_aligned(size, align_min); +} + +phys_addr_t phys_zalloc(phys_addr_t size) +{ + return phys_zalloc_aligned(size, align_min); +} + +static void *early_malloc(size_t size) +{ + phys_addr_t addr = phys_alloc_aligned_safe(size, align_min, true); + if (addr == INVALID_PHYS_ADDR) + return NULL; + + return phys_to_virt(addr); +} + +static void *early_calloc(size_t nmemb, size_t size) +{ + phys_addr_t addr = phys_zalloc_aligned_safe(nmemb * size, + align_min, true); + if (addr == INVALID_PHYS_ADDR) + return NULL; + + return phys_to_virt(addr); +} + +static void early_free(void *ptr __unused) +{ +} + +static void *early_memalign(size_t 
alignment, size_t size) +{ + phys_addr_t addr; + + assert(alignment && !(alignment & (alignment - 1))); + + addr = phys_alloc_aligned_safe(size, alignment, true); + if (addr == INVALID_PHYS_ADDR) + return NULL; + + return phys_to_virt(addr); +} + +static struct alloc_ops early_alloc_ops = { + .malloc = early_malloc, + .calloc = early_calloc, + .free = early_free, + .memalign = early_memalign, +}; + +struct alloc_ops *alloc_ops = &early_alloc_ops; diff --git a/tests/kvm-unit-tests/lib/alloc.h b/tests/kvm-unit-tests/lib/alloc.h new file mode 100644 index 00000000..81f5369c --- /dev/null +++ b/tests/kvm-unit-tests/lib/alloc.h @@ -0,0 +1,116 @@ +#ifndef _ALLOC_H_ +#define _ALLOC_H_ +/* + * alloc supplies three ingredients to the test framework that are all + * related to the support of dynamic memory allocation. + * + * The first is a set of alloc function wrappers for malloc and its + * friends. Using wrappers allows test code and common code to use the + * same interface for memory allocation at all stages, even though the + * implementations may change with the stage, e.g. pre/post paging. + * + * The second is a set of implementations for the alloc function + * interfaces. These implementations are named early_*, as they can be + * used almost immediately by the test framework. + * + * The third is a very simple physical memory allocator, which the + * early_* alloc functions build on. + * + * Copyright (C) 2014, Red Hat Inc, Andrew Jones + * + * This work is licensed under the terms of the GNU LGPL, version 2. 
+ */ +#include "libcflat.h" + +struct alloc_ops { + void *(*malloc)(size_t size); + void *(*calloc)(size_t nmemb, size_t size); + void (*free)(void *ptr); + void *(*memalign)(size_t alignment, size_t size); +}; + +/* + * alloc_ops is initialized to early_alloc_ops + */ +extern struct alloc_ops *alloc_ops; + +static inline void *malloc(size_t size) +{ + assert(alloc_ops && alloc_ops->malloc); + return alloc_ops->malloc(size); +} + +static inline void *calloc(size_t nmemb, size_t size) +{ + assert(alloc_ops && alloc_ops->calloc); + return alloc_ops->calloc(nmemb, size); +} + +static inline void free(void *ptr) +{ + assert(alloc_ops && alloc_ops->free); + alloc_ops->free(ptr); +} + +static inline void *memalign(size_t alignment, size_t size) +{ + assert(alloc_ops && alloc_ops->memalign); + return alloc_ops->memalign(alignment, size); +} + +/* + * phys_alloc is a very simple allocator which allows physical memory + * to be partitioned into regions until all memory is allocated. + * + * Note: This is such a simple allocator that there is no way to free + * a region. For more complicated memory management a single region + * can be allocated, but then have its memory managed by a more + * sophisticated allocator, e.g. a page allocator. + */ +#define DEFAULT_MINIMUM_ALIGNMENT 32 + +/* + * phys_alloc_init creates the initial free memory region of size @size + * at @base. The minimum alignment is set to DEFAULT_MINIMUM_ALIGNMENT. + */ +extern void phys_alloc_init(phys_addr_t base, phys_addr_t size); + +/* + * phys_alloc_set_minimum_alignment sets the minimum alignment to + * @align. + */ +extern void phys_alloc_set_minimum_alignment(phys_addr_t align); + +/* + * phys_alloc_aligned returns the base address of a region of size @size, + * where the address is aligned to @align, or INVALID_PHYS_ADDR if there + * isn't enough free memory to satisfy the request. 
+ */ +extern phys_addr_t phys_alloc_aligned(phys_addr_t size, phys_addr_t align); + +/* + * phys_zalloc_aligned is like phys_alloc_aligned, but zeros the memory + * before returning the address. + */ +extern phys_addr_t phys_zalloc_aligned(phys_addr_t size, phys_addr_t align); + +/* + * phys_alloc returns the base address of a region of size @size, or + * INVALID_PHYS_ADDR if there isn't enough free memory to satisfy the + * request. + */ +extern phys_addr_t phys_alloc(phys_addr_t size); + +/* + * phys_zalloc is like phys_alloc, but zeros the memory before returning. + */ +extern phys_addr_t phys_zalloc(phys_addr_t size); + +/* + * phys_alloc_show outputs all currently allocated regions with the + * following format + * - [] + */ +extern void phys_alloc_show(void); + +#endif /* _ALLOC_H_ */ diff --git a/tests/kvm-unit-tests/lib/argv.c b/tests/kvm-unit-tests/lib/argv.c new file mode 100644 index 00000000..a37fc879 --- /dev/null +++ b/tests/kvm-unit-tests/lib/argv.c @@ -0,0 +1,141 @@ +#include "libcflat.h" +#include "auxinfo.h" + +int __argc; +char *__args; +char *__argv[100]; +char *__environ[200]; + +char **environ = __environ; + +static char args_copy[1000]; +static char *copy_ptr = args_copy; + +#define isblank(c) ((c) == ' ' || (c) == '\t') +#define isalpha(c) (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || (c) == '_') +#define isalnum(c) (isalpha(c) || ((c) >= '0' && (c) <= '9')) + +static char *skip_blanks(char *p) +{ + while (isblank(*p)) + ++p; + return p; +} + +void __setup_args(void) +{ + char *args = __args; + char **argv = __argv + __argc; + + while (*(args = skip_blanks(args)) != '\0') { + *argv++ = copy_ptr; + while (*args != '\0' && !isblank(*args)) + *copy_ptr++ = *args++; + *copy_ptr++ = '\0'; + } + __argc = argv - __argv; +} + +void setup_args(char *args) +{ + if (!args) + return; + + __args = args; + __setup_args(); +} + +void setup_args_progname(char *args) +{ + __argv[0] = copy_ptr; + strcpy(__argv[0], auxinfo.progname); + copy_ptr 
+= strlen(auxinfo.progname) + 1; + ++__argc; + if (args) { + __args = args; + __setup_args(); + } +} + +static char *env_eol(char *env) +{ + while (*env && *env != '\n') + ++env; + return env; +} + +static char *env_invalid_eol(char *env) +{ + char *eol = env_eol(env); + char eol_old = *eol; + + *eol = '\0'; + printf("Invalid environment variable: %s\n", env); + *eol = eol_old; + return eol; +} + +static char *env_next(char *env) +{ + char *p; + + if (!*env) + return env; + + if (isalpha(*env)) { + bool invalid = false; + + p = env + 1; + while (*p && *p != '=' && *p != '\n') { + if (!isalnum(*p)) + invalid = true; + ++p; + } + + if (*p != '=') + invalid = true; + + if (invalid) { + env = env_invalid_eol(env); + return *env ? env_next(env + 1) : env; + } + return env; + } + + p = env; + while (isblank(*p)) + ++p; + + if (*p == '\n') + return env_next(p + 1); + + if (*p == '#') + env = env_eol(env); + else + env = env_invalid_eol(env); + + return *env ? env_next(env + 1) : env; +} + +void setup_env(char *env, int size) +{ + char *eof = env + size, *p = env; + bool newline = false; + int i = 0; + + while (*p) + ++p; + if (p == eof) + newline = true; + + while (env < eof) { + if (newline) + env = env_next(env); + if (!*env || env >= eof) + break; + __environ[i++] = env; + while (env < eof && *env && !(newline && *env == '\n')) + ++env; + *env++ = '\0'; + } +} diff --git a/tests/kvm-unit-tests/lib/asm-generic/atomic.h b/tests/kvm-unit-tests/lib/asm-generic/atomic.h new file mode 100644 index 00000000..26b645a7 --- /dev/null +++ b/tests/kvm-unit-tests/lib/asm-generic/atomic.h @@ -0,0 +1,21 @@ +#ifndef __ASM_GENERIC_ATOMIC_H__ +#define __ASM_GENERIC_ATOMIC_H__ + +/* From QEMU include/qemu/atomic.h */ +#define atomic_fetch_inc(ptr) __sync_fetch_and_add(ptr, 1) +#define atomic_fetch_dec(ptr) __sync_fetch_and_add(ptr, -1) +#define atomic_fetch_add(ptr, n) __sync_fetch_and_add(ptr, n) +#define atomic_fetch_sub(ptr, n) __sync_fetch_and_sub(ptr, n) +#define 
atomic_fetch_and(ptr, n) __sync_fetch_and_and(ptr, n) +#define atomic_fetch_or(ptr, n) __sync_fetch_and_or(ptr, n) +#define atomic_fetch_xor(ptr, n) __sync_fetch_and_xor(ptr, n) + +#define atomic_inc_fetch(ptr) __sync_add_and_fetch(ptr, 1) +#define atomic_dec_fetch(ptr) __sync_add_and_fetch(ptr, -1) +#define atomic_add_fetch(ptr, n) __sync_add_and_fetch(ptr, n) +#define atomic_sub_fetch(ptr, n) __sync_sub_and_fetch(ptr, n) +#define atomic_and_fetch(ptr, n) __sync_and_and_fetch(ptr, n) +#define atomic_or_fetch(ptr, n) __sync_or_and_fetch(ptr, n) +#define atomic_xor_fetch(ptr, n) __sync_xor_and_fetch(ptr, n) + +#endif diff --git a/tests/kvm-unit-tests/lib/asm-generic/barrier.h b/tests/kvm-unit-tests/lib/asm-generic/barrier.h new file mode 100644 index 00000000..6a990ff8 --- /dev/null +++ b/tests/kvm-unit-tests/lib/asm-generic/barrier.h @@ -0,0 +1,35 @@ +#ifndef _ASM_BARRIER_H_ +#define _ASM_BARRIER_H_ +/* + * asm-generic/barrier.h + * + * Copyright (C) 2016, Red Hat Inc, Alexander Gordeev + * + * This work is licensed under the terms of the GNU LGPL, version 2. 
+ */ + +#ifndef mb +#define mb() asm volatile("":::"memory") +#endif +#ifndef rmb +#define rmb() asm volatile("":::"memory") +#endif +#ifndef wmb +#define wmb() asm volatile("":::"memory") +#endif + +#ifndef smp_mb +#define smp_mb() mb() +#endif +#ifndef smp_rmb +#define smp_rmb() rmb() +#endif +#ifndef smp_wmb +#define smp_wmb() wmb() +#endif + +#ifndef cpu_relax +#define cpu_relax() asm volatile ("":::"memory") +#endif + +#endif /* _ASM_BARRIER_H_ */ diff --git a/tests/kvm-unit-tests/lib/asm-generic/io.h b/tests/kvm-unit-tests/lib/asm-generic/io.h new file mode 100644 index 00000000..91a2d799 --- /dev/null +++ b/tests/kvm-unit-tests/lib/asm-generic/io.h @@ -0,0 +1,213 @@ +#ifndef _ASM_GENERIC_IO_H_ +#define _ASM_GENERIC_IO_H_ +/* + * asm-generic/io.h + * adapted from the Linux kernel's include/asm-generic/io.h + * and arch/arm/include/asm/io.h + * + * Copyright (C) 2014, Red Hat Inc, Andrew Jones + * + * This work is licensed under the terms of the GNU LGPL, version 2. + */ +#include "libcflat.h" +#include "asm/page.h" +#include "asm/barrier.h" + +#ifndef __raw_readb +static inline u8 __raw_readb(const volatile void *addr) +{ + return *(const volatile u8 *)addr; +} +#endif + +#ifndef __raw_readw +static inline u16 __raw_readw(const volatile void *addr) +{ + return *(const volatile u16 *)addr; +} +#endif + +#ifndef __raw_readl +static inline u32 __raw_readl(const volatile void *addr) +{ + return *(const volatile u32 *)addr; +} +#endif + +#ifndef __raw_readq +static inline u64 __raw_readq(const volatile void *addr) +{ + assert(sizeof(unsigned long) == sizeof(u64)); + return *(const volatile u64 *)addr; +} +#endif + +#ifndef __raw_writeb +static inline void __raw_writeb(u8 b, volatile void *addr) +{ + *(volatile u8 *)addr = b; +} +#endif + +#ifndef __raw_writew +static inline void __raw_writew(u16 b, volatile void *addr) +{ + *(volatile u16 *)addr = b; +} +#endif + +#ifndef __raw_writel +static inline void __raw_writel(u32 b, volatile void *addr) +{ + *(volatile u32 
*)addr = b; +} +#endif + +#ifndef __raw_writeq +static inline void __raw_writeq(u64 b, volatile void *addr) +{ + assert(sizeof(unsigned long) == sizeof(u64)); + *(volatile u64 *)addr = b; +} +#endif + +#ifndef __bswap16 +static inline u16 __bswap16(u16 x) +{ + return ((x >> 8) & 0xff) | ((x & 0xff) << 8); +} +#endif + +#ifndef __bswap32 +static inline u32 __bswap32(u32 x) +{ + return ((x & 0xff000000) >> 24) | ((x & 0x00ff0000) >> 8) | + ((x & 0x0000ff00) << 8) | ((x & 0x000000ff) << 24); +} +#endif + +#ifndef __bswap64 +static inline u64 __bswap64(u64 x) +{ + return ((x & 0x00000000000000ffULL) << 56) | + ((x & 0x000000000000ff00ULL) << 40) | + ((x & 0x0000000000ff0000ULL) << 24) | + ((x & 0x00000000ff000000ULL) << 8) | + ((x & 0x000000ff00000000ULL) >> 8) | + ((x & 0x0000ff0000000000ULL) >> 24) | + ((x & 0x00ff000000000000ULL) >> 40) | + ((x & 0xff00000000000000ULL) >> 56); +} +#endif + +#ifndef __cpu_is_be +#define __cpu_is_be() (0) +#endif + +#define le16_to_cpu(x) \ + ({ u16 __r = __cpu_is_be() ? __bswap16(x) : ((u16)x); __r; }) +#define cpu_to_le16 le16_to_cpu + +#define le32_to_cpu(x) \ + ({ u32 __r = __cpu_is_be() ? __bswap32(x) : ((u32)x); __r; }) +#define cpu_to_le32 le32_to_cpu + +#define le64_to_cpu(x) \ + ({ u64 __r = __cpu_is_be() ? __bswap64(x) : ((u64)x); __r; }) +#define cpu_to_le64 le64_to_cpu + +#define be16_to_cpu(x) \ + ({ u16 __r = !__cpu_is_be() ? __bswap16(x) : ((u16)x); __r; }) +#define cpu_to_be16 be16_to_cpu + +#define be32_to_cpu(x) \ + ({ u32 __r = !__cpu_is_be() ? __bswap32(x) : ((u32)x); __r; }) +#define cpu_to_be32 be32_to_cpu + +#define be64_to_cpu(x) \ + ({ u64 __r = !__cpu_is_be() ? 
__bswap64(x) : ((u64)x); __r; }) +#define cpu_to_be64 be64_to_cpu + +#define readb(addr) \ + ({ u8 __r = __raw_readb(addr); rmb(); __r; }) +#define readw(addr) \ + ({ u16 __r = le16_to_cpu(__raw_readw(addr)); rmb(); __r; }) +#define readl(addr) \ + ({ u32 __r = le32_to_cpu(__raw_readl(addr)); rmb(); __r; }) +#define readq(addr) \ + ({ u64 __r = le64_to_cpu(__raw_readq(addr)); rmb(); __r; }) + +#define writeb(b, addr) \ + ({ wmb(); __raw_writeb(b, addr); }) +#define writew(b, addr) \ + ({ wmb(); __raw_writew(cpu_to_le16(b), addr); }) +#define writel(b, addr) \ + ({ wmb(); __raw_writel(cpu_to_le32(b), addr); }) +#define writeq(b, addr) \ + ({ wmb(); __raw_writeq(cpu_to_le64(b), addr); }) + +#ifndef inb +static inline uint8_t inb(unsigned long port) +{ + return readb((const volatile void __iomem *)port); +} +#endif + +#ifndef inw +static inline uint16_t inw(unsigned long port) +{ + return readw((const volatile void __iomem *)port); +} +#endif + +#ifndef inl +static inline uint32_t inl(unsigned long port) +{ + return readl((const volatile void __iomem *)port); +} +#endif + +#ifndef outb +static inline void outb(uint8_t value, unsigned long port) +{ + writeb(value, (volatile void __iomem *)port); +} +#endif + +#ifndef outw +static inline void outw(uint16_t value, unsigned long port) +{ + writew(value, (volatile void __iomem *)port); +} +#endif + +#ifndef outl +static inline void outl(uint32_t value, unsigned long port) +{ + writel(value, (volatile void __iomem *)port); +} +#endif + +#ifndef ioremap +static inline void __iomem *ioremap(phys_addr_t phys_addr, size_t size __unused) +{ + assert(sizeof(long) == 8 || !(phys_addr >> 32)); + return (void __iomem *)(unsigned long)phys_addr; +} +#endif + +#ifndef virt_to_phys +static inline unsigned long virt_to_phys(volatile void *address) +{ + return __pa((unsigned long)address); +} +#endif + +#ifndef phys_to_virt +static inline void *phys_to_virt(unsigned long address) +{ + return __va(address); +} +#endif + +#endif /* 
_ASM_GENERIC_IO_H_ */ diff --git a/tests/kvm-unit-tests/lib/asm-generic/page.h b/tests/kvm-unit-tests/lib/asm-generic/page.h new file mode 100644 index 00000000..7b8a08bf --- /dev/null +++ b/tests/kvm-unit-tests/lib/asm-generic/page.h @@ -0,0 +1,29 @@ +#ifndef _ASM_GENERIC_PAGE_H_ +#define _ASM_GENERIC_PAGE_H_ +/* + * asm-generic/page.h + * adapted from the Linux kernel's include/asm-generic/page.h + * + * Copyright (C) 2014, Red Hat Inc, Andrew Jones + * + * This work is licensed under the terms of the GNU LGPL, version 2. + */ + +#include + +#define PAGE_SHIFT 12 +#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) + +#ifndef __ASSEMBLY__ + +#define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE) + +#define __va(x) ((void *)((unsigned long) (x))) +#define __pa(x) ((unsigned long) (x)) +#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) +#define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT) + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_GENERIC_PAGE_H_ */ diff --git a/tests/kvm-unit-tests/lib/asm-generic/pci-host-bridge.h b/tests/kvm-unit-tests/lib/asm-generic/pci-host-bridge.h new file mode 100644 index 00000000..9e91499b --- /dev/null +++ b/tests/kvm-unit-tests/lib/asm-generic/pci-host-bridge.h @@ -0,0 +1,28 @@ +#ifndef _ASM_PCI_HOST_BRIDGE_H_ +#define _ASM_PCI_HOST_BRIDGE_H_ +/* + * Copyright (C) 2016, Red Hat Inc, Alexander Gordeev + * + * This work is licensed under the terms of the GNU LGPL, version 2. + */ +#include "libcflat.h" + +phys_addr_t pci_host_bridge_get_paddr(uint64_t addr); + +static inline +phys_addr_t pci_translate_addr(pcidevaddr_t dev __unused, uint64_t addr) +{ + /* + * Assume we only have single PCI host bridge in a system. 
+ */ + return pci_host_bridge_get_paddr(addr); +} + +uint8_t pci_config_readb(pcidevaddr_t dev, uint8_t reg); +uint16_t pci_config_readw(pcidevaddr_t dev, uint8_t reg); +uint32_t pci_config_readl(pcidevaddr_t dev, uint8_t reg); +void pci_config_writeb(pcidevaddr_t dev, uint8_t reg, uint8_t val); +void pci_config_writew(pcidevaddr_t dev, uint8_t reg, uint16_t val); +void pci_config_writel(pcidevaddr_t dev, uint8_t reg, uint32_t val); + +#endif diff --git a/tests/kvm-unit-tests/lib/asm-generic/pci.h b/tests/kvm-unit-tests/lib/asm-generic/pci.h new file mode 100644 index 00000000..3fa0b2ab --- /dev/null +++ b/tests/kvm-unit-tests/lib/asm-generic/pci.h @@ -0,0 +1,4 @@ +#ifndef _ASM_GENERIC_PCI_H_ +#define _ASM_GENERIC_PCI_H_ +#error need architecture specific asm/pci.h +#endif diff --git a/tests/kvm-unit-tests/lib/asm-generic/spinlock.h b/tests/kvm-unit-tests/lib/asm-generic/spinlock.h new file mode 100644 index 00000000..31417442 --- /dev/null +++ b/tests/kvm-unit-tests/lib/asm-generic/spinlock.h @@ -0,0 +1,4 @@ +#ifndef _ASM_GENERIC_SPINLOCK_H_ +#define _ASM_GENERIC_SPINLOCK_H_ +#error need architecture specific asm/spinlock.h +#endif diff --git a/tests/kvm-unit-tests/lib/auxinfo.c b/tests/kvm-unit-tests/lib/auxinfo.c new file mode 100644 index 00000000..bffeac2f --- /dev/null +++ b/tests/kvm-unit-tests/lib/auxinfo.c @@ -0,0 +1,2 @@ +#include "auxinfo.h" +struct auxinfo auxinfo = { PROGNAME }; diff --git a/tests/kvm-unit-tests/lib/auxinfo.h b/tests/kvm-unit-tests/lib/auxinfo.h new file mode 100644 index 00000000..ef2376b4 --- /dev/null +++ b/tests/kvm-unit-tests/lib/auxinfo.h @@ -0,0 +1,9 @@ +#ifndef _AUXINFO_H_ +#define _AUXINFO_H_ +struct auxinfo { + const char *progname; +}; + +/* No extern! Define a common symbol. 
*/ +struct auxinfo auxinfo; +#endif diff --git a/tests/kvm-unit-tests/lib/bitops.h b/tests/kvm-unit-tests/lib/bitops.h new file mode 100644 index 00000000..9aa847e1 --- /dev/null +++ b/tests/kvm-unit-tests/lib/bitops.h @@ -0,0 +1,36 @@ +#ifndef _BITOPS_H_ +#define _BITOPS_H_ + +/* + * Adapated from + * include/linux/bitops.h + * + * Copyright (C) 2015, Red Hat Inc, Andrew Jones + * + * This work is licensed under the terms of the GNU LGPL, version 2. + */ + +#define BITS_PER_LONG_LONG 64 +#define BIT(nr) (1UL << (nr)) +#define BIT_ULL(nr) (1ULL << (nr)) +#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) +#define BIT_WORD(nr) ((nr) / BITS_PER_LONG) +#define BIT_ULL_MASK(nr) (1ULL << ((nr) % BITS_PER_LONG_LONG)) +#define BIT_ULL_WORD(nr) ((nr) / BITS_PER_LONG_LONG) +#define BITS_PER_BYTE 8 +#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) + +#include + +/* + * Create a contiguous bitmask starting at bit position @l and ending at + * position @h. For example + * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. + */ +#define GENMASK(h, l) \ + (((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h)))) + +#define GENMASK_ULL(h, l) \ + (((~0ULL) << (l)) & (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h)))) + +#endif diff --git a/tests/kvm-unit-tests/lib/chr-testdev.c b/tests/kvm-unit-tests/lib/chr-testdev.c new file mode 100644 index 00000000..c19424fd --- /dev/null +++ b/tests/kvm-unit-tests/lib/chr-testdev.c @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2014, Red Hat Inc, Andrew Jones + * + * This work is licensed under the terms of the GNU LGPL, version 2. 
+ */ +#include "libcflat.h" +#include "virtio.h" +#include "asm/spinlock.h" + +#define TESTDEV_NAME "chr-testdev" + +static struct virtio_device *vcon; +static struct virtqueue *in_vq, *out_vq; +static struct spinlock lock; + +static void __testdev_send(char *buf, unsigned int len) +{ + int ret; + + ret = virtqueue_add_outbuf(out_vq, buf, len); + virtqueue_kick(out_vq); + + if (ret < 0) + return; + + while (!virtqueue_get_buf(out_vq, &len)) + ; +} + +void chr_testdev_exit(int code) +{ + unsigned int len; + char buf[8]; + + snprintf(buf, sizeof(buf), "%dq", code); + len = strlen(buf); + + spin_lock(&lock); + + if (!vcon) + goto out; + + __testdev_send(buf, len); + +out: + spin_unlock(&lock); +} + +void chr_testdev_init(void) +{ + const char *io_names[] = { "input", "output" }; + struct virtqueue *vqs[2]; + int ret; + + vcon = virtio_bind(VIRTIO_ID_CONSOLE); + if (vcon == NULL) { + printf("%s: %s: can't find a virtio-console\n", + __func__, TESTDEV_NAME); + return; + } + + ret = vcon->config->find_vqs(vcon, 2, vqs, NULL, io_names); + if (ret < 0) { + printf("%s: %s: can't init virtqueues\n", + __func__, TESTDEV_NAME); + vcon = NULL; + return; + } + + in_vq = vqs[0]; + out_vq = vqs[1]; +} diff --git a/tests/kvm-unit-tests/lib/chr-testdev.h b/tests/kvm-unit-tests/lib/chr-testdev.h new file mode 100644 index 00000000..ffd9a851 --- /dev/null +++ b/tests/kvm-unit-tests/lib/chr-testdev.h @@ -0,0 +1,14 @@ +#ifndef _CHR_TESTDEV_H_ +#define _CHR_TESTDEV_H_ +/* + * chr-testdev is a driver for the chr-testdev qemu backend. + * The chr-testdev backend exposes a simple control interface to + * qemu for kvm-unit-tests accessible through virtio-console. + * + * Copyright (C) 2014, Red Hat Inc, Andrew Jones + * + * This work is licensed under the terms of the GNU LGPL, version 2. 
+ */ +extern void chr_testdev_init(void); +extern void chr_testdev_exit(int code); +#endif diff --git a/tests/kvm-unit-tests/lib/devicetree.c b/tests/kvm-unit-tests/lib/devicetree.c new file mode 100644 index 00000000..2b89178a --- /dev/null +++ b/tests/kvm-unit-tests/lib/devicetree.c @@ -0,0 +1,334 @@ +/* + * Copyright (C) 2014, Red Hat Inc, Andrew Jones + * + * This work is licensed under the terms of the GNU LGPL, version 2. + */ +#include "libcflat.h" +#include "libfdt/libfdt.h" +#include "devicetree.h" + +static const void *fdt; + +const void *dt_fdt(void) +{ + return fdt; +} + +bool dt_available(void) +{ + return fdt_check_header(fdt) == 0; +} + +int dt_get_nr_cells(int fdtnode, u32 *nr_address_cells, u32 *nr_size_cells) +{ + const struct fdt_property *prop; + u32 *nr_cells; + int len, nac, nsc; + + prop = fdt_get_property(fdt, fdtnode, "#address-cells", &len); + if (prop == NULL) + return len; + + nr_cells = (u32 *)prop->data; + nac = fdt32_to_cpu(*nr_cells); + + prop = fdt_get_property(fdt, fdtnode, "#size-cells", &len); + if (prop == NULL) + return len; + + nr_cells = (u32 *)prop->data; + nsc = fdt32_to_cpu(*nr_cells); + + *nr_address_cells = nac; + *nr_size_cells = nsc; + + return 0; +} + +void dt_reg_init(struct dt_reg *reg, u32 nr_address_cells, u32 nr_size_cells) +{ + memset(reg, 0, sizeof(struct dt_reg)); + reg->nr_address_cells = nr_address_cells; + reg->nr_size_cells = nr_size_cells; +} + +int dt_get_reg(int fdtnode, int regidx, struct dt_reg *reg) +{ + const struct fdt_property *prop; + u32 *cells, i; + unsigned nr_tuple_cells; + int len; + + prop = fdt_get_property(fdt, fdtnode, "reg", &len); + if (prop == NULL) + return len; + + cells = (u32 *)prop->data; + nr_tuple_cells = reg->nr_address_cells + reg->nr_size_cells; + regidx *= nr_tuple_cells; + + if (regidx + nr_tuple_cells > len/sizeof(u32)) + return -FDT_ERR_NOTFOUND; + + for (i = 0; i < reg->nr_address_cells; ++i) + reg->address_cells[i] = fdt32_to_cpu(cells[regidx + i]); + + regidx += 
reg->nr_address_cells; + for (i = 0; i < reg->nr_size_cells; ++i) + reg->size_cells[i] = fdt32_to_cpu(cells[regidx + i]); + + return 0; +} + +int dt_pbus_translate_node(int fdtnode, int regidx, + struct dt_pbus_reg *pbus_reg) +{ + struct dt_reg raw_reg; + u32 nac, nsc; + int parent, ret; + + parent = fdt_parent_offset(fdt, fdtnode); + if (parent < 0) + return parent; + + ret = dt_get_nr_cells(parent, &nac, &nsc); + if (ret != 0) + return ret; + + dt_reg_init(&raw_reg, nac, nsc); + + ret = dt_get_reg(fdtnode, regidx, &raw_reg); + if (ret < 0) + return ret; + + pbus_reg->addr = dt_pbus_read_cells(raw_reg.nr_address_cells, + raw_reg.address_cells); + pbus_reg->size = dt_pbus_read_cells(raw_reg.nr_size_cells, + raw_reg.size_cells); + + return 0; +} + +int dt_pbus_translate(const struct dt_device *dev, int regidx, + void *reg) +{ + return dt_pbus_translate_node(dev->fdtnode, regidx, reg); +} + +int dt_bus_match_any(const struct dt_device *dev __unused, int fdtnode) +{ + /* matches any device with a valid node */ + return fdtnode < 0 ? 
fdtnode : 1; +} + +static const struct dt_bus dt_default_bus = { + .match = dt_bus_match_any, + .translate = dt_pbus_translate, +}; + +void dt_bus_init_defaults(struct dt_bus *bus) +{ + memcpy(bus, &dt_default_bus, sizeof(struct dt_bus)); +} + +void dt_device_init(struct dt_device *dev, const struct dt_bus *bus, + void *info) +{ + memset(dev, 0, sizeof(struct dt_device)); + dev->bus = bus; + dev->info = info; +} + +int dt_device_find_compatible(const struct dt_device *dev, + const char *compatible) +{ + int node, ret; + + node = fdt_node_offset_by_compatible(fdt, -1, compatible); + while (node >= 0) { + ret = dev->bus->match(dev, node); + if (ret < 0) + return ret; + else if (ret) + break; + node = fdt_node_offset_by_compatible(fdt, node, compatible); + } + return node; +} + +int dt_pbus_get_base_compatible(const char *compatible, + struct dt_pbus_reg *base) +{ + struct dt_device dev; + int node; + + dt_device_init(&dev, &dt_default_bus, NULL); + + node = dt_device_find_compatible(&dev, compatible); + if (node < 0) + return node; + + dt_device_bind_node(&dev, node); + + return dt_pbus_get_base(&dev, base); +} + +int dt_get_memory_params(struct dt_pbus_reg *regs, int nr_regs) +{ + const char *pn = "device_type", *pv = "memory"; + int node, ret, reg_idx, pl = strlen(pv) + 1, nr = 0; + struct dt_pbus_reg reg; + + node = fdt_node_offset_by_prop_value(fdt, -1, pn, pv, pl); + + while (node >= 0) { + + reg_idx = 0; + + while (nr < nr_regs) { + ret = dt_pbus_translate_node(node, reg_idx, ®); + if (ret == -FDT_ERR_NOTFOUND) + break; + if (ret < 0) + return ret; + regs[nr].addr = reg.addr; + regs[nr].size = reg.size; + ++nr, ++reg_idx; + } + + node = fdt_node_offset_by_prop_value(fdt, node, pn, pv, pl); + } + + return node != -FDT_ERR_NOTFOUND ? 
node : nr; +} + +int dt_for_each_cpu_node(void (*func)(int fdtnode, u64 regval, void *info), + void *info) +{ + const struct fdt_property *prop; + int cpus, cpu, ret, len; + struct dt_reg raw_reg; + u32 nac, nsc; + u64 regval; + + cpus = fdt_path_offset(fdt, "/cpus"); + if (cpus < 0) + return cpus; + + ret = dt_get_nr_cells(cpus, &nac, &nsc); + if (ret < 0) + return ret; + + dt_reg_init(&raw_reg, nac, nsc); + + dt_for_each_subnode(cpus, cpu) { + + prop = fdt_get_property(fdt, cpu, "device_type", &len); + if (prop == NULL) + return len; + + if (len != 4 || strcmp((char *)prop->data, "cpu")) + continue; + + ret = dt_get_reg(cpu, 0, &raw_reg); + if (ret < 0) + return ret; + + regval = raw_reg.address_cells[0]; + if (nac == 2) + regval = (regval << 32) | raw_reg.address_cells[1]; + + func(cpu, regval, info); + } + + return 0; +} + +int dt_get_bootargs(const char **bootargs) +{ + const struct fdt_property *prop; + int node, len; + + *bootargs = NULL; + + node = fdt_path_offset(fdt, "/chosen"); + if (node < 0) + return node; + + prop = fdt_get_property(fdt, node, "bootargs", &len); + if (!prop) + return len; + + *bootargs = prop->data; + return 0; +} + +int dt_get_default_console_node(void) +{ + const struct fdt_property *prop; + int node, len; + + node = fdt_path_offset(fdt, "/chosen"); + if (node < 0) + return node; + + prop = fdt_get_property(fdt, node, "stdout-path", &len); + if (!prop) { + prop = fdt_get_property(fdt, node, "linux,stdout-path", &len); + if (!prop) + return len; + } + + return fdt_path_offset(fdt, prop->data); +} + +int dt_get_initrd(const char **initrd, u32 *size) +{ + const struct fdt_property *prop; + const char *start, *end; + int node, len; + u32 *data; + + *initrd = NULL; + *size = 0; + + node = fdt_path_offset(fdt, "/chosen"); + if (node < 0) + return node; + + prop = fdt_get_property(fdt, node, "linux,initrd-start", &len); + if (!prop) + return len; + data = (u32 *)prop->data; + start = (const char *)(unsigned long)fdt32_to_cpu(*data); + + 
prop = fdt_get_property(fdt, node, "linux,initrd-end", &len); + if (!prop) { + assert(len != -FDT_ERR_NOTFOUND); + return len; + } + data = (u32 *)prop->data; + end = (const char *)(unsigned long)fdt32_to_cpu(*data); + + *initrd = start; + *size = (unsigned long)end - (unsigned long)start; + + return 0; +} + +int dt_init(const void *fdt_ptr) +{ + int ret; + + ret = fdt_check_header(fdt_ptr); + if (ret < 0) + return ret; + + /* Sanity check the path. */ + ret = fdt_path_offset(fdt_ptr, "/"); + if (ret < 0) + return ret; + + fdt = fdt_ptr; + return 0; +} diff --git a/tests/kvm-unit-tests/lib/devicetree.h b/tests/kvm-unit-tests/lib/devicetree.h new file mode 100644 index 00000000..93c7ebc6 --- /dev/null +++ b/tests/kvm-unit-tests/lib/devicetree.h @@ -0,0 +1,251 @@ +#ifndef _DEVICETREE_H_ +#define _DEVICETREE_H_ +/* + * devicetree builds on libfdt to implement abstractions and accessors + * for Linux required device tree content. The accessors provided are + * common across architectures. See section III of the kernel doc + * Documentation/devicetree/booting-without-of.txt + * + * Copyright (C) 2014, Red Hat Inc, Andrew Jones + * + * This work is licensed under the terms of the GNU LGPL, version 2. 
+ */ +#include "libcflat.h" +#include "libfdt/libfdt.h" + +/********************************************************************** + * devicetree init and libfdt helpers + **********************************************************************/ + +/* dt_init initializes devicetree with a pointer to an fdt, @fdt_ptr */ +extern int dt_init(const void *fdt_ptr); + +/* get the fdt pointer that devicetree is using */ +extern const void *dt_fdt(void); + +/* check for an initialized, valid devicetree */ +extern bool dt_available(void); + +/* traverse child nodes */ +#define dt_for_each_subnode(n, s) \ + for (s = fdt_first_subnode(dt_fdt(), n); \ + s != -FDT_ERR_NOTFOUND; \ + s = fdt_next_subnode(dt_fdt(), s)) + +/********************************************************************** + * Abstractions for required node types and properties + **********************************************************************/ + +struct dt_device { + int fdtnode; + const struct dt_bus *bus; + + /* + * info is a pointer to device specific data, which may be + * used by the bus match() and translate() functions + */ + void *info; +}; + +struct dt_bus { + /* + * match a device @dev to an fdt node @fdtnode + * returns + * - a positive value on match + * - zero on no match + * - a negative FDT_ERR_* value on failure + */ + int (*match)(const struct dt_device *dev, int fdtnode); + + /* + * translate the @regidx'th "address size" tuple of + * @dev's fdt node's "reg" property, and store the result + * in @reg, a bus specific structure + * returns + * - zero on success + * - a negative FDT_ERR_* value on failure + */ + int (*translate)(const struct dt_device *dev, int regidx, void *reg); +}; + +/* dt_bus_match_any matches any fdt node, i.e. 
it always returns true */ +extern int dt_bus_match_any(const struct dt_device *dev, int fdtnode); + +/* the processor bus (pbus) address type and register tuple */ +typedef u64 dt_pbus_addr_t; +struct dt_pbus_reg { + dt_pbus_addr_t addr; + dt_pbus_addr_t size; +}; + +static inline dt_pbus_addr_t dt_pbus_read_cells(u32 nr_cells, u32 *cells) +{ + switch (nr_cells) { + case 1: return cells[0]; + case 2: return ((u64)cells[0] << 32) | cells[1]; + } + return (~0ULL); +} + +/* + * dt_pbus_translate translates device node regs for the + * processor bus using the parent node's #address-cells + * and #size-cells and dt_pbus_read_cells() + * returns + * - zero on success + * - a negative FDT_ERR_* value on failure + */ +extern int dt_pbus_translate(const struct dt_device *dev, int regidx, + void *reg); + +/* + * dt_pbus_translate_node is the same as dt_pbus_translate but + * operates on an fdt node instead of a dt_device + */ +extern int dt_pbus_translate_node(int fdtnode, int regidx, + struct dt_pbus_reg *reg); + +/* + * dt_pbus_get_base is an alias for + * dt_pbus_translate(dev, 0, base) + * returns + * - zero on success + * - a negative FDT_ERR_* value on failure + */ +static inline int dt_pbus_get_base(const struct dt_device *dev, + struct dt_pbus_reg *base) +{ + return dt_pbus_translate(dev, 0, base); +} + +/* + * dt_bus_init_defaults initializes @bus with + * match <- dt_bus_match_any + * translate <- dt_pbus_translate + */ +extern void dt_bus_init_defaults(struct dt_bus *bus); + +/* + * dt_device_init initializes a dt_device with the given parameters + */ +extern void dt_device_init(struct dt_device *dev, const struct dt_bus *bus, + void *info); + +static inline void dt_device_bind_node(struct dt_device *dev, int fdtnode) +{ + dev->fdtnode = fdtnode; +} + +/* + * dt_device_find_compatible finds a @compatible node + * returns + * - node (>= 0) on success + * - a negative FDT_ERR_* value on failure + */ +extern int dt_device_find_compatible(const struct dt_device *dev, 
+ const char *compatible); + +/* + * dt_pbus_get_base_compatible simply bundles many functions into one. + * It finds the first @compatible fdt node, then translates the 0th reg + * tuple (the base) using the processor bus translation, and finally it + * stores that result in @base. + * returns + * - zero on success + * - a negative FDT_ERR_* value on failure + */ +extern int dt_pbus_get_base_compatible(const char *compatible, + struct dt_pbus_reg *base); + +/********************************************************************** + * Low-level accessors for required node types and properties + **********************************************************************/ + +/* + * dt_get_nr_cells sets @nr_address_cells and @nr_size_cells to the + * #address-cells and #size-cells properties of @fdtnode + * returns + * - zero on success + * - a negative FDT_ERR_* value on failure + */ +extern int dt_get_nr_cells(int fdtnode, u32 *nr_address_cells, + u32 *nr_size_cells); + +/* dt_reg is a structure for "raw" reg tuples */ +#define MAX_ADDRESS_CELLS 4 +#define MAX_SIZE_CELLS 4 +struct dt_reg { + u32 nr_address_cells, nr_size_cells; + u32 address_cells[MAX_ADDRESS_CELLS]; + u32 size_cells[MAX_SIZE_CELLS]; +}; + +/* + * dt_reg_init initialize a dt_reg struct to zero and sets + * nr_address_cells and nr_size_cells to @nr_address_cells and + * @nr_size_cells respectively. + */ +extern void dt_reg_init(struct dt_reg *reg, u32 nr_address_cells, + u32 nr_size_cells); + +/* + * dt_get_reg gets the @regidx'th reg tuple of @fdtnode's reg property + * and stores it in @reg. @reg must be initialized. 
+ * returns + * - zero on success + * - a negative FDT_ERR_* value on failure + */ +extern int dt_get_reg(int fdtnode, int regidx, struct dt_reg *reg); + +/********************************************************************** + * High-level accessors for required node types and properties + **********************************************************************/ + +/* + * dt_get_bootargs gets the string pointer from /chosen/bootargs + * returns + * - zero on success + * - a negative FDT_ERR_* value on failure, and @bootargs + * will be set to NULL + */ +extern int dt_get_bootargs(const char **bootargs); + +/* + * dt_get_default_console_node gets the node of the path stored in + * /chosen/stdout-path (or the deprecated /chosen/linux,stdout-path) + * returns + * - the node (>= 0) on success + * - a negative FDT_ERR_* value on failure + */ +extern int dt_get_default_console_node(void); + +/* + * dt_get_initrd gets the physical address of the initrd and its + * size from /chosen + * returns + * - zero on success + * - a negative FDT_ERR_* value on failure, and @initrd will be + * set to NULL and @size set to zero + */ +extern int dt_get_initrd(const char **initrd, u32 *size); + +/* + * dt_get_memory_params gets the memory parameters from the /memory node(s) + * storing each memory region ("address size" tuple) in consecutive entries + * of @regs, up to @nr_regs + * returns + * - number of memory regions found on success + * - a negative FDT_ERR_* value on failure + */ +extern int dt_get_memory_params(struct dt_pbus_reg *regs, int nr_regs); + +/* + * dt_for_each_cpu_node runs @func on each cpu node in the /cpus node + * passing it its fdt node, its reg property value, and @info + * - zero on success + * - a negative FDT_ERR_* value on failure + */ +extern int dt_for_each_cpu_node(void (*func)(int fdtnode, u64 regval, + void *info), void *info); + +#endif /* _DEVICETREE_H_ */ diff --git a/tests/kvm-unit-tests/lib/errata.h b/tests/kvm-unit-tests/lib/errata.h new file mode 
100644 index 00000000..5e63f73b --- /dev/null +++ b/tests/kvm-unit-tests/lib/errata.h @@ -0,0 +1,24 @@ +#ifndef _ERRATA_H_ +#define _ERRATA_H_ + +#define _ERRATA(erratum) errata("ERRATA_" # erratum) +#define ERRATA(erratum) _ERRATA(erratum) + +#define _ERRATA_RELAXED(erratum) errata_relaxed("ERRATA_" # erratum) +#define ERRATA_RELAXED(erratum) _ERRATA_RELAXED(erratum) + +static inline bool errata(const char *erratum) +{ + char *s = getenv(erratum); + + return s && (*s == '1' || *s == 'y' || *s == 'Y'); +} + +static inline bool errata_relaxed(const char *erratum) +{ + char *s = getenv(erratum); + + return !(s && (*s == '0' || *s == 'n' || *s == 'N')); +} + +#endif diff --git a/tests/kvm-unit-tests/lib/kbuild.h b/tests/kvm-unit-tests/lib/kbuild.h new file mode 100644 index 00000000..ab99db67 --- /dev/null +++ b/tests/kvm-unit-tests/lib/kbuild.h @@ -0,0 +1,8 @@ +#ifndef _KBUILD_H_ +#define _KBUILD_H_ +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) +#define OFFSET(sym, str, mem) DEFINE(sym, offsetof(struct str, mem)) +#define COMMENT(x) asm volatile("\n->#" x) +#define BLANK() asm volatile("\n->" : : ) +#endif diff --git a/tests/kvm-unit-tests/lib/libcflat.h b/tests/kvm-unit-tests/lib/libcflat.h new file mode 100644 index 00000000..96a37926 --- /dev/null +++ b/tests/kvm-unit-tests/lib/libcflat.h @@ -0,0 +1,132 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard + */ + +#ifndef __LIBCFLAT_H +#define __LIBCFLAT_H + +#include +#include +#include +#include + +#define __unused __attribute__((__unused__)) + +#define xstr(s...) xxstr(s) +#define xxstr(s...) #s + +#define __ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) +#define __ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a) - 1) +#define ALIGN(x, a) __ALIGN((x), (a)) +#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) + +#define SZ_4K (1 << 12) +#define SZ_64K (1 << 16) +#define SZ_2M (1 << 21) +#define SZ_1G (1 << 30) + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +typedef uint8_t u8; +typedef int8_t s8; +typedef uint16_t u16; +typedef int16_t s16; +typedef uint32_t u32; +typedef int32_t s32; +typedef uint64_t u64; +typedef int64_t s64; +typedef unsigned long ulong; + +typedef _Bool bool; +#define false 0 +#define true 1 + +#if __SIZEOF_LONG__ == 8 +# define __PRI32_PREFIX +# define __PRI64_PREFIX "l" +# define __PRIPTR_PREFIX "l" +#else +#if defined(__U32_LONG_FMT__) +# define __PRI32_PREFIX "l" +#else +# define __PRI32_PREFIX +#endif +# define __PRI64_PREFIX "ll" +# define __PRIPTR_PREFIX +#endif +#define PRId32 __PRI32_PREFIX "d" +#define PRIu32 __PRI32_PREFIX "u" +#define PRIx32 __PRI32_PREFIX "x" +#define PRId64 __PRI64_PREFIX "d" +#define PRIu64 __PRI64_PREFIX "u" +#define PRIx64 __PRI64_PREFIX "x" +#define PRIxPTR __PRIPTR_PREFIX "x" + +typedef u64 phys_addr_t; +#define INVALID_PHYS_ADDR (~(phys_addr_t)0) + +extern void puts(const char *s); +extern void exit(int code); +extern void abort(void); +extern long atol(const char *ptr); +extern char *getenv(const char *name); + +extern int printf(const char *fmt, ...) 
+ __attribute__((format(printf, 1, 2))); +extern int snprintf(char *buf, int size, const char *fmt, ...) + __attribute__((format(printf, 3, 4))); +extern int vsnprintf(char *buf, int size, const char *fmt, va_list va) + __attribute__((format(printf, 3, 0))); +extern int vprintf(const char *fmt, va_list va) + __attribute__((format(printf, 1, 0))); + +extern void report_prefix_push(const char *prefix); +extern void report_prefix_pop(void); +extern void report(const char *msg_fmt, bool pass, ...); +extern void report_xfail(const char *msg_fmt, bool xfail, bool pass, ...); +extern void report_abort(const char *msg_fmt, ...); +extern void report_skip(const char *msg_fmt, ...); +extern void report_info(const char *msg_fmt, ...); +extern int report_summary(void); + +extern void dump_stack(void); +extern void dump_frame_stack(const void *instruction, const void *frame); + +#define ARRAY_SIZE(_a) (sizeof(_a)/sizeof((_a)[0])) + +#define container_of(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - offsetof(type,member) );}) + +#define assert(cond) \ +do { \ + if (!(cond)) { \ + printf("%s:%d: assert failed: %s\n", \ + __FILE__, __LINE__, #cond); \ + dump_stack(); \ + abort(); \ + } \ +} while (0) + +static inline bool is_power_of_2(unsigned long n) +{ + return n && !(n & (n - 1)); +} + +#endif diff --git a/tests/kvm-unit-tests/lib/libfdt/Makefile.libfdt b/tests/kvm-unit-tests/lib/libfdt/Makefile.libfdt new file mode 100644 index 00000000..91126c00 --- /dev/null +++ b/tests/kvm-unit-tests/lib/libfdt/Makefile.libfdt @@ -0,0 +1,10 @@ +# Makefile.libfdt +# +# This is not a complete Makefile of itself. Instead, it is designed to +# be easily embeddable into other systems of Makefiles. 
+# +LIBFDT_soname = libfdt.$(SHAREDLIB_EXT).1 +LIBFDT_INCLUDES = fdt.h libfdt.h libfdt_env.h +LIBFDT_VERSION = version.lds +LIBFDT_SRCS = fdt.c fdt_ro.c fdt_wip.c fdt_sw.c fdt_rw.c fdt_strerror.c fdt_empty_tree.c +LIBFDT_OBJS = $(LIBFDT_SRCS:%.c=%.o) diff --git a/tests/kvm-unit-tests/lib/libfdt/README b/tests/kvm-unit-tests/lib/libfdt/README new file mode 100644 index 00000000..24ad4fec --- /dev/null +++ b/tests/kvm-unit-tests/lib/libfdt/README @@ -0,0 +1,4 @@ + +The code in this directory is originally imported from the libfdt +directory of git://git.jdl.com/software/dtc.git - version 1.4.0. + diff --git a/tests/kvm-unit-tests/lib/libfdt/fdt.c b/tests/kvm-unit-tests/lib/libfdt/fdt.c new file mode 100644 index 00000000..2ce6a441 --- /dev/null +++ b/tests/kvm-unit-tests/lib/libfdt/fdt.c @@ -0,0 +1,250 @@ +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + * + * libfdt is dual licensed: you can use it either under the terms of + * the GPL, or the BSD license, at your option. + * + * a) This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + * + * Alternatively, + * + * b) Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. 
Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include "libfdt_env.h" + +#include +#include + +#include "libfdt_internal.h" + +int fdt_check_header(const void *fdt) +{ + if (fdt_magic(fdt) == FDT_MAGIC) { + /* Complete tree */ + if (fdt_version(fdt) < FDT_FIRST_SUPPORTED_VERSION) + return -FDT_ERR_BADVERSION; + if (fdt_last_comp_version(fdt) > FDT_LAST_SUPPORTED_VERSION) + return -FDT_ERR_BADVERSION; + } else if (fdt_magic(fdt) == FDT_SW_MAGIC) { + /* Unfinished sequential-write blob */ + if (fdt_size_dt_struct(fdt) == 0) + return -FDT_ERR_BADSTATE; + } else { + return -FDT_ERR_BADMAGIC; + } + + return 0; +} + +const void *fdt_offset_ptr(const void *fdt, int offset, unsigned int len) +{ + const char *p; + + if (fdt_version(fdt) >= 0x11) + if (((offset + len) < offset) + || ((offset + len) > fdt_size_dt_struct(fdt))) + return NULL; + + p = _fdt_offset_ptr(fdt, offset); + + if (p + len < p) + return NULL; + return p; +} + +uint32_t fdt_next_tag(const void *fdt, int startoffset, int *nextoffset) +{ + const fdt32_t *tagp, *lenp; + uint32_t tag; + int offset = startoffset; + const char *p; + + *nextoffset = -FDT_ERR_TRUNCATED; + tagp = fdt_offset_ptr(fdt, offset, FDT_TAGSIZE); + if (!tagp) + return FDT_END; /* premature end */ + tag = fdt32_to_cpu(*tagp); + offset += FDT_TAGSIZE; + + *nextoffset = -FDT_ERR_BADSTRUCTURE; + switch (tag) { + case FDT_BEGIN_NODE: + /* skip name */ + do { + p = fdt_offset_ptr(fdt, offset++, 1); + } while (p && (*p != '\0')); + if (!p) + return FDT_END; /* premature end */ + break; + + case FDT_PROP: + lenp = fdt_offset_ptr(fdt, offset, sizeof(*lenp)); + if (!lenp) + return FDT_END; /* premature end */ + /* skip-name offset, length and value */ + offset += sizeof(struct fdt_property) - FDT_TAGSIZE + + fdt32_to_cpu(*lenp); + break; + + case FDT_END: + case FDT_END_NODE: + case FDT_NOP: + break; + + default: + return FDT_END; + } + + if (!fdt_offset_ptr(fdt, startoffset, offset - startoffset)) + return FDT_END; /* premature end */ + + *nextoffset = FDT_TAGALIGN(offset); + return tag; 
+} + +int _fdt_check_node_offset(const void *fdt, int offset) +{ + if ((offset < 0) || (offset % FDT_TAGSIZE) + || (fdt_next_tag(fdt, offset, &offset) != FDT_BEGIN_NODE)) + return -FDT_ERR_BADOFFSET; + + return offset; +} + +int _fdt_check_prop_offset(const void *fdt, int offset) +{ + if ((offset < 0) || (offset % FDT_TAGSIZE) + || (fdt_next_tag(fdt, offset, &offset) != FDT_PROP)) + return -FDT_ERR_BADOFFSET; + + return offset; +} + +int fdt_next_node(const void *fdt, int offset, int *depth) +{ + int nextoffset = 0; + uint32_t tag; + + if (offset >= 0) + if ((nextoffset = _fdt_check_node_offset(fdt, offset)) < 0) + return nextoffset; + + do { + offset = nextoffset; + tag = fdt_next_tag(fdt, offset, &nextoffset); + + switch (tag) { + case FDT_PROP: + case FDT_NOP: + break; + + case FDT_BEGIN_NODE: + if (depth) + (*depth)++; + break; + + case FDT_END_NODE: + if (depth && ((--(*depth)) < 0)) + return nextoffset; + break; + + case FDT_END: + if ((nextoffset >= 0) + || ((nextoffset == -FDT_ERR_TRUNCATED) && !depth)) + return -FDT_ERR_NOTFOUND; + else + return nextoffset; + } + } while (tag != FDT_BEGIN_NODE); + + return offset; +} + +int fdt_first_subnode(const void *fdt, int offset) +{ + int depth = 0; + + offset = fdt_next_node(fdt, offset, &depth); + if (offset < 0 || depth != 1) + return -FDT_ERR_NOTFOUND; + + return offset; +} + +int fdt_next_subnode(const void *fdt, int offset) +{ + int depth = 1; + + /* + * With respect to the parent, the depth of the next subnode will be + * the same as the last. 
+ */ + do { + offset = fdt_next_node(fdt, offset, &depth); + if (offset < 0 || depth < 1) + return -FDT_ERR_NOTFOUND; + } while (depth > 1); + + return offset; +} + +const char *_fdt_find_string(const char *strtab, int tabsize, const char *s) +{ + int len = strlen(s) + 1; + const char *last = strtab + tabsize - len; + const char *p; + + for (p = strtab; p <= last; p++) + if (memcmp(p, s, len) == 0) + return p; + return NULL; +} + +int fdt_move(const void *fdt, void *buf, int bufsize) +{ + FDT_CHECK_HEADER(fdt); + + if (fdt_totalsize(fdt) > bufsize) + return -FDT_ERR_NOSPACE; + + memmove(buf, fdt, fdt_totalsize(fdt)); + return 0; +} diff --git a/tests/kvm-unit-tests/lib/libfdt/fdt.h b/tests/kvm-unit-tests/lib/libfdt/fdt.h new file mode 100644 index 00000000..526aedb5 --- /dev/null +++ b/tests/kvm-unit-tests/lib/libfdt/fdt.h @@ -0,0 +1,111 @@ +#ifndef _FDT_H +#define _FDT_H +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + * Copyright 2012 Kim Phillips, Freescale Semiconductor. + * + * libfdt is dual licensed: you can use it either under the terms of + * the GPL, or the BSD license, at your option. + * + * a) This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + * + * Alternatively, + * + * b) Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __ASSEMBLY__ + +struct fdt_header { + fdt32_t magic; /* magic word FDT_MAGIC */ + fdt32_t totalsize; /* total size of DT block */ + fdt32_t off_dt_struct; /* offset to structure */ + fdt32_t off_dt_strings; /* offset to strings */ + fdt32_t off_mem_rsvmap; /* offset to memory reserve map */ + fdt32_t version; /* format version */ + fdt32_t last_comp_version; /* last compatible version */ + + /* version 2 fields below */ + fdt32_t boot_cpuid_phys; /* Which physical CPU id we're + booting on */ + /* version 3 fields below */ + fdt32_t size_dt_strings; /* size of the strings block */ + + /* version 17 fields below */ + fdt32_t size_dt_struct; /* size of the structure block */ +}; + +struct fdt_reserve_entry { + fdt64_t address; + fdt64_t size; +}; + +struct fdt_node_header { + fdt32_t tag; + char name[0]; +}; + +struct fdt_property { + fdt32_t tag; + fdt32_t len; + fdt32_t nameoff; + char data[0]; +}; + +#endif /* !__ASSEMBLY */ + +#define FDT_MAGIC 0xd00dfeed /* 4: version, 4: total size */ +#define FDT_TAGSIZE sizeof(fdt32_t) + +#define FDT_BEGIN_NODE 0x1 /* Start node: full name */ +#define FDT_END_NODE 0x2 /* End node */ +#define FDT_PROP 0x3 /* Property: name off, + size, content */ +#define FDT_NOP 0x4 /* nop */ +#define FDT_END 0x9 + +#define FDT_V1_SIZE (7*sizeof(fdt32_t)) +#define FDT_V2_SIZE (FDT_V1_SIZE + sizeof(fdt32_t)) +#define FDT_V3_SIZE (FDT_V2_SIZE + sizeof(fdt32_t)) +#define FDT_V16_SIZE FDT_V3_SIZE +#define FDT_V17_SIZE (FDT_V16_SIZE + sizeof(fdt32_t)) + +#endif /* _FDT_H */ diff --git a/tests/kvm-unit-tests/lib/libfdt/fdt_empty_tree.c b/tests/kvm-unit-tests/lib/libfdt/fdt_empty_tree.c new file mode 100644 index 00000000..f72d13b1 --- /dev/null +++ b/tests/kvm-unit-tests/lib/libfdt/fdt_empty_tree.c @@ -0,0 +1,84 @@ +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2012 David Gibson, IBM Corporation. 
+ * + * libfdt is dual licensed: you can use it either under the terms of + * the GPL, or the BSD license, at your option. + * + * a) This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + * + * Alternatively, + * + * b) Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "libfdt_env.h" + +#include +#include + +#include "libfdt_internal.h" + +int fdt_create_empty_tree(void *buf, int bufsize) +{ + int err; + + err = fdt_create(buf, bufsize); + if (err) + return err; + + err = fdt_finish_reservemap(buf); + if (err) + return err; + + err = fdt_begin_node(buf, ""); + if (err) + return err; + + err = fdt_end_node(buf); + if (err) + return err; + + err = fdt_finish(buf); + if (err) + return err; + + return fdt_open_into(buf, buf, bufsize); +} + diff --git a/tests/kvm-unit-tests/lib/libfdt/fdt_ro.c b/tests/kvm-unit-tests/lib/libfdt/fdt_ro.c new file mode 100644 index 00000000..50007f61 --- /dev/null +++ b/tests/kvm-unit-tests/lib/libfdt/fdt_ro.c @@ -0,0 +1,573 @@ +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + * + * libfdt is dual licensed: you can use it either under the terms of + * the GPL, or the BSD license, at your option. + * + * a) This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + * + * Alternatively, + * + * b) Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "libfdt_env.h" + +#include +#include + +#include "libfdt_internal.h" + +static int _fdt_nodename_eq(const void *fdt, int offset, + const char *s, int len) +{ + const char *p = fdt_offset_ptr(fdt, offset + FDT_TAGSIZE, len+1); + + if (! 
p) + /* short match */ + return 0; + + if (memcmp(p, s, len) != 0) + return 0; + + if (p[len] == '\0') + return 1; + else if (!memchr(s, '@', len) && (p[len] == '@')) + return 1; + else + return 0; +} + +const char *fdt_string(const void *fdt, int stroffset) +{ + return (const char *)fdt + fdt_off_dt_strings(fdt) + stroffset; +} + +static int _fdt_string_eq(const void *fdt, int stroffset, + const char *s, int len) +{ + const char *p = fdt_string(fdt, stroffset); + + return (strlen(p) == len) && (memcmp(p, s, len) == 0); +} + +int fdt_get_mem_rsv(const void *fdt, int n, uint64_t *address, uint64_t *size) +{ + FDT_CHECK_HEADER(fdt); + *address = fdt64_to_cpu(_fdt_mem_rsv(fdt, n)->address); + *size = fdt64_to_cpu(_fdt_mem_rsv(fdt, n)->size); + return 0; +} + +int fdt_num_mem_rsv(const void *fdt) +{ + int i = 0; + + while (fdt64_to_cpu(_fdt_mem_rsv(fdt, i)->size) != 0) + i++; + return i; +} + +static int _nextprop(const void *fdt, int offset) +{ + uint32_t tag; + int nextoffset; + + do { + tag = fdt_next_tag(fdt, offset, &nextoffset); + + switch (tag) { + case FDT_END: + if (nextoffset >= 0) + return -FDT_ERR_BADSTRUCTURE; + else + return nextoffset; + + case FDT_PROP: + return offset; + } + offset = nextoffset; + } while (tag == FDT_NOP); + + return -FDT_ERR_NOTFOUND; +} + +int fdt_subnode_offset_namelen(const void *fdt, int offset, + const char *name, int namelen) +{ + int depth; + + FDT_CHECK_HEADER(fdt); + + for (depth = 0; + (offset >= 0) && (depth >= 0); + offset = fdt_next_node(fdt, offset, &depth)) + if ((depth == 1) + && _fdt_nodename_eq(fdt, offset, name, namelen)) + return offset; + + if (depth < 0) + return -FDT_ERR_NOTFOUND; + return offset; /* error */ +} + +int fdt_subnode_offset(const void *fdt, int parentoffset, + const char *name) +{ + return fdt_subnode_offset_namelen(fdt, parentoffset, name, strlen(name)); +} + +int fdt_path_offset(const void *fdt, const char *path) +{ + const char *end = path + strlen(path); + const char *p = path; + int offset = 
0; + + FDT_CHECK_HEADER(fdt); + + /* see if we have an alias */ + if (*path != '/') { + const char *q = strchr(path, '/'); + + if (!q) + q = end; + + p = fdt_get_alias_namelen(fdt, p, q - p); + if (!p) + return -FDT_ERR_BADPATH; + offset = fdt_path_offset(fdt, p); + + p = q; + } + + while (*p) { + const char *q; + + while (*p == '/') + p++; + if (! *p) + return offset; + q = strchr(p, '/'); + if (! q) + q = end; + + offset = fdt_subnode_offset_namelen(fdt, offset, p, q-p); + if (offset < 0) + return offset; + + p = q; + } + + return offset; +} + +const char *fdt_get_name(const void *fdt, int nodeoffset, int *len) +{ + const struct fdt_node_header *nh = _fdt_offset_ptr(fdt, nodeoffset); + int err; + + if (((err = fdt_check_header(fdt)) != 0) + || ((err = _fdt_check_node_offset(fdt, nodeoffset)) < 0)) + goto fail; + + if (len) + *len = strlen(nh->name); + + return nh->name; + + fail: + if (len) + *len = err; + return NULL; +} + +int fdt_first_property_offset(const void *fdt, int nodeoffset) +{ + int offset; + + if ((offset = _fdt_check_node_offset(fdt, nodeoffset)) < 0) + return offset; + + return _nextprop(fdt, offset); +} + +int fdt_next_property_offset(const void *fdt, int offset) +{ + if ((offset = _fdt_check_prop_offset(fdt, offset)) < 0) + return offset; + + return _nextprop(fdt, offset); +} + +const struct fdt_property *fdt_get_property_by_offset(const void *fdt, + int offset, + int *lenp) +{ + int err; + const struct fdt_property *prop; + + if ((err = _fdt_check_prop_offset(fdt, offset)) < 0) { + if (lenp) + *lenp = err; + return NULL; + } + + prop = _fdt_offset_ptr(fdt, offset); + + if (lenp) + *lenp = fdt32_to_cpu(prop->len); + + return prop; +} + +const struct fdt_property *fdt_get_property_namelen(const void *fdt, + int offset, + const char *name, + int namelen, int *lenp) +{ + for (offset = fdt_first_property_offset(fdt, offset); + (offset >= 0); + (offset = fdt_next_property_offset(fdt, offset))) { + const struct fdt_property *prop; + + if (!(prop = 
fdt_get_property_by_offset(fdt, offset, lenp))) { + offset = -FDT_ERR_INTERNAL; + break; + } + if (_fdt_string_eq(fdt, fdt32_to_cpu(prop->nameoff), + name, namelen)) + return prop; + } + + if (lenp) + *lenp = offset; + return NULL; +} + +const struct fdt_property *fdt_get_property(const void *fdt, + int nodeoffset, + const char *name, int *lenp) +{ + return fdt_get_property_namelen(fdt, nodeoffset, name, + strlen(name), lenp); +} + +const void *fdt_getprop_namelen(const void *fdt, int nodeoffset, + const char *name, int namelen, int *lenp) +{ + const struct fdt_property *prop; + + prop = fdt_get_property_namelen(fdt, nodeoffset, name, namelen, lenp); + if (! prop) + return NULL; + + return prop->data; +} + +const void *fdt_getprop_by_offset(const void *fdt, int offset, + const char **namep, int *lenp) +{ + const struct fdt_property *prop; + + prop = fdt_get_property_by_offset(fdt, offset, lenp); + if (!prop) + return NULL; + if (namep) + *namep = fdt_string(fdt, fdt32_to_cpu(prop->nameoff)); + return prop->data; +} + +const void *fdt_getprop(const void *fdt, int nodeoffset, + const char *name, int *lenp) +{ + return fdt_getprop_namelen(fdt, nodeoffset, name, strlen(name), lenp); +} + +uint32_t fdt_get_phandle(const void *fdt, int nodeoffset) +{ + const fdt32_t *php; + int len; + + /* FIXME: This is a bit sub-optimal, since we potentially scan + * over all the properties twice. 
*/ + php = fdt_getprop(fdt, nodeoffset, "phandle", &len); + if (!php || (len != sizeof(*php))) { + php = fdt_getprop(fdt, nodeoffset, "linux,phandle", &len); + if (!php || (len != sizeof(*php))) + return 0; + } + + return fdt32_to_cpu(*php); +} + +const char *fdt_get_alias_namelen(const void *fdt, + const char *name, int namelen) +{ + int aliasoffset; + + aliasoffset = fdt_path_offset(fdt, "/aliases"); + if (aliasoffset < 0) + return NULL; + + return fdt_getprop_namelen(fdt, aliasoffset, name, namelen, NULL); +} + +const char *fdt_get_alias(const void *fdt, const char *name) +{ + return fdt_get_alias_namelen(fdt, name, strlen(name)); +} + +int fdt_get_path(const void *fdt, int nodeoffset, char *buf, int buflen) +{ + int pdepth = 0, p = 0; + int offset, depth, namelen; + const char *name; + + FDT_CHECK_HEADER(fdt); + + if (buflen < 2) + return -FDT_ERR_NOSPACE; + + for (offset = 0, depth = 0; + (offset >= 0) && (offset <= nodeoffset); + offset = fdt_next_node(fdt, offset, &depth)) { + while (pdepth > depth) { + do { + p--; + } while (buf[p-1] != '/'); + pdepth--; + } + + if (pdepth >= depth) { + name = fdt_get_name(fdt, offset, &namelen); + if (!name) + return namelen; + if ((p + namelen + 1) <= buflen) { + memcpy(buf + p, name, namelen); + p += namelen; + buf[p++] = '/'; + pdepth++; + } + } + + if (offset == nodeoffset) { + if (pdepth < (depth + 1)) + return -FDT_ERR_NOSPACE; + + if (p > 1) /* special case so that root path is "/", not "" */ + p--; + buf[p] = '\0'; + return 0; + } + } + + if ((offset == -FDT_ERR_NOTFOUND) || (offset >= 0)) + return -FDT_ERR_BADOFFSET; + else if (offset == -FDT_ERR_BADOFFSET) + return -FDT_ERR_BADSTRUCTURE; + + return offset; /* error from fdt_next_node() */ +} + +int fdt_supernode_atdepth_offset(const void *fdt, int nodeoffset, + int supernodedepth, int *nodedepth) +{ + int offset, depth; + int supernodeoffset = -FDT_ERR_INTERNAL; + + FDT_CHECK_HEADER(fdt); + + if (supernodedepth < 0) + return -FDT_ERR_NOTFOUND; + + for (offset = 
0, depth = 0; + (offset >= 0) && (offset <= nodeoffset); + offset = fdt_next_node(fdt, offset, &depth)) { + if (depth == supernodedepth) + supernodeoffset = offset; + + if (offset == nodeoffset) { + if (nodedepth) + *nodedepth = depth; + + if (supernodedepth > depth) + return -FDT_ERR_NOTFOUND; + else + return supernodeoffset; + } + } + + if ((offset == -FDT_ERR_NOTFOUND) || (offset >= 0)) + return -FDT_ERR_BADOFFSET; + else if (offset == -FDT_ERR_BADOFFSET) + return -FDT_ERR_BADSTRUCTURE; + + return offset; /* error from fdt_next_node() */ +} + +int fdt_node_depth(const void *fdt, int nodeoffset) +{ + int nodedepth; + int err; + + err = fdt_supernode_atdepth_offset(fdt, nodeoffset, 0, &nodedepth); + if (err) + return (err < 0) ? err : -FDT_ERR_INTERNAL; + return nodedepth; +} + +int fdt_parent_offset(const void *fdt, int nodeoffset) +{ + int nodedepth = fdt_node_depth(fdt, nodeoffset); + + if (nodedepth < 0) + return nodedepth; + return fdt_supernode_atdepth_offset(fdt, nodeoffset, + nodedepth - 1, NULL); +} + +int fdt_node_offset_by_prop_value(const void *fdt, int startoffset, + const char *propname, + const void *propval, int proplen) +{ + int offset; + const void *val; + int len; + + FDT_CHECK_HEADER(fdt); + + /* FIXME: The algorithm here is pretty horrible: we scan each + * property of a node in fdt_getprop(), then if that didn't + * find what we want, we scan over them again making our way + * to the next node. Still it's the easiest to implement + * approach; performance can come later. 
*/ + for (offset = fdt_next_node(fdt, startoffset, NULL); + offset >= 0; + offset = fdt_next_node(fdt, offset, NULL)) { + val = fdt_getprop(fdt, offset, propname, &len); + if (val && (len == proplen) + && (memcmp(val, propval, len) == 0)) + return offset; + } + + return offset; /* error from fdt_next_node() */ +} + +int fdt_node_offset_by_phandle(const void *fdt, uint32_t phandle) +{ + int offset; + + if ((phandle == 0) || (phandle == -1)) + return -FDT_ERR_BADPHANDLE; + + FDT_CHECK_HEADER(fdt); + + /* FIXME: The algorithm here is pretty horrible: we + * potentially scan each property of a node in + * fdt_get_phandle(), then if that didn't find what + * we want, we scan over them again making our way to the next + * node. Still it's the easiest to implement approach; + * performance can come later. */ + for (offset = fdt_next_node(fdt, -1, NULL); + offset >= 0; + offset = fdt_next_node(fdt, offset, NULL)) { + if (fdt_get_phandle(fdt, offset) == phandle) + return offset; + } + + return offset; /* error from fdt_next_node() */ +} + +int fdt_stringlist_contains(const char *strlist, int listlen, const char *str) +{ + int len = strlen(str); + const char *p; + + while (listlen >= len) { + if (memcmp(str, strlist, len+1) == 0) + return 1; + p = memchr(strlist, '\0', listlen); + if (!p) + return 0; /* malformed strlist.. 
*/ + listlen -= (p-strlist) + 1; + strlist = p + 1; + } + return 0; +} + +int fdt_node_check_compatible(const void *fdt, int nodeoffset, + const char *compatible) +{ + const void *prop; + int len; + + prop = fdt_getprop(fdt, nodeoffset, "compatible", &len); + if (!prop) + return len; + if (fdt_stringlist_contains(prop, len, compatible)) + return 0; + else + return 1; +} + +int fdt_node_offset_by_compatible(const void *fdt, int startoffset, + const char *compatible) +{ + int offset, err; + + FDT_CHECK_HEADER(fdt); + + /* FIXME: The algorithm here is pretty horrible: we scan each + * property of a node in fdt_node_check_compatible(), then if + * that didn't find what we want, we scan over them again + * making our way to the next node. Still it's the easiest to + * implement approach; performance can come later. */ + for (offset = fdt_next_node(fdt, startoffset, NULL); + offset >= 0; + offset = fdt_next_node(fdt, offset, NULL)) { + err = fdt_node_check_compatible(fdt, offset, compatible); + if ((err < 0) && (err != -FDT_ERR_NOTFOUND)) + return err; + else if (err == 0) + return offset; + } + + return offset; /* error from fdt_next_node() */ +} diff --git a/tests/kvm-unit-tests/lib/libfdt/fdt_rw.c b/tests/kvm-unit-tests/lib/libfdt/fdt_rw.c new file mode 100644 index 00000000..fdba618f --- /dev/null +++ b/tests/kvm-unit-tests/lib/libfdt/fdt_rw.c @@ -0,0 +1,492 @@ +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + * + * libfdt is dual licensed: you can use it either under the terms of + * the GPL, or the BSD license, at your option. + * + * a) This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + * + * Alternatively, + * + * b) Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+#include "libfdt_env.h"
+
+#include <fdt.h>
+#include <libfdt.h>
+
+#include "libfdt_internal.h"
+
+static int _fdt_blocks_misordered(const void *fdt,
+				  int mem_rsv_size, int struct_size)
+{
+	return (fdt_off_mem_rsvmap(fdt) < FDT_ALIGN(sizeof(struct fdt_header), 8))
+		|| (fdt_off_dt_struct(fdt) <
+		    (fdt_off_mem_rsvmap(fdt) + mem_rsv_size))
+		|| (fdt_off_dt_strings(fdt) <
+		    (fdt_off_dt_struct(fdt) + struct_size))
+		|| (fdt_totalsize(fdt) <
+		    (fdt_off_dt_strings(fdt) + fdt_size_dt_strings(fdt)));
+}
+
+static int _fdt_rw_check_header(void *fdt)
+{
+	FDT_CHECK_HEADER(fdt);
+
+	if (fdt_version(fdt) < 17)
+		return -FDT_ERR_BADVERSION;
+	if (_fdt_blocks_misordered(fdt, sizeof(struct fdt_reserve_entry),
+				   fdt_size_dt_struct(fdt)))
+		return -FDT_ERR_BADLAYOUT;
+	if (fdt_version(fdt) > 17)
+		fdt_set_version(fdt, 17);
+
+	return 0;
+}
+
+#define FDT_RW_CHECK_HEADER(fdt) \
+	{ \
+		int err; \
+		if ((err = _fdt_rw_check_header(fdt)) != 0) \
+			return err; \
+	}
+
+static inline int _fdt_data_size(void *fdt)
+{
+	return fdt_off_dt_strings(fdt) + fdt_size_dt_strings(fdt);
+}
+
+static int _fdt_splice(void *fdt, void *splicepoint, int oldlen, int newlen)
+{
+	char *p = splicepoint;
+	char *end = (char *)fdt + _fdt_data_size(fdt);
+
+	if (((p + oldlen) < p) || ((p + oldlen) > end))
+		return -FDT_ERR_BADOFFSET;
+	if ((end - oldlen + newlen) > ((char *)fdt + fdt_totalsize(fdt)))
+		return -FDT_ERR_NOSPACE;
+	memmove(p + newlen, p + oldlen, end - p - oldlen);
+	return 0;
+}
+
+static int _fdt_splice_mem_rsv(void *fdt, struct fdt_reserve_entry *p,
+			       int oldn, int newn)
+{
+	int delta = (newn - oldn) * sizeof(*p);
+	int err;
+	err = _fdt_splice(fdt, p, oldn * sizeof(*p), newn * sizeof(*p));
+	if (err)
+		return err;
+	fdt_set_off_dt_struct(fdt, fdt_off_dt_struct(fdt) + delta);
+	fdt_set_off_dt_strings(fdt, fdt_off_dt_strings(fdt) + delta);
+	return 0;
+}
+
+static int _fdt_splice_struct(void *fdt, void *p,
+			      int oldlen, int newlen)
+{
+	int delta = newlen - oldlen;
+	int err;
+
+	if ((err =
_fdt_splice(fdt, p, oldlen, newlen))) + return err; + + fdt_set_size_dt_struct(fdt, fdt_size_dt_struct(fdt) + delta); + fdt_set_off_dt_strings(fdt, fdt_off_dt_strings(fdt) + delta); + return 0; +} + +static int _fdt_splice_string(void *fdt, int newlen) +{ + void *p = (char *)fdt + + fdt_off_dt_strings(fdt) + fdt_size_dt_strings(fdt); + int err; + + if ((err = _fdt_splice(fdt, p, 0, newlen))) + return err; + + fdt_set_size_dt_strings(fdt, fdt_size_dt_strings(fdt) + newlen); + return 0; +} + +static int _fdt_find_add_string(void *fdt, const char *s) +{ + char *strtab = (char *)fdt + fdt_off_dt_strings(fdt); + const char *p; + char *new; + int len = strlen(s) + 1; + int err; + + p = _fdt_find_string(strtab, fdt_size_dt_strings(fdt), s); + if (p) + /* found it */ + return (p - strtab); + + new = strtab + fdt_size_dt_strings(fdt); + err = _fdt_splice_string(fdt, len); + if (err) + return err; + + memcpy(new, s, len); + return (new - strtab); +} + +int fdt_add_mem_rsv(void *fdt, uint64_t address, uint64_t size) +{ + struct fdt_reserve_entry *re; + int err; + + FDT_RW_CHECK_HEADER(fdt); + + re = _fdt_mem_rsv_w(fdt, fdt_num_mem_rsv(fdt)); + err = _fdt_splice_mem_rsv(fdt, re, 0, 1); + if (err) + return err; + + re->address = cpu_to_fdt64(address); + re->size = cpu_to_fdt64(size); + return 0; +} + +int fdt_del_mem_rsv(void *fdt, int n) +{ + struct fdt_reserve_entry *re = _fdt_mem_rsv_w(fdt, n); + int err; + + FDT_RW_CHECK_HEADER(fdt); + + if (n >= fdt_num_mem_rsv(fdt)) + return -FDT_ERR_NOTFOUND; + + err = _fdt_splice_mem_rsv(fdt, re, 1, 0); + if (err) + return err; + return 0; +} + +static int _fdt_resize_property(void *fdt, int nodeoffset, const char *name, + int len, struct fdt_property **prop) +{ + int oldlen; + int err; + + *prop = fdt_get_property_w(fdt, nodeoffset, name, &oldlen); + if (! 
(*prop)) + return oldlen; + + if ((err = _fdt_splice_struct(fdt, (*prop)->data, FDT_TAGALIGN(oldlen), + FDT_TAGALIGN(len)))) + return err; + + (*prop)->len = cpu_to_fdt32(len); + return 0; +} + +static int _fdt_add_property(void *fdt, int nodeoffset, const char *name, + int len, struct fdt_property **prop) +{ + int proplen; + int nextoffset; + int namestroff; + int err; + + if ((nextoffset = _fdt_check_node_offset(fdt, nodeoffset)) < 0) + return nextoffset; + + namestroff = _fdt_find_add_string(fdt, name); + if (namestroff < 0) + return namestroff; + + *prop = _fdt_offset_ptr_w(fdt, nextoffset); + proplen = sizeof(**prop) + FDT_TAGALIGN(len); + + err = _fdt_splice_struct(fdt, *prop, 0, proplen); + if (err) + return err; + + (*prop)->tag = cpu_to_fdt32(FDT_PROP); + (*prop)->nameoff = cpu_to_fdt32(namestroff); + (*prop)->len = cpu_to_fdt32(len); + return 0; +} + +int fdt_set_name(void *fdt, int nodeoffset, const char *name) +{ + char *namep; + int oldlen, newlen; + int err; + + FDT_RW_CHECK_HEADER(fdt); + + namep = (char *)(uintptr_t)fdt_get_name(fdt, nodeoffset, &oldlen); + if (!namep) + return oldlen; + + newlen = strlen(name); + + err = _fdt_splice_struct(fdt, namep, FDT_TAGALIGN(oldlen+1), + FDT_TAGALIGN(newlen+1)); + if (err) + return err; + + memcpy(namep, name, newlen+1); + return 0; +} + +int fdt_setprop(void *fdt, int nodeoffset, const char *name, + const void *val, int len) +{ + struct fdt_property *prop; + int err; + + FDT_RW_CHECK_HEADER(fdt); + + err = _fdt_resize_property(fdt, nodeoffset, name, len, &prop); + if (err == -FDT_ERR_NOTFOUND) + err = _fdt_add_property(fdt, nodeoffset, name, len, &prop); + if (err) + return err; + + memcpy(prop->data, val, len); + return 0; +} + +int fdt_appendprop(void *fdt, int nodeoffset, const char *name, + const void *val, int len) +{ + struct fdt_property *prop; + int err, oldlen, newlen; + + FDT_RW_CHECK_HEADER(fdt); + + prop = fdt_get_property_w(fdt, nodeoffset, name, &oldlen); + if (prop) { + newlen = len + oldlen; 
+ err = _fdt_splice_struct(fdt, prop->data, + FDT_TAGALIGN(oldlen), + FDT_TAGALIGN(newlen)); + if (err) + return err; + prop->len = cpu_to_fdt32(newlen); + memcpy(prop->data + oldlen, val, len); + } else { + err = _fdt_add_property(fdt, nodeoffset, name, len, &prop); + if (err) + return err; + memcpy(prop->data, val, len); + } + return 0; +} + +int fdt_delprop(void *fdt, int nodeoffset, const char *name) +{ + struct fdt_property *prop; + int len, proplen; + + FDT_RW_CHECK_HEADER(fdt); + + prop = fdt_get_property_w(fdt, nodeoffset, name, &len); + if (! prop) + return len; + + proplen = sizeof(*prop) + FDT_TAGALIGN(len); + return _fdt_splice_struct(fdt, prop, proplen, 0); +} + +int fdt_add_subnode_namelen(void *fdt, int parentoffset, + const char *name, int namelen) +{ + struct fdt_node_header *nh; + int offset, nextoffset; + int nodelen; + int err; + uint32_t tag; + fdt32_t *endtag; + + FDT_RW_CHECK_HEADER(fdt); + + offset = fdt_subnode_offset_namelen(fdt, parentoffset, name, namelen); + if (offset >= 0) + return -FDT_ERR_EXISTS; + else if (offset != -FDT_ERR_NOTFOUND) + return offset; + + /* Try to place the new node after the parent's properties */ + fdt_next_tag(fdt, parentoffset, &nextoffset); /* skip the BEGIN_NODE */ + do { + offset = nextoffset; + tag = fdt_next_tag(fdt, offset, &nextoffset); + } while ((tag == FDT_PROP) || (tag == FDT_NOP)); + + nh = _fdt_offset_ptr_w(fdt, offset); + nodelen = sizeof(*nh) + FDT_TAGALIGN(namelen+1) + FDT_TAGSIZE; + + err = _fdt_splice_struct(fdt, nh, 0, nodelen); + if (err) + return err; + + nh->tag = cpu_to_fdt32(FDT_BEGIN_NODE); + memset(nh->name, 0, FDT_TAGALIGN(namelen+1)); + memcpy(nh->name, name, namelen); + endtag = (fdt32_t *)((char *)nh + nodelen - FDT_TAGSIZE); + *endtag = cpu_to_fdt32(FDT_END_NODE); + + return offset; +} + +int fdt_add_subnode(void *fdt, int parentoffset, const char *name) +{ + return fdt_add_subnode_namelen(fdt, parentoffset, name, strlen(name)); +} + +int fdt_del_node(void *fdt, int nodeoffset) 
+{ + int endoffset; + + FDT_RW_CHECK_HEADER(fdt); + + endoffset = _fdt_node_end_offset(fdt, nodeoffset); + if (endoffset < 0) + return endoffset; + + return _fdt_splice_struct(fdt, _fdt_offset_ptr_w(fdt, nodeoffset), + endoffset - nodeoffset, 0); +} + +static void _fdt_packblocks(const char *old, char *new, + int mem_rsv_size, int struct_size) +{ + int mem_rsv_off, struct_off, strings_off; + + mem_rsv_off = FDT_ALIGN(sizeof(struct fdt_header), 8); + struct_off = mem_rsv_off + mem_rsv_size; + strings_off = struct_off + struct_size; + + memmove(new + mem_rsv_off, old + fdt_off_mem_rsvmap(old), mem_rsv_size); + fdt_set_off_mem_rsvmap(new, mem_rsv_off); + + memmove(new + struct_off, old + fdt_off_dt_struct(old), struct_size); + fdt_set_off_dt_struct(new, struct_off); + fdt_set_size_dt_struct(new, struct_size); + + memmove(new + strings_off, old + fdt_off_dt_strings(old), + fdt_size_dt_strings(old)); + fdt_set_off_dt_strings(new, strings_off); + fdt_set_size_dt_strings(new, fdt_size_dt_strings(old)); +} + +int fdt_open_into(const void *fdt, void *buf, int bufsize) +{ + int err; + int mem_rsv_size, struct_size; + int newsize; + const char *fdtstart = fdt; + const char *fdtend = fdtstart + fdt_totalsize(fdt); + char *tmp; + + FDT_CHECK_HEADER(fdt); + + mem_rsv_size = (fdt_num_mem_rsv(fdt)+1) + * sizeof(struct fdt_reserve_entry); + + if (fdt_version(fdt) >= 17) { + struct_size = fdt_size_dt_struct(fdt); + } else { + struct_size = 0; + while (fdt_next_tag(fdt, struct_size, &struct_size) != FDT_END) + ; + if (struct_size < 0) + return struct_size; + } + + if (!_fdt_blocks_misordered(fdt, mem_rsv_size, struct_size)) { + /* no further work necessary */ + err = fdt_move(fdt, buf, bufsize); + if (err) + return err; + fdt_set_version(buf, 17); + fdt_set_size_dt_struct(buf, struct_size); + fdt_set_totalsize(buf, bufsize); + return 0; + } + + /* Need to reorder */ + newsize = FDT_ALIGN(sizeof(struct fdt_header), 8) + mem_rsv_size + + struct_size + fdt_size_dt_strings(fdt); + + if 
(bufsize < newsize) + return -FDT_ERR_NOSPACE; + + /* First attempt to build converted tree at beginning of buffer */ + tmp = buf; + /* But if that overlaps with the old tree... */ + if (((tmp + newsize) > fdtstart) && (tmp < fdtend)) { + /* Try right after the old tree instead */ + tmp = (char *)(uintptr_t)fdtend; + if ((tmp + newsize) > ((char *)buf + bufsize)) + return -FDT_ERR_NOSPACE; + } + + _fdt_packblocks(fdt, tmp, mem_rsv_size, struct_size); + memmove(buf, tmp, newsize); + + fdt_set_magic(buf, FDT_MAGIC); + fdt_set_totalsize(buf, bufsize); + fdt_set_version(buf, 17); + fdt_set_last_comp_version(buf, 16); + fdt_set_boot_cpuid_phys(buf, fdt_boot_cpuid_phys(fdt)); + + return 0; +} + +int fdt_pack(void *fdt) +{ + int mem_rsv_size; + + FDT_RW_CHECK_HEADER(fdt); + + mem_rsv_size = (fdt_num_mem_rsv(fdt)+1) + * sizeof(struct fdt_reserve_entry); + _fdt_packblocks(fdt, fdt, mem_rsv_size, fdt_size_dt_struct(fdt)); + fdt_set_totalsize(fdt, _fdt_data_size(fdt)); + + return 0; +} diff --git a/tests/kvm-unit-tests/lib/libfdt/fdt_strerror.c b/tests/kvm-unit-tests/lib/libfdt/fdt_strerror.c new file mode 100644 index 00000000..e6c3ceee --- /dev/null +++ b/tests/kvm-unit-tests/lib/libfdt/fdt_strerror.c @@ -0,0 +1,96 @@ +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + * + * libfdt is dual licensed: you can use it either under the terms of + * the GPL, or the BSD license, at your option. + * + * a) This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + * + * Alternatively, + * + * b) Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include "libfdt_env.h" + +#include +#include + +#include "libfdt_internal.h" + +struct fdt_errtabent { + const char *str; +}; + +#define FDT_ERRTABENT(val) \ + [(val)] = { .str = #val, } + +static struct fdt_errtabent fdt_errtable[] = { + FDT_ERRTABENT(FDT_ERR_NOTFOUND), + FDT_ERRTABENT(FDT_ERR_EXISTS), + FDT_ERRTABENT(FDT_ERR_NOSPACE), + + FDT_ERRTABENT(FDT_ERR_BADOFFSET), + FDT_ERRTABENT(FDT_ERR_BADPATH), + FDT_ERRTABENT(FDT_ERR_BADSTATE), + + FDT_ERRTABENT(FDT_ERR_TRUNCATED), + FDT_ERRTABENT(FDT_ERR_BADMAGIC), + FDT_ERRTABENT(FDT_ERR_BADVERSION), + FDT_ERRTABENT(FDT_ERR_BADSTRUCTURE), + FDT_ERRTABENT(FDT_ERR_BADLAYOUT), +}; +#define FDT_ERRTABSIZE (sizeof(fdt_errtable) / sizeof(fdt_errtable[0])) + +const char *fdt_strerror(int errval) +{ + if (errval > 0) + return ""; + else if (errval == 0) + return ""; + else if (errval > -FDT_ERRTABSIZE) { + const char *s = fdt_errtable[-errval].str; + + if (s) + return s; + } + + return ""; +} diff --git a/tests/kvm-unit-tests/lib/libfdt/fdt_sw.c b/tests/kvm-unit-tests/lib/libfdt/fdt_sw.c new file mode 100644 index 00000000..f422754d --- /dev/null +++ b/tests/kvm-unit-tests/lib/libfdt/fdt_sw.c @@ -0,0 +1,256 @@ +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + * + * libfdt is dual licensed: you can use it either under the terms of + * the GPL, or the BSD license, at your option. + * + * a) This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + * + * Alternatively, + * + * b) Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+#include "libfdt_env.h"
+
+#include <fdt.h>
+#include <libfdt.h>
+
+#include "libfdt_internal.h"
+
+static int _fdt_sw_check_header(void *fdt)
+{
+	if (fdt_magic(fdt) != FDT_SW_MAGIC)
+		return -FDT_ERR_BADMAGIC;
+	/* FIXME: should check more details about the header state */
+	return 0;
+}
+
+#define FDT_SW_CHECK_HEADER(fdt) \
+	{ \
+		int err; \
+		if ((err = _fdt_sw_check_header(fdt)) != 0) \
+			return err; \
+	}
+
+static void *_fdt_grab_space(void *fdt, size_t len)
+{
+	int offset = fdt_size_dt_struct(fdt);
+	int spaceleft;
+
+	spaceleft = fdt_totalsize(fdt) - fdt_off_dt_struct(fdt)
+		- fdt_size_dt_strings(fdt);
+
+	if ((offset + len < offset) || (offset + len > spaceleft))
+		return NULL;
+
+	fdt_set_size_dt_struct(fdt, offset + len);
+	return _fdt_offset_ptr_w(fdt, offset);
+}
+
+int fdt_create(void *buf, int bufsize)
+{
+	void *fdt = buf;
+
+	if (bufsize < sizeof(struct fdt_header))
+		return -FDT_ERR_NOSPACE;
+
+	memset(buf, 0, bufsize);
+
+	fdt_set_magic(fdt, FDT_SW_MAGIC);
+	fdt_set_version(fdt, FDT_LAST_SUPPORTED_VERSION);
+	fdt_set_last_comp_version(fdt, FDT_FIRST_SUPPORTED_VERSION);
+	fdt_set_totalsize(fdt, bufsize);
+
+	fdt_set_off_mem_rsvmap(fdt, FDT_ALIGN(sizeof(struct fdt_header),
+					      sizeof(struct fdt_reserve_entry)));
+	fdt_set_off_dt_struct(fdt, fdt_off_mem_rsvmap(fdt));
+	fdt_set_off_dt_strings(fdt, bufsize);
+
+	return 0;
+}
+
+int fdt_add_reservemap_entry(void *fdt, uint64_t addr, uint64_t size)
+{
+	struct fdt_reserve_entry *re;
+	int offset;
+
+	FDT_SW_CHECK_HEADER(fdt);
+
+	if (fdt_size_dt_struct(fdt))
+		return -FDT_ERR_BADSTATE;
+
+	offset = fdt_off_dt_struct(fdt);
+	if ((offset + sizeof(*re)) > fdt_totalsize(fdt))
+		return -FDT_ERR_NOSPACE;
+
+	re = (struct fdt_reserve_entry *)((char *)fdt + offset);
+	re->address = cpu_to_fdt64(addr);
+	re->size = cpu_to_fdt64(size);
+
+	fdt_set_off_dt_struct(fdt, offset + sizeof(*re));
+
+	return 0;
+}
+
+int fdt_finish_reservemap(void *fdt)
+{
+	return fdt_add_reservemap_entry(fdt, 0, 0);
+}
+
+int fdt_begin_node(void
*fdt, const char *name) +{ + struct fdt_node_header *nh; + int namelen = strlen(name) + 1; + + FDT_SW_CHECK_HEADER(fdt); + + nh = _fdt_grab_space(fdt, sizeof(*nh) + FDT_TAGALIGN(namelen)); + if (! nh) + return -FDT_ERR_NOSPACE; + + nh->tag = cpu_to_fdt32(FDT_BEGIN_NODE); + memcpy(nh->name, name, namelen); + return 0; +} + +int fdt_end_node(void *fdt) +{ + fdt32_t *en; + + FDT_SW_CHECK_HEADER(fdt); + + en = _fdt_grab_space(fdt, FDT_TAGSIZE); + if (! en) + return -FDT_ERR_NOSPACE; + + *en = cpu_to_fdt32(FDT_END_NODE); + return 0; +} + +static int _fdt_find_add_string(void *fdt, const char *s) +{ + char *strtab = (char *)fdt + fdt_totalsize(fdt); + const char *p; + int strtabsize = fdt_size_dt_strings(fdt); + int len = strlen(s) + 1; + int struct_top, offset; + + p = _fdt_find_string(strtab - strtabsize, strtabsize, s); + if (p) + return p - strtab; + + /* Add it */ + offset = -strtabsize - len; + struct_top = fdt_off_dt_struct(fdt) + fdt_size_dt_struct(fdt); + if (fdt_totalsize(fdt) + offset < struct_top) + return 0; /* no more room :( */ + + memcpy(strtab + offset, s, len); + fdt_set_size_dt_strings(fdt, strtabsize + len); + return offset; +} + +int fdt_property(void *fdt, const char *name, const void *val, int len) +{ + struct fdt_property *prop; + int nameoff; + + FDT_SW_CHECK_HEADER(fdt); + + nameoff = _fdt_find_add_string(fdt, name); + if (nameoff == 0) + return -FDT_ERR_NOSPACE; + + prop = _fdt_grab_space(fdt, sizeof(*prop) + FDT_TAGALIGN(len)); + if (! prop) + return -FDT_ERR_NOSPACE; + + prop->tag = cpu_to_fdt32(FDT_PROP); + prop->nameoff = cpu_to_fdt32(nameoff); + prop->len = cpu_to_fdt32(len); + memcpy(prop->data, val, len); + return 0; +} + +int fdt_finish(void *fdt) +{ + char *p = (char *)fdt; + fdt32_t *end; + int oldstroffset, newstroffset; + uint32_t tag; + int offset, nextoffset; + + FDT_SW_CHECK_HEADER(fdt); + + /* Add terminator */ + end = _fdt_grab_space(fdt, sizeof(*end)); + if (! 
end) + return -FDT_ERR_NOSPACE; + *end = cpu_to_fdt32(FDT_END); + + /* Relocate the string table */ + oldstroffset = fdt_totalsize(fdt) - fdt_size_dt_strings(fdt); + newstroffset = fdt_off_dt_struct(fdt) + fdt_size_dt_struct(fdt); + memmove(p + newstroffset, p + oldstroffset, fdt_size_dt_strings(fdt)); + fdt_set_off_dt_strings(fdt, newstroffset); + + /* Walk the structure, correcting string offsets */ + offset = 0; + while ((tag = fdt_next_tag(fdt, offset, &nextoffset)) != FDT_END) { + if (tag == FDT_PROP) { + struct fdt_property *prop = + _fdt_offset_ptr_w(fdt, offset); + int nameoff; + + nameoff = fdt32_to_cpu(prop->nameoff); + nameoff += fdt_size_dt_strings(fdt); + prop->nameoff = cpu_to_fdt32(nameoff); + } + offset = nextoffset; + } + if (nextoffset < 0) + return nextoffset; + + /* Finally, adjust the header */ + fdt_set_totalsize(fdt, newstroffset + fdt_size_dt_strings(fdt)); + fdt_set_magic(fdt, FDT_MAGIC); + return 0; +} diff --git a/tests/kvm-unit-tests/lib/libfdt/fdt_wip.c b/tests/kvm-unit-tests/lib/libfdt/fdt_wip.c new file mode 100644 index 00000000..c5bbb68d --- /dev/null +++ b/tests/kvm-unit-tests/lib/libfdt/fdt_wip.c @@ -0,0 +1,118 @@ +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + * + * libfdt is dual licensed: you can use it either under the terms of + * the GPL, or the BSD license, at your option. + * + * a) This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + * + * Alternatively, + * + * b) Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "libfdt_env.h" + +#include <fdt.h> +#include <libfdt.h> + +#include "libfdt_internal.h" + +int fdt_setprop_inplace(void *fdt, int nodeoffset, const char *name, + const void *val, int len) +{ + void *propval; + int proplen; + + propval = fdt_getprop_w(fdt, nodeoffset, name, &proplen); + if (!
propval) + return proplen; + + if (proplen != len) + return -FDT_ERR_NOSPACE; + + memcpy(propval, val, len); + return 0; +} + +static void _fdt_nop_region(void *start, int len) +{ + fdt32_t *p; + + for (p = start; (char *)p < ((char *)start + len); p++) + *p = cpu_to_fdt32(FDT_NOP); +} + +int fdt_nop_property(void *fdt, int nodeoffset, const char *name) +{ + struct fdt_property *prop; + int len; + + prop = fdt_get_property_w(fdt, nodeoffset, name, &len); + if (! prop) + return len; + + _fdt_nop_region(prop, len + sizeof(*prop)); + + return 0; +} + +int _fdt_node_end_offset(void *fdt, int offset) +{ + int depth = 0; + + while ((offset >= 0) && (depth >= 0)) + offset = fdt_next_node(fdt, offset, &depth); + + return offset; +} + +int fdt_nop_node(void *fdt, int nodeoffset) +{ + int endoffset; + + endoffset = _fdt_node_end_offset(fdt, nodeoffset); + if (endoffset < 0) + return endoffset; + + _fdt_nop_region(fdt_offset_ptr_w(fdt, nodeoffset, 0), + endoffset - nodeoffset); + return 0; +} diff --git a/tests/kvm-unit-tests/lib/libfdt/libfdt.h b/tests/kvm-unit-tests/lib/libfdt/libfdt.h new file mode 100644 index 00000000..02baa84a --- /dev/null +++ b/tests/kvm-unit-tests/lib/libfdt/libfdt.h @@ -0,0 +1,1514 @@ +#ifndef _LIBFDT_H +#define _LIBFDT_H +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + * + * libfdt is dual licensed: you can use it either under the terms of + * the GPL, or the BSD license, at your option. + * + * a) This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + * + * Alternatively, + * + * b) Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <libfdt_env.h> +#include <fdt.h> + +#define FDT_FIRST_SUPPORTED_VERSION 0x10 +#define FDT_LAST_SUPPORTED_VERSION 0x11 + +/* Error codes: informative error codes */ +#define FDT_ERR_NOTFOUND 1 + /* FDT_ERR_NOTFOUND: The requested node or property does not exist */ +#define FDT_ERR_EXISTS 2 + /* FDT_ERR_EXISTS: Attempted to create a node or property which + * already exists */ +#define FDT_ERR_NOSPACE 3 + /* FDT_ERR_NOSPACE: Operation needed to expand the device + * tree, but its buffer did not have sufficient space to + * contain the expanded tree. Use fdt_open_into() to move the + * device tree to a buffer with more space. */ + +/* Error codes: codes for bad parameters */ +#define FDT_ERR_BADOFFSET 4 + /* FDT_ERR_BADOFFSET: Function was passed a structure block + * offset which is out-of-bounds, or which points to an + * unsuitable part of the structure for the operation. */ +#define FDT_ERR_BADPATH 5 + /* FDT_ERR_BADPATH: Function was passed a badly formatted path + * (e.g. missing a leading / for a function which requires an + * absolute path) */ +#define FDT_ERR_BADPHANDLE 6 + /* FDT_ERR_BADPHANDLE: Function was passed an invalid phandle + * value. phandle values of 0 and -1 are not permitted. */ +#define FDT_ERR_BADSTATE 7 + /* FDT_ERR_BADSTATE: Function was passed an incomplete device + * tree created by the sequential-write functions, which is + * not sufficiently complete for the requested operation. */ + +/* Error codes: codes for bad device tree blobs */ +#define FDT_ERR_TRUNCATED 8 + /* FDT_ERR_TRUNCATED: Structure block of the given device tree + * ends without an FDT_END tag. */ +#define FDT_ERR_BADMAGIC 9 + /* FDT_ERR_BADMAGIC: Given "device tree" appears not to be a + * device tree at all - it is missing the flattened device + * tree magic number. */ +#define FDT_ERR_BADVERSION 10 + /* FDT_ERR_BADVERSION: Given device tree has a version which + * can't be handled by the requested operation. 
For + * read-write functions, this may mean that fdt_open_into() is + * required to convert the tree to the expected version. */ +#define FDT_ERR_BADSTRUCTURE 11 + /* FDT_ERR_BADSTRUCTURE: Given device tree has a corrupt + * structure block or other serious error (e.g. misnested + * nodes, or subnodes preceding properties). */ +#define FDT_ERR_BADLAYOUT 12 + /* FDT_ERR_BADLAYOUT: For read-write functions, the given + * device tree has it's sub-blocks in an order that the + * function can't handle (memory reserve map, then structure, + * then strings). Use fdt_open_into() to reorganize the tree + * into a form suitable for the read-write operations. */ + +/* "Can't happen" error indicating a bug in libfdt */ +#define FDT_ERR_INTERNAL 13 + /* FDT_ERR_INTERNAL: libfdt has failed an internal assertion. + * Should never be returned, if it is, it indicates a bug in + * libfdt itself. */ + +#define FDT_ERR_MAX 13 + +/**********************************************************************/ +/* Low-level functions (you probably don't need these) */ +/**********************************************************************/ + +const void *fdt_offset_ptr(const void *fdt, int offset, unsigned int checklen); +static inline void *fdt_offset_ptr_w(void *fdt, int offset, int checklen) +{ + return (void *)(uintptr_t)fdt_offset_ptr(fdt, offset, checklen); +} + +uint32_t fdt_next_tag(const void *fdt, int offset, int *nextoffset); + +/**********************************************************************/ +/* Traversal functions */ +/**********************************************************************/ + +int fdt_next_node(const void *fdt, int offset, int *depth); + +/** + * fdt_first_subnode() - get offset of first direct subnode + * + * @fdt: FDT blob + * @offset: Offset of node to check + * @return offset of first subnode, or -FDT_ERR_NOTFOUND if there is none + */ +int fdt_first_subnode(const void *fdt, int offset); + +/** + * fdt_next_subnode() - get offset of next direct subnode 
+ * + * After first calling fdt_first_subnode(), call this function repeatedly to + * get direct subnodes of a parent node. + * + * @fdt: FDT blob + * @offset: Offset of previous subnode + * @return offset of next subnode, or -FDT_ERR_NOTFOUND if there are no more + * subnodes + */ +int fdt_next_subnode(const void *fdt, int offset); + +/**********************************************************************/ +/* General functions */ +/**********************************************************************/ + +#define fdt_get_header(fdt, field) \ + (fdt32_to_cpu(((const struct fdt_header *)(fdt))->field)) +#define fdt_magic(fdt) (fdt_get_header(fdt, magic)) +#define fdt_totalsize(fdt) (fdt_get_header(fdt, totalsize)) +#define fdt_off_dt_struct(fdt) (fdt_get_header(fdt, off_dt_struct)) +#define fdt_off_dt_strings(fdt) (fdt_get_header(fdt, off_dt_strings)) +#define fdt_off_mem_rsvmap(fdt) (fdt_get_header(fdt, off_mem_rsvmap)) +#define fdt_version(fdt) (fdt_get_header(fdt, version)) +#define fdt_last_comp_version(fdt) (fdt_get_header(fdt, last_comp_version)) +#define fdt_boot_cpuid_phys(fdt) (fdt_get_header(fdt, boot_cpuid_phys)) +#define fdt_size_dt_strings(fdt) (fdt_get_header(fdt, size_dt_strings)) +#define fdt_size_dt_struct(fdt) (fdt_get_header(fdt, size_dt_struct)) + +#define __fdt_set_hdr(name) \ + static inline void fdt_set_##name(void *fdt, uint32_t val) \ + { \ + struct fdt_header *fdth = (struct fdt_header*)fdt; \ + fdth->name = cpu_to_fdt32(val); \ + } +__fdt_set_hdr(magic); +__fdt_set_hdr(totalsize); +__fdt_set_hdr(off_dt_struct); +__fdt_set_hdr(off_dt_strings); +__fdt_set_hdr(off_mem_rsvmap); +__fdt_set_hdr(version); +__fdt_set_hdr(last_comp_version); +__fdt_set_hdr(boot_cpuid_phys); +__fdt_set_hdr(size_dt_strings); +__fdt_set_hdr(size_dt_struct); +#undef __fdt_set_hdr + +/** + * fdt_check_header - sanity check a device tree or possible device tree + * @fdt: pointer to data which might be a flattened device tree + * + * fdt_check_header() checks that the 
given buffer contains what + * appears to be a flattened device tree with sane information in its + * header. + * + * returns: + * 0, if the buffer appears to contain a valid device tree + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, standard meanings, as above + */ +int fdt_check_header(const void *fdt); + +/** + * fdt_move - move a device tree around in memory + * @fdt: pointer to the device tree to move + * @buf: pointer to memory where the device is to be moved + * @bufsize: size of the memory space at buf + * + * fdt_move() relocates, if possible, the device tree blob located at + * fdt to the buffer at buf of size bufsize. The buffer may overlap + * with the existing device tree blob at fdt. Therefore, + * fdt_move(fdt, fdt, fdt_totalsize(fdt)) + * should always succeed. + * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, bufsize is insufficient to contain the device tree + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, standard meanings + */ +int fdt_move(const void *fdt, void *buf, int bufsize); + +/**********************************************************************/ +/* Read-only functions */ +/**********************************************************************/ + +/** + * fdt_string - retrieve a string from the strings block of a device tree + * @fdt: pointer to the device tree blob + * @stroffset: offset of the string within the strings block (native endian) + * + * fdt_string() retrieves a pointer to a single string from the + * strings block of the device tree blob at fdt. + * + * returns: + * a pointer to the string, on success + * NULL, if stroffset is out of bounds + */ +const char *fdt_string(const void *fdt, int stroffset); + +/** + * fdt_num_mem_rsv - retrieve the number of memory reserve map entries + * @fdt: pointer to the device tree blob + * + * Returns the number of entries in the device tree blob's memory + * reservation map. 
This does not include the terminating 0,0 entry + * or any other (0,0) entries reserved for expansion. + * + * returns: + * the number of entries + */ +int fdt_num_mem_rsv(const void *fdt); + +/** + * fdt_get_mem_rsv - retrieve one memory reserve map entry + * @fdt: pointer to the device tree blob + * @address, @size: pointers to 64-bit variables + * + * On success, *address and *size will contain the address and size of + * the n-th reserve map entry from the device tree blob, in + * native-endian format. + * + * returns: + * 0, on success + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, standard meanings + */ +int fdt_get_mem_rsv(const void *fdt, int n, uint64_t *address, uint64_t *size); + +/** + * fdt_subnode_offset_namelen - find a subnode based on substring + * @fdt: pointer to the device tree blob + * @parentoffset: structure block offset of a node + * @name: name of the subnode to locate + * @namelen: number of characters of name to consider + * + * Identical to fdt_subnode_offset(), but only examine the first + * namelen characters of name for matching the subnode name. This is + * useful for finding subnodes based on a portion of a larger string, + * such as a full path. + */ +int fdt_subnode_offset_namelen(const void *fdt, int parentoffset, + const char *name, int namelen); +/** + * fdt_subnode_offset - find a subnode of a given node + * @fdt: pointer to the device tree blob + * @parentoffset: structure block offset of a node + * @name: name of the subnode to locate + * + * fdt_subnode_offset() finds a subnode of the node at structure block + * offset parentoffset with the given name. name may include a unit + * address, in which case fdt_subnode_offset() will find the subnode + * with that unit address, or the unit address may be omitted, in + * which case fdt_subnode_offset() will find an arbitrary subnode + * whose name excluding unit address matches the given name. 
+ * + * returns: + * structure block offset of the requested subnode (>=0), on success + * -FDT_ERR_NOTFOUND, if the requested subnode does not exist + * -FDT_ERR_BADOFFSET, if parentoffset did not point to an FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings. + */ +int fdt_subnode_offset(const void *fdt, int parentoffset, const char *name); + +/** + * fdt_path_offset - find a tree node by its full path + * @fdt: pointer to the device tree blob + * @path: full path of the node to locate + * + * fdt_path_offset() finds a node of a given path in the device tree. + * Each path component may omit the unit address portion, but the + * results of this are undefined if any such path component is + * ambiguous (that is if there are multiple nodes at the relevant + * level matching the given component, differentiated only by unit + * address). + * + * returns: + * structure block offset of the node with the requested path (>=0), on success + * -FDT_ERR_BADPATH, given path does not begin with '/' or is invalid + * -FDT_ERR_NOTFOUND, if the requested node does not exist + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings. + */ +int fdt_path_offset(const void *fdt, const char *path); + +/** + * fdt_get_name - retrieve the name of a given node + * @fdt: pointer to the device tree blob + * @nodeoffset: structure block offset of the starting node + * @lenp: pointer to an integer variable (will be overwritten) or NULL + * + * fdt_get_name() retrieves the name (including unit address) of the + * device tree node at structure block offset nodeoffset. If lenp is + * non-NULL, the length of this name is also returned, in the integer + * pointed to by lenp. 
+ * + * returns: + * pointer to the node's name, on success + * If lenp is non-NULL, *lenp contains the length of that name (>=0) + * NULL, on error + * if lenp is non-NULL *lenp contains an error code (<0): + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, standard meanings + */ +const char *fdt_get_name(const void *fdt, int nodeoffset, int *lenp); + +/** + * fdt_first_property_offset - find the offset of a node's first property + * @fdt: pointer to the device tree blob + * @nodeoffset: structure block offset of a node + * + * fdt_first_property_offset() finds the first property of the node at + * the given structure block offset. + * + * returns: + * structure block offset of the property (>=0), on success + * -FDT_ERR_NOTFOUND, if the requested node has no properties + * -FDT_ERR_BADOFFSET, if nodeoffset did not point to an FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings. + */ +int fdt_first_property_offset(const void *fdt, int nodeoffset); + +/** + * fdt_next_property_offset - step through a node's properties + * @fdt: pointer to the device tree blob + * @offset: structure block offset of a property + * + * fdt_next_property_offset() finds the property immediately after the + * one at the given structure block offset. This will be a property + * of the same node as the given property. + * + * returns: + * structure block offset of the next property (>=0), on success + * -FDT_ERR_NOTFOUND, if the given property is the last in its node + * -FDT_ERR_BADOFFSET, if nodeoffset did not point to an FDT_PROP tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings. 
+ */ +int fdt_next_property_offset(const void *fdt, int offset); + +/** + * fdt_get_property_by_offset - retrieve the property at a given offset + * @fdt: pointer to the device tree blob + * @offset: offset of the property to retrieve + * @lenp: pointer to an integer variable (will be overwritten) or NULL + * + * fdt_get_property_by_offset() retrieves a pointer to the + * fdt_property structure within the device tree blob at the given + * offset. If lenp is non-NULL, the length of the property value is + * also returned, in the integer pointed to by lenp. + * + * returns: + * pointer to the structure representing the property + * if lenp is non-NULL, *lenp contains the length of the property + * value (>=0) + * NULL, on error + * if lenp is non-NULL, *lenp contains an error code (<0): + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_PROP tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +const struct fdt_property *fdt_get_property_by_offset(const void *fdt, + int offset, + int *lenp); + +/** + * fdt_get_property_namelen - find a property based on substring + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to find + * @name: name of the property to find + * @namelen: number of characters of name to consider + * @lenp: pointer to an integer variable (will be overwritten) or NULL + * + * Identical to fdt_get_property_namelen(), but only examine the first + * namelen characters of name for matching the property name. 
+ */ +const struct fdt_property *fdt_get_property_namelen(const void *fdt, + int nodeoffset, + const char *name, + int namelen, int *lenp); + +/** + * fdt_get_property - find a given property in a given node + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to find + * @name: name of the property to find + * @lenp: pointer to an integer variable (will be overwritten) or NULL + * + * fdt_get_property() retrieves a pointer to the fdt_property + * structure within the device tree blob corresponding to the property + * named 'name' of the node at offset nodeoffset. If lenp is + * non-NULL, the length of the property value is also returned, in the + * integer pointed to by lenp. + * + * returns: + * pointer to the structure representing the property + * if lenp is non-NULL, *lenp contains the length of the property + * value (>=0) + * NULL, on error + * if lenp is non-NULL, *lenp contains an error code (<0): + * -FDT_ERR_NOTFOUND, node does not have named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +const struct fdt_property *fdt_get_property(const void *fdt, int nodeoffset, + const char *name, int *lenp); +static inline struct fdt_property *fdt_get_property_w(void *fdt, int nodeoffset, + const char *name, + int *lenp) +{ + return (struct fdt_property *)(uintptr_t) + fdt_get_property(fdt, nodeoffset, name, lenp); +} + +/** + * fdt_getprop_by_offset - retrieve the value of a property at a given offset + * @fdt: pointer to the device tree blob + * @offset: offset of the property to read + * @namep: pointer to a string variable (will be overwritten) or NULL + * @lenp: pointer to an integer variable (will be overwritten) or NULL + * + * fdt_getprop_by_offset() retrieves a pointer to the value of the + * property at structure block offset 'offset' (this 
will be a pointer + * to within the device blob itself, not a copy of the value). If + * lenp is non-NULL, the length of the property value is also + * returned, in the integer pointed to by lenp. If namep is non-NULL, + * the property's name will also be returned in the char * pointed to + * by namep (this will be a pointer to within the device tree's string + * block, not a new copy of the name). + * + * returns: + * pointer to the property's value + * if lenp is non-NULL, *lenp contains the length of the property + * value (>=0) + * if namep is non-NULL *namep contains a pointer to the property + * name. + * NULL, on error + * if lenp is non-NULL, *lenp contains an error code (<0): + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_PROP tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +const void *fdt_getprop_by_offset(const void *fdt, int offset, + const char **namep, int *lenp); + +/** + * fdt_getprop_namelen - get property value based on substring + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to find + * @name: name of the property to find + * @namelen: number of characters of name to consider + * @lenp: pointer to an integer variable (will be overwritten) or NULL + * + * Identical to fdt_getprop(), but only examine the first namelen + * characters of name for matching the property name. 
+ */ +const void *fdt_getprop_namelen(const void *fdt, int nodeoffset, + const char *name, int namelen, int *lenp); + +/** + * fdt_getprop - retrieve the value of a given property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to find + * @name: name of the property to find + * @lenp: pointer to an integer variable (will be overwritten) or NULL + * + * fdt_getprop() retrieves a pointer to the value of the property + * named 'name' of the node at offset nodeoffset (this will be a + * pointer to within the device blob itself, not a copy of the value). + * If lenp is non-NULL, the length of the property value is also + * returned, in the integer pointed to by lenp. + * + * returns: + * pointer to the property's value + * if lenp is non-NULL, *lenp contains the length of the property + * value (>=0) + * NULL, on error + * if lenp is non-NULL, *lenp contains an error code (<0): + * -FDT_ERR_NOTFOUND, node does not have named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +const void *fdt_getprop(const void *fdt, int nodeoffset, + const char *name, int *lenp); +static inline void *fdt_getprop_w(void *fdt, int nodeoffset, + const char *name, int *lenp) +{ + return (void *)(uintptr_t)fdt_getprop(fdt, nodeoffset, name, lenp); +} + +/** + * fdt_get_phandle - retrieve the phandle of a given node + * @fdt: pointer to the device tree blob + * @nodeoffset: structure block offset of the node + * + * fdt_get_phandle() retrieves the phandle of the device tree node at + * structure block offset nodeoffset. 
+ * + * returns: + * the phandle of the node at nodeoffset, on success (!= 0, != -1) + * 0, if the node has no phandle, or another error occurs + */ +uint32_t fdt_get_phandle(const void *fdt, int nodeoffset); + +/** + * fdt_get_alias_namelen - get alias based on substring + * @fdt: pointer to the device tree blob + * @name: name of the alias to look up + * @namelen: number of characters of name to consider + * + * Identical to fdt_get_alias(), but only examine the first namelen + * characters of name for matching the alias name. + */ +const char *fdt_get_alias_namelen(const void *fdt, + const char *name, int namelen); + +/** + * fdt_get_alias - retrieve the path referenced by a given alias + * @fdt: pointer to the device tree blob + * @name: name of the alias to look up + * + * fdt_get_alias() retrieves the value of a given alias. That is, the + * value of the property named 'name' in the node /aliases. + * + * returns: + * a pointer to the expansion of the alias named 'name', if it exists + * NULL, if the given alias or the /aliases node does not exist + */ +const char *fdt_get_alias(const void *fdt, const char *name); + +/** + * fdt_get_path - determine the full path of a node + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose path to find + * @buf: character buffer to contain the returned path (will be overwritten) + * @buflen: size of the character buffer at buf + * + * fdt_get_path() computes the full path of the node at offset + * nodeoffset, and records that path in the buffer at buf. + * + * NOTE: This function is expensive, as it must scan the device tree + * structure from the start to nodeoffset. + * + * returns: + * 0, on success + * buf contains the absolute path of the node at + * nodeoffset, as a NUL-terminated string. + * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_NOSPACE, the path of the given node is longer than (bufsize-1) + * characters and will not fit in the given buffer. 
+ * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_get_path(const void *fdt, int nodeoffset, char *buf, int buflen); + +/** + * fdt_supernode_atdepth_offset - find a specific ancestor of a node + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose parent to find + * @supernodedepth: depth of the ancestor to find + * @nodedepth: pointer to an integer variable (will be overwritten) or NULL + * + * fdt_supernode_atdepth_offset() finds an ancestor of the given node + * at a specific depth from the root (where the root itself has depth + * 0, its immediate subnodes depth 1 and so forth). So + * fdt_supernode_atdepth_offset(fdt, nodeoffset, 0, NULL); + * will always return 0, the offset of the root node. If the node at + * nodeoffset has depth D, then: + * fdt_supernode_atdepth_offset(fdt, nodeoffset, D, NULL); + * will return nodeoffset itself. + * + * NOTE: This function is expensive, as it must scan the device tree + * structure from the start to nodeoffset. + * + * returns: + + * structure block offset of the node at node offset's ancestor + * of depth supernodedepth (>=0), on success + * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag +* -FDT_ERR_NOTFOUND, supernodedepth was greater than the depth of nodeoffset + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_supernode_atdepth_offset(const void *fdt, int nodeoffset, + int supernodedepth, int *nodedepth); + +/** + * fdt_node_depth - find the depth of a given node + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose parent to find + * + * fdt_node_depth() finds the depth of a given node. The root node + * has depth 0, its immediate subnodes depth 1 and so forth. + * + * NOTE: This function is expensive, as it must scan the device tree + * structure from the start to nodeoffset. 
+ * + * returns: + * depth of the node at nodeoffset (>=0), on success + * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_node_depth(const void *fdt, int nodeoffset); + +/** + * fdt_parent_offset - find the parent of a given node + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose parent to find + * + * fdt_parent_offset() locates the parent node of a given node (that + * is, it finds the offset of the node which contains the node at + * nodeoffset as a subnode). + * + * NOTE: This function is expensive, as it must scan the device tree + * structure from the start to nodeoffset, *twice*. + * + * returns: + * structure block offset of the parent of the node at nodeoffset + * (>=0), on success + * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_parent_offset(const void *fdt, int nodeoffset); + +/** + * fdt_node_offset_by_prop_value - find nodes with a given property value + * @fdt: pointer to the device tree blob + * @startoffset: only find nodes after this offset + * @propname: property name to check + * @propval: property value to search for + * @proplen: length of the value in propval + * + * fdt_node_offset_by_prop_value() returns the offset of the first + * node after startoffset, which has a property named propname whose + * value is of length proplen and has value equal to propval; or if + * startoffset is -1, the very first such node in the tree. 
+ * + * To iterate through all nodes matching the criterion, the following + * idiom can be used: + * offset = fdt_node_offset_by_prop_value(fdt, -1, propname, + * propval, proplen); + * while (offset != -FDT_ERR_NOTFOUND) { + * // other code here + * offset = fdt_node_offset_by_prop_value(fdt, offset, propname, + * propval, proplen); + * } + * + * Note the -1 in the first call to the function, if 0 is used here + * instead, the function will never locate the root node, even if it + * matches the criterion. + * + * returns: + * structure block offset of the located node (>= 0, >startoffset), + * on success + * -FDT_ERR_NOTFOUND, no node matching the criterion exists in the + * tree after startoffset + * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_node_offset_by_prop_value(const void *fdt, int startoffset, + const char *propname, + const void *propval, int proplen); + +/** + * fdt_node_offset_by_phandle - find the node with a given phandle + * @fdt: pointer to the device tree blob + * @phandle: phandle value + * + * fdt_node_offset_by_phandle() returns the offset of the node + * which has the given phandle value. If there is more than one node + * in the tree with the given phandle (an invalid tree), results are + * undefined. 
+ * + * returns: + * structure block offset of the located node (>= 0), on success + * -FDT_ERR_NOTFOUND, no node with that phandle exists + * -FDT_ERR_BADPHANDLE, given phandle value was invalid (0 or -1) + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_node_offset_by_phandle(const void *fdt, uint32_t phandle); + +/** + * fdt_node_check_compatible: check a node's compatible property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of a tree node + * @compatible: string to match against + * + * + * fdt_node_check_compatible() returns 0 if the given node contains a + * 'compatible' property with the given string as one of its elements, + * it returns non-zero otherwise, or on error. + * + * returns: + * 0, if the node has a 'compatible' property listing the given string + * 1, if the node has a 'compatible' property, but it does not list + * the given string + * -FDT_ERR_NOTFOUND, if the given node has no 'compatible' property + * -FDT_ERR_BADOFFSET, if nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_node_check_compatible(const void *fdt, int nodeoffset, + const char *compatible); + +/** + * fdt_node_offset_by_compatible - find nodes with a given 'compatible' value + * @fdt: pointer to the device tree blob + * @startoffset: only find nodes after this offset + * @compatible: 'compatible' string to match against + * + * fdt_node_offset_by_compatible() returns the offset of the first + * node after startoffset, which has a 'compatible' property which + * lists the given compatible string; or if startoffset is -1, the + * very first such node in the tree. 
+ * + * To iterate through all nodes matching the criterion, the following + * idiom can be used: + * offset = fdt_node_offset_by_compatible(fdt, -1, compatible); + * while (offset != -FDT_ERR_NOTFOUND) { + * // other code here + * offset = fdt_node_offset_by_compatible(fdt, offset, compatible); + * } + * + * Note the -1 in the first call to the function, if 0 is used here + * instead, the function will never locate the root node, even if it + * matches the criterion. + * + * returns: + * structure block offset of the located node (>= 0, >startoffset), + * on success + * -FDT_ERR_NOTFOUND, no node matching the criterion exists in the + * tree after startoffset + * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_node_offset_by_compatible(const void *fdt, int startoffset, + const char *compatible); + +/** + * fdt_stringlist_contains - check a string list property for a string + * @strlist: Property containing a list of strings to check + * @listlen: Length of property + * @str: String to search for + * + * This is a utility function provided for convenience. The list contains + * one or more strings, each terminated by \0, as is found in a device tree + * "compatible" property. 
+ * + * @return: 1 if the string is found in the list, 0 not found, or invalid list + */ +int fdt_stringlist_contains(const char *strlist, int listlen, const char *str); + +/**********************************************************************/ +/* Write-in-place functions */ +/**********************************************************************/ + +/** + * fdt_setprop_inplace - change a property's value, but not its size + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: pointer to data to replace the property value with + * @len: length of the property value + * + * fdt_setprop_inplace() replaces the value of a given property with + * the data in val, of length len. This function cannot change the + * size of a property, and so will only work if len is equal to the + * current length of the property. + * + * This function will alter only the bytes in the blob which contain + * the given property value, and will not alter or move any other part + * of the tree. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, if len is not equal to the property's current length + * -FDT_ERR_NOTFOUND, node does not have the named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_setprop_inplace(void *fdt, int nodeoffset, const char *name, + const void *val, int len); + +/** + * fdt_setprop_inplace_u32 - change the value of a 32-bit integer property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: 32-bit integer value to replace the property with + * + * fdt_setprop_inplace_u32() replaces the value of a given property + * with the 32-bit integer value in val, converting val to big-endian + * if necessary. This function cannot change the size of a property, + * and so will only work if the property already exists and has length + * 4. + * + * This function will alter only the bytes in the blob which contain + * the given property value, and will not alter or move any other part + * of the tree. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, if the property's length is not equal to 4 + * -FDT_ERR_NOTFOUND, node does not have the named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +static inline int fdt_setprop_inplace_u32(void *fdt, int nodeoffset, + const char *name, uint32_t val) +{ + fdt32_t tmp = cpu_to_fdt32(val); + return fdt_setprop_inplace(fdt, nodeoffset, name, &tmp, sizeof(tmp)); +} + +/** + * fdt_setprop_inplace_u64 - change the value of a 64-bit integer property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: 64-bit integer value to replace the property with + * + * fdt_setprop_inplace_u64() replaces the value of a given property + * with the 64-bit integer value in val, converting val to big-endian + * if necessary. This function cannot change the size of a property, + * and so will only work if the property already exists and has length + * 8. + * + * This function will alter only the bytes in the blob which contain + * the given property value, and will not alter or move any other part + * of the tree. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, if the property's length is not equal to 8 + * -FDT_ERR_NOTFOUND, node does not have the named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +static inline int fdt_setprop_inplace_u64(void *fdt, int nodeoffset, + const char *name, uint64_t val) +{ + fdt64_t tmp = cpu_to_fdt64(val); + return fdt_setprop_inplace(fdt, nodeoffset, name, &tmp, sizeof(tmp)); +} + +/** + * fdt_setprop_inplace_cell - change the value of a single-cell property + * + * This is an alternative name for fdt_setprop_inplace_u32() + */ +static inline int fdt_setprop_inplace_cell(void *fdt, int nodeoffset, + const char *name, uint32_t val) +{ + return fdt_setprop_inplace_u32(fdt, nodeoffset, name, val); +} + +/** + * fdt_nop_property - replace a property with nop tags + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to nop + * @name: name of the property to nop + * + * fdt_nop_property() will replace a given property's representation + * in the blob with FDT_NOP tags, effectively removing it from the + * tree. + * + * This function will alter only the bytes in the blob which contain + * the property, and will not alter or move any other part of the + * tree. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOTFOUND, node does not have the named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_nop_property(void *fdt, int nodeoffset, const char *name); + +/** + * fdt_nop_node - replace a node (subtree) with nop tags + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node to nop + * + * fdt_nop_node() will replace a given node's representation in the + * blob, including all its subnodes, if any, with FDT_NOP tags, + * effectively removing it from the tree. + * + * This function will alter only the bytes in the blob which contain + * the node and its properties and subnodes, and will not alter or + * move any other part of the tree. + * + * returns: + * 0, on success + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_nop_node(void *fdt, int nodeoffset); + +/**********************************************************************/ +/* Sequential write functions */ +/**********************************************************************/ + +int fdt_create(void *buf, int bufsize); +int fdt_add_reservemap_entry(void *fdt, uint64_t addr, uint64_t size); +int fdt_finish_reservemap(void *fdt); +int fdt_begin_node(void *fdt, const char *name); +int fdt_property(void *fdt, const char *name, const void *val, int len); +static inline int fdt_property_u32(void *fdt, const char *name, uint32_t val) +{ + fdt32_t tmp = cpu_to_fdt32(val); + return fdt_property(fdt, name, &tmp, sizeof(tmp)); +} +static inline int fdt_property_u64(void *fdt, const char *name, uint64_t val) +{ + fdt64_t tmp = cpu_to_fdt64(val); + return fdt_property(fdt, name, &tmp, sizeof(tmp)); +} 
+static inline int fdt_property_cell(void *fdt, const char *name, uint32_t val) +{ + return fdt_property_u32(fdt, name, val); +} +#define fdt_property_string(fdt, name, str) \ + fdt_property(fdt, name, str, strlen(str)+1) +int fdt_end_node(void *fdt); +int fdt_finish(void *fdt); + +/**********************************************************************/ +/* Read-write functions */ +/**********************************************************************/ + +int fdt_create_empty_tree(void *buf, int bufsize); +int fdt_open_into(const void *fdt, void *buf, int bufsize); +int fdt_pack(void *fdt); + +/** + * fdt_add_mem_rsv - add one memory reserve map entry + * @fdt: pointer to the device tree blob + * @address, @size: 64-bit values (native endian) + * + * Adds a reserve map entry to the given blob reserving a region at + * address address of length size. + * + * This function will insert data into the reserve map and will + * therefore change the indexes of some entries in the table. + * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new reservation entry + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_add_mem_rsv(void *fdt, uint64_t address, uint64_t size); + +/** + * fdt_del_mem_rsv - remove a memory reserve map entry + * @fdt: pointer to the device tree blob + * @n: entry to remove + * + * fdt_del_mem_rsv() removes the n-th memory reserve map entry from + * the blob. + * + * This function will delete data from the reservation table and will + * therefore change the indexes of some entries in the table. + * + * returns: + * 0, on success + * -FDT_ERR_NOTFOUND, there is no entry of the given index (i.e. 
there + * are less than n+1 reserve map entries) + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_del_mem_rsv(void *fdt, int n); + +/** + * fdt_set_name - change the name of a given node + * @fdt: pointer to the device tree blob + * @nodeoffset: structure block offset of a node + * @name: name to give the node + * + * fdt_set_name() replaces the name (including unit address, if any) + * of the given node with the given string. NOTE: this function can't + * efficiently check if the new name is unique amongst the given + * node's siblings; results are undefined if this function is invoked + * with a name equal to one of the given node's siblings. + * + * This function may insert or delete data from the blob, and will + * therefore change the offsets of some existing nodes. + * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob + * to contain the new name + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, standard meanings + */ +int fdt_set_name(void *fdt, int nodeoffset, const char *name); + +/** + * fdt_setprop - create or change a property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: pointer to data to set the property value to + * @len: length of the property value + * + * fdt_setprop() sets the value of the named property in the given + * node to the given value and length, creating the property if it + * does not already exist. + * + * This function may insert or delete data from the blob, and will + * therefore change the offsets of some existing nodes. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_setprop(void *fdt, int nodeoffset, const char *name, + const void *val, int len); + +/** + * fdt_setprop_u32 - set a property to a 32-bit integer + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: 32-bit integer value for the property (native endian) + * + * fdt_setprop_u32() sets the value of the named property in the given + * node to the given 32-bit integer value (converting to big-endian if + * necessary), or creates a new property with that value if it does + * not already exist. + * + * This function may insert or delete data from the blob, and will + * therefore change the offsets of some existing nodes. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +static inline int fdt_setprop_u32(void *fdt, int nodeoffset, const char *name, + uint32_t val) +{ + fdt32_t tmp = cpu_to_fdt32(val); + return fdt_setprop(fdt, nodeoffset, name, &tmp, sizeof(tmp)); +} + +/** + * fdt_setprop_u64 - set a property to a 64-bit integer + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: 64-bit integer value for the property (native endian) + * + * fdt_setprop_u64() sets the value of the named property in the given + * node to the given 64-bit integer value (converting to big-endian if + * necessary), or creates a new property with that value if it does + * not already exist. + * + * This function may insert or delete data from the blob, and will + * therefore change the offsets of some existing nodes. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +static inline int fdt_setprop_u64(void *fdt, int nodeoffset, const char *name, + uint64_t val) +{ + fdt64_t tmp = cpu_to_fdt64(val); + return fdt_setprop(fdt, nodeoffset, name, &tmp, sizeof(tmp)); +} + +/** + * fdt_setprop_cell - set a property to a single cell value + * + * This is an alternative name for fdt_setprop_u32() + */ +static inline int fdt_setprop_cell(void *fdt, int nodeoffset, const char *name, + uint32_t val) +{ + return fdt_setprop_u32(fdt, nodeoffset, name, val); +} + +/** + * fdt_setprop_string - set a property to a string value + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @str: string value for the property + * + * fdt_setprop_string() sets the value of the named property in the + * given node to the given string value (using the length of the + * string to determine the new length of the property), or creates a + * new property with that value if it does not already exist. + * + * This function may insert or delete data from the blob, and will + * therefore change the offsets of some existing nodes. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +#define fdt_setprop_string(fdt, nodeoffset, name, str) \ + fdt_setprop((fdt), (nodeoffset), (name), (str), strlen(str)+1) + +/** + * fdt_appendprop - append to or create a property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to append to + * @val: pointer to data to append to the property value + * @len: length of the data to append to the property value + * + * fdt_appendprop() appends the value to the named property in the + * given node, creating the property if it does not already exist. + * + * This function may insert data into the blob, and will therefore + * change the offsets of some existing nodes. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_appendprop(void *fdt, int nodeoffset, const char *name, + const void *val, int len); + +/** + * fdt_appendprop_u32 - append a 32-bit integer value to a property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: 32-bit integer value to append to the property (native endian) + * + * fdt_appendprop_u32() appends the given 32-bit integer value + * (converting to big-endian if necessary) to the value of the named + * property in the given node, or creates a new property with that + * value if it does not already exist. + * + * This function may insert data into the blob, and will therefore + * change the offsets of some existing nodes. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +static inline int fdt_appendprop_u32(void *fdt, int nodeoffset, + const char *name, uint32_t val) +{ + fdt32_t tmp = cpu_to_fdt32(val); + return fdt_appendprop(fdt, nodeoffset, name, &tmp, sizeof(tmp)); +} + +/** + * fdt_appendprop_u64 - append a 64-bit integer value to a property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: 64-bit integer value to append to the property (native endian) + * + * fdt_appendprop_u64() appends the given 64-bit integer value + * (converting to big-endian if necessary) to the value of the named + * property in the given node, or creates a new property with that + * value if it does not already exist. + * + * This function may insert data into the blob, and will therefore + * change the offsets of some existing nodes. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +static inline int fdt_appendprop_u64(void *fdt, int nodeoffset, + const char *name, uint64_t val) +{ + fdt64_t tmp = cpu_to_fdt64(val); + return fdt_appendprop(fdt, nodeoffset, name, &tmp, sizeof(tmp)); +} + +/** + * fdt_appendprop_cell - append a single cell value to a property + * + * This is an alternative name for fdt_appendprop_u32() + */ +static inline int fdt_appendprop_cell(void *fdt, int nodeoffset, + const char *name, uint32_t val) +{ + return fdt_appendprop_u32(fdt, nodeoffset, name, val); +} + +/** + * fdt_appendprop_string - append a string to a property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @str: string value to append to the property + * + * fdt_appendprop_string() appends the given string to the value of + * the named property in the given node, or creates a new property + * with that value if it does not already exist. + * + * This function may insert data into the blob, and will therefore + * change the offsets of some existing nodes. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +#define fdt_appendprop_string(fdt, nodeoffset, name, str) \ + fdt_appendprop((fdt), (nodeoffset), (name), (str), strlen(str)+1) + +/** + * fdt_delprop - delete a property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to nop + * @name: name of the property to nop + * + * fdt_del_property() will delete the given property. + * + * This function will delete data from the blob, and will therefore + * change the offsets of some existing nodes. + * + * returns: + * 0, on success + * -FDT_ERR_NOTFOUND, node does not have the named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_delprop(void *fdt, int nodeoffset, const char *name); + +/** + * fdt_add_subnode_namelen - creates a new node based on substring + * @fdt: pointer to the device tree blob + * @parentoffset: structure block offset of a node + * @name: name of the subnode to locate + * @namelen: number of characters of name to consider + * + * Identical to fdt_add_subnode(), but use only the first namelen + * characters of name as the name of the new node. This is useful for + * creating subnodes based on a portion of a larger string, such as a + * full path. 
+ */ +int fdt_add_subnode_namelen(void *fdt, int parentoffset, + const char *name, int namelen); + +/** + * fdt_add_subnode - creates a new node + * @fdt: pointer to the device tree blob + * @parentoffset: structure block offset of a node + * @name: name of the subnode to locate + * + * fdt_add_subnode() creates a new node as a subnode of the node at + * structure block offset parentoffset, with the given name (which + * should include the unit address, if any). + * + * This function will insert data into the blob, and will therefore + * change the offsets of some existing nodes. + * + * returns: + * structure block offset of the created subnode (>=0), on success + * -FDT_ERR_NOTFOUND, if the requested subnode does not exist + * -FDT_ERR_BADOFFSET, if parentoffset did not point to an FDT_BEGIN_NODE tag + * -FDT_ERR_EXISTS, if the node at parentoffset already has a subnode of + * the given name + * -FDT_ERR_NOSPACE, if there is insufficient free space in the + * blob to contain the new node + * -FDT_ERR_NOSPACE + * -FDT_ERR_BADLAYOUT + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings. + */ +int fdt_add_subnode(void *fdt, int parentoffset, const char *name); + +/** + * fdt_del_node - delete a node (subtree) + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node to nop + * + * fdt_del_node() will remove the given node, including all its + * subnodes if any, from the blob. + * + * This function will delete data from the blob, and will therefore + * change the offsets of some existing nodes.
+ * + * returns: + * 0, on success + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_del_node(void *fdt, int nodeoffset); + +/**********************************************************************/ +/* Debugging / informational functions */ +/**********************************************************************/ + +const char *fdt_strerror(int errval); + +#endif /* _LIBFDT_H */ diff --git a/tests/kvm-unit-tests/lib/libfdt/libfdt_env.h b/tests/kvm-unit-tests/lib/libfdt/libfdt_env.h new file mode 100644 index 00000000..9dea97df --- /dev/null +++ b/tests/kvm-unit-tests/lib/libfdt/libfdt_env.h @@ -0,0 +1,111 @@ +#ifndef _LIBFDT_ENV_H +#define _LIBFDT_ENV_H +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + * Copyright 2012 Kim Phillips, Freescale Semiconductor. + * + * libfdt is dual licensed: you can use it either under the terms of + * the GPL, or the BSD license, at your option. + * + * a) This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + * + * Alternatively, + * + * b) Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#ifdef __CHECKER__ +#define __force __attribute__((force)) +#define __bitwise __attribute__((bitwise)) +#else +#define __force +#define __bitwise +#endif + +typedef uint16_t __bitwise fdt16_t; +typedef uint32_t __bitwise fdt32_t; +typedef uint64_t __bitwise fdt64_t; + +#define EXTRACT_BYTE(x, n) ((unsigned long long)((uint8_t *)&x)[n]) +#define CPU_TO_FDT16(x) ((EXTRACT_BYTE(x, 0) << 8) | EXTRACT_BYTE(x, 1)) +#define CPU_TO_FDT32(x) ((EXTRACT_BYTE(x, 0) << 24) | (EXTRACT_BYTE(x, 1) << 16) | \ + (EXTRACT_BYTE(x, 2) << 8) | EXTRACT_BYTE(x, 3)) +#define CPU_TO_FDT64(x) ((EXTRACT_BYTE(x, 0) << 56) | (EXTRACT_BYTE(x, 1) << 48) | \ + (EXTRACT_BYTE(x, 2) << 40) | (EXTRACT_BYTE(x, 3) << 32) | \ + (EXTRACT_BYTE(x, 4) << 24) | (EXTRACT_BYTE(x, 5) << 16) | \ + (EXTRACT_BYTE(x, 6) << 8) | EXTRACT_BYTE(x, 7)) + +static inline uint16_t fdt16_to_cpu(fdt16_t x) +{ + return (__force uint16_t)CPU_TO_FDT16(x); +} +static inline fdt16_t cpu_to_fdt16(uint16_t x) +{ + return (__force fdt16_t)CPU_TO_FDT16(x); +} + +static inline uint32_t fdt32_to_cpu(fdt32_t x) +{ + return (__force uint32_t)CPU_TO_FDT32(x); +} +static inline fdt32_t cpu_to_fdt32(uint32_t x) +{ + return (__force fdt32_t)CPU_TO_FDT32(x); +} + +static inline uint64_t fdt64_to_cpu(fdt64_t x) +{ + return (__force uint64_t)CPU_TO_FDT64(x); +} +static inline fdt64_t cpu_to_fdt64(uint64_t x) +{ + return (__force fdt64_t)CPU_TO_FDT64(x); +} +#undef CPU_TO_FDT64 +#undef CPU_TO_FDT32 +#undef CPU_TO_FDT16 +#undef EXTRACT_BYTE + +#endif /* _LIBFDT_ENV_H */ diff --git a/tests/kvm-unit-tests/lib/libfdt/libfdt_internal.h b/tests/kvm-unit-tests/lib/libfdt/libfdt_internal.h new file mode 100644 index 00000000..381133ba --- /dev/null +++ b/tests/kvm-unit-tests/lib/libfdt/libfdt_internal.h @@ -0,0 +1,95 @@ +#ifndef _LIBFDT_INTERNAL_H +#define _LIBFDT_INTERNAL_H +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation.
+ * + * libfdt is dual licensed: you can use it either under the terms of + * the GPL, or the BSD license, at your option. + * + * a) This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + * + * Alternatively, + * + * b) Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include + +#define FDT_ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1)) +#define FDT_TAGALIGN(x) (FDT_ALIGN((x), FDT_TAGSIZE)) + +#define FDT_CHECK_HEADER(fdt) \ + { \ + int err; \ + if ((err = fdt_check_header(fdt)) != 0) \ + return err; \ + } + +int _fdt_check_node_offset(const void *fdt, int offset); +int _fdt_check_prop_offset(const void *fdt, int offset); +const char *_fdt_find_string(const char *strtab, int tabsize, const char *s); +int _fdt_node_end_offset(void *fdt, int nodeoffset); + +static inline const void *_fdt_offset_ptr(const void *fdt, int offset) +{ + return (const char *)fdt + fdt_off_dt_struct(fdt) + offset; +} + +static inline void *_fdt_offset_ptr_w(void *fdt, int offset) +{ + return (void *)(uintptr_t)_fdt_offset_ptr(fdt, offset); +} + +static inline const struct fdt_reserve_entry *_fdt_mem_rsv(const void *fdt, int n) +{ + const struct fdt_reserve_entry *rsv_table = + (const struct fdt_reserve_entry *) + ((const char *)fdt + fdt_off_mem_rsvmap(fdt)); + + return rsv_table + n; +} +static inline struct fdt_reserve_entry *_fdt_mem_rsv_w(void *fdt, int n) +{ + return (void *)(uintptr_t)_fdt_mem_rsv(fdt, n); +} + +#define FDT_SW_MAGIC (~FDT_MAGIC) + +#endif /* _LIBFDT_INTERNAL_H */ diff --git a/tests/kvm-unit-tests/lib/libfdt/version.lds b/tests/kvm-unit-tests/lib/libfdt/version.lds new file mode 100644 index 00000000..80b322be --- /dev/null +++ b/tests/kvm-unit-tests/lib/libfdt/version.lds @@ -0,0 +1,60 @@ 
+LIBFDT_1.2 { + global: + fdt_next_node; + fdt_check_header; + fdt_move; + fdt_string; + fdt_num_mem_rsv; + fdt_get_mem_rsv; + fdt_subnode_offset_namelen; + fdt_subnode_offset; + fdt_path_offset; + fdt_get_name; + fdt_get_property_namelen; + fdt_get_property; + fdt_getprop_namelen; + fdt_getprop; + fdt_get_phandle; + fdt_get_alias_namelen; + fdt_get_alias; + fdt_get_path; + fdt_supernode_atdepth_offset; + fdt_node_depth; + fdt_parent_offset; + fdt_node_offset_by_prop_value; + fdt_node_offset_by_phandle; + fdt_node_check_compatible; + fdt_node_offset_by_compatible; + fdt_setprop_inplace; + fdt_nop_property; + fdt_nop_node; + fdt_create; + fdt_add_reservemap_entry; + fdt_finish_reservemap; + fdt_begin_node; + fdt_property; + fdt_end_node; + fdt_finish; + fdt_open_into; + fdt_pack; + fdt_add_mem_rsv; + fdt_del_mem_rsv; + fdt_set_name; + fdt_setprop; + fdt_delprop; + fdt_add_subnode_namelen; + fdt_add_subnode; + fdt_del_node; + fdt_strerror; + fdt_offset_ptr; + fdt_next_tag; + fdt_appendprop; + fdt_create_empty_tree; + fdt_first_property_offset; + fdt_get_property_by_offset; + fdt_getprop_by_offset; + fdt_next_property_offset; + + local: + *; +}; diff --git a/tests/kvm-unit-tests/lib/linux/const.h b/tests/kvm-unit-tests/lib/linux/const.h new file mode 100644 index 00000000..c872bfd2 --- /dev/null +++ b/tests/kvm-unit-tests/lib/linux/const.h @@ -0,0 +1,27 @@ +/* const.h: Macros for dealing with constants. */ + +#ifndef _LINUX_CONST_H +#define _LINUX_CONST_H + +/* Some constant macros are used in both assembler and + * C code. Therefore we cannot annotate them always with + * 'UL' and other type specifiers unilaterally. We + * use the following macros to deal with this. + * + * Similarly, _AT() will cast an expression with a type in C, but + * leave it unchanged in asm. 
+ */ + +#ifdef __ASSEMBLY__ +#define _AC(X,Y) X +#define _AT(T,X) X +#else +#define __AC(X,Y) (X##Y) +#define _AC(X,Y) __AC(X,Y) +#define _AT(T,X) ((T)(X)) +#endif + +#define _BITUL(x) (_AC(1,UL) << (x)) +#define _BITULL(x) (_AC(1,ULL) << (x)) + +#endif /* !(_LINUX_CONST_H) */ diff --git a/tests/kvm-unit-tests/lib/linux/pci_regs.h b/tests/kvm-unit-tests/lib/linux/pci_regs.h new file mode 100644 index 00000000..1becea86 --- /dev/null +++ b/tests/kvm-unit-tests/lib/linux/pci_regs.h @@ -0,0 +1,949 @@ +/* + * pci_regs.h + * + * PCI standard defines + * Copyright 1994, Drew Eckhardt + * Copyright 1997--1999 Martin Mares + * + * For more information, please consult the following manuals (look at + * http://www.pcisig.com/ for how to get them): + * + * PCI BIOS Specification + * PCI Local Bus Specification + * PCI to PCI Bridge Specification + * PCI System Design Guide + * + * For HyperTransport information, please consult the following manuals + * from http://www.hypertransport.org + * + * The HyperTransport I/O Link Specification + */ + +#ifndef LINUX_PCI_REGS_H +#define LINUX_PCI_REGS_H + +/* + * Under PCI, each device has 256 bytes of configuration address space, + * of which the first 64 bytes are standardized as follows: + */ +#define PCI_STD_HEADER_SIZEOF 64 +#define PCI_VENDOR_ID 0x00 /* 16 bits */ +#define PCI_DEVICE_ID 0x02 /* 16 bits */ +#define PCI_COMMAND 0x04 /* 16 bits */ +#define PCI_COMMAND_IO 0x1 /* Enable response in I/O space */ +#define PCI_COMMAND_MEMORY 0x2 /* Enable response in Memory space */ +#define PCI_COMMAND_MASTER 0x4 /* Enable bus mastering */ +#define PCI_COMMAND_SPECIAL 0x8 /* Enable response to special cycles */ +#define PCI_COMMAND_INVALIDATE 0x10 /* Use memory write and invalidate */ +#define PCI_COMMAND_VGA_PALETTE 0x20 /* Enable palette snooping */ +#define PCI_COMMAND_PARITY 0x40 /* Enable parity checking */ +#define PCI_COMMAND_WAIT 0x80 /* Enable address/data stepping */ +#define PCI_COMMAND_SERR 0x100 /* Enable SERR */ +#define 
PCI_COMMAND_FAST_BACK 0x200 /* Enable back-to-back writes */ +#define PCI_COMMAND_INTX_DISABLE 0x400 /* INTx Emulation Disable */ + +#define PCI_STATUS 0x06 /* 16 bits */ +#define PCI_STATUS_INTERRUPT 0x08 /* Interrupt status */ +#define PCI_STATUS_CAP_LIST 0x10 /* Support Capability List */ +#define PCI_STATUS_66MHZ 0x20 /* Support 66 MHz PCI 2.1 bus */ +#define PCI_STATUS_UDF 0x40 /* Support User Definable Features [obsolete] */ +#define PCI_STATUS_FAST_BACK 0x80 /* Accept fast-back to back */ +#define PCI_STATUS_PARITY 0x100 /* Detected parity error */ +#define PCI_STATUS_DEVSEL_MASK 0x600 /* DEVSEL timing */ +#define PCI_STATUS_DEVSEL_FAST 0x000 +#define PCI_STATUS_DEVSEL_MEDIUM 0x200 +#define PCI_STATUS_DEVSEL_SLOW 0x400 +#define PCI_STATUS_SIG_TARGET_ABORT 0x800 /* Set on target abort */ +#define PCI_STATUS_REC_TARGET_ABORT 0x1000 /* Master ack of " */ +#define PCI_STATUS_REC_MASTER_ABORT 0x2000 /* Set on master abort */ +#define PCI_STATUS_SIG_SYSTEM_ERROR 0x4000 /* Set when we drive SERR */ +#define PCI_STATUS_DETECTED_PARITY 0x8000 /* Set on parity error */ + +#define PCI_CLASS_REVISION 0x08 /* High 24 bits are class, low 8 revision */ +#define PCI_REVISION_ID 0x08 /* Revision ID */ +#define PCI_CLASS_PROG 0x09 /* Reg. Level Programming Interface */ +#define PCI_CLASS_DEVICE 0x0a /* Device class */ + +#define PCI_CACHE_LINE_SIZE 0x0c /* 8 bits */ +#define PCI_LATENCY_TIMER 0x0d /* 8 bits */ +#define PCI_HEADER_TYPE 0x0e /* 8 bits */ +#define PCI_HEADER_TYPE_NORMAL 0 +#define PCI_HEADER_TYPE_BRIDGE 1 +#define PCI_HEADER_TYPE_CARDBUS 2 + +#define PCI_BIST 0x0f /* 8 bits */ +#define PCI_BIST_CODE_MASK 0x0f /* Return result */ +#define PCI_BIST_START 0x40 /* 1 to start BIST, 2 secs or less */ +#define PCI_BIST_CAPABLE 0x80 /* 1 if BIST capable */ + +/* + * Base addresses specify locations in memory or I/O space. + * Decoded size can be determined by writing a value of + * 0xffffffff to the register, and reading it back. Only + * 1 bits are decoded. 
+ */ +#define PCI_BASE_ADDRESS_0 0x10 /* 32 bits */ +#define PCI_BASE_ADDRESS_1 0x14 /* 32 bits [htype 0,1 only] */ +#define PCI_BASE_ADDRESS_2 0x18 /* 32 bits [htype 0 only] */ +#define PCI_BASE_ADDRESS_3 0x1c /* 32 bits */ +#define PCI_BASE_ADDRESS_4 0x20 /* 32 bits */ +#define PCI_BASE_ADDRESS_5 0x24 /* 32 bits */ +#define PCI_BASE_ADDRESS_SPACE 0x01 /* 0 = memory, 1 = I/O */ +#define PCI_BASE_ADDRESS_SPACE_IO 0x01 +#define PCI_BASE_ADDRESS_SPACE_MEMORY 0x00 +#define PCI_BASE_ADDRESS_MEM_TYPE_MASK 0x06 +#define PCI_BASE_ADDRESS_MEM_TYPE_32 0x00 /* 32 bit address */ +#define PCI_BASE_ADDRESS_MEM_TYPE_1M 0x02 /* Below 1M [obsolete] */ +#define PCI_BASE_ADDRESS_MEM_TYPE_64 0x04 /* 64 bit address */ +#define PCI_BASE_ADDRESS_MEM_PREFETCH 0x08 /* prefetchable? */ +#define PCI_BASE_ADDRESS_MEM_MASK (~0x0fUL) +#define PCI_BASE_ADDRESS_IO_MASK (~0x03UL) +/* bit 1 is reserved if address_space = 1 */ + +/* Header type 0 (normal devices) */ +#define PCI_CARDBUS_CIS 0x28 +#define PCI_SUBSYSTEM_VENDOR_ID 0x2c +#define PCI_SUBSYSTEM_ID 0x2e +#define PCI_ROM_ADDRESS 0x30 /* Bits 31..11 are address, 10..1 reserved */ +#define PCI_ROM_ADDRESS_ENABLE 0x01 +#define PCI_ROM_ADDRESS_MASK (~0x7ffUL) + +#define PCI_CAPABILITY_LIST 0x34 /* Offset of first capability list entry */ + +/* 0x35-0x3b are reserved */ +#define PCI_INTERRUPT_LINE 0x3c /* 8 bits */ +#define PCI_INTERRUPT_PIN 0x3d /* 8 bits */ +#define PCI_MIN_GNT 0x3e /* 8 bits */ +#define PCI_MAX_LAT 0x3f /* 8 bits */ + +/* Header type 1 (PCI-to-PCI bridges) */ +#define PCI_PRIMARY_BUS 0x18 /* Primary bus number */ +#define PCI_SECONDARY_BUS 0x19 /* Secondary bus number */ +#define PCI_SUBORDINATE_BUS 0x1a /* Highest bus number behind the bridge */ +#define PCI_SEC_LATENCY_TIMER 0x1b /* Latency timer for secondary interface */ +#define PCI_IO_BASE 0x1c /* I/O range behind the bridge */ +#define PCI_IO_LIMIT 0x1d +#define PCI_IO_RANGE_TYPE_MASK 0x0fUL /* I/O bridging type */ +#define PCI_IO_RANGE_TYPE_16 0x00 +#define 
PCI_IO_RANGE_TYPE_32 0x01 +#define PCI_IO_RANGE_MASK (~0x0fUL) /* Standard 4K I/O windows */ +#define PCI_IO_1K_RANGE_MASK (~0x03UL) /* Intel 1K I/O windows */ +#define PCI_SEC_STATUS 0x1e /* Secondary status register, only bit 14 used */ +#define PCI_MEMORY_BASE 0x20 /* Memory range behind */ +#define PCI_MEMORY_LIMIT 0x22 +#define PCI_MEMORY_RANGE_TYPE_MASK 0x0fUL +#define PCI_MEMORY_RANGE_MASK (~0x0fUL) +#define PCI_PREF_MEMORY_BASE 0x24 /* Prefetchable memory range behind */ +#define PCI_PREF_MEMORY_LIMIT 0x26 +#define PCI_PREF_RANGE_TYPE_MASK 0x0fUL +#define PCI_PREF_RANGE_TYPE_32 0x00 +#define PCI_PREF_RANGE_TYPE_64 0x01 +#define PCI_PREF_RANGE_MASK (~0x0fUL) +#define PCI_PREF_BASE_UPPER32 0x28 /* Upper half of prefetchable memory range */ +#define PCI_PREF_LIMIT_UPPER32 0x2c +#define PCI_IO_BASE_UPPER16 0x30 /* Upper half of I/O addresses */ +#define PCI_IO_LIMIT_UPPER16 0x32 +/* 0x34 same as for htype 0 */ +/* 0x35-0x3b is reserved */ +#define PCI_ROM_ADDRESS1 0x38 /* Same as PCI_ROM_ADDRESS, but for htype 1 */ +/* 0x3c-0x3d are same as for htype 0 */ +#define PCI_BRIDGE_CONTROL 0x3e +#define PCI_BRIDGE_CTL_PARITY 0x01 /* Enable parity detection on secondary interface */ +#define PCI_BRIDGE_CTL_SERR 0x02 /* The same for SERR forwarding */ +#define PCI_BRIDGE_CTL_ISA 0x04 /* Enable ISA mode */ +#define PCI_BRIDGE_CTL_VGA 0x08 /* Forward VGA addresses */ +#define PCI_BRIDGE_CTL_MASTER_ABORT 0x20 /* Report master aborts */ +#define PCI_BRIDGE_CTL_BUS_RESET 0x40 /* Secondary bus reset */ +#define PCI_BRIDGE_CTL_FAST_BACK 0x80 /* Fast Back2Back enabled on secondary interface */ + +/* Header type 2 (CardBus bridges) */ +#define PCI_CB_CAPABILITY_LIST 0x14 +/* 0x15 reserved */ +#define PCI_CB_SEC_STATUS 0x16 /* Secondary status */ +#define PCI_CB_PRIMARY_BUS 0x18 /* PCI bus number */ +#define PCI_CB_CARD_BUS 0x19 /* CardBus bus number */ +#define PCI_CB_SUBORDINATE_BUS 0x1a /* Subordinate bus number */ +#define PCI_CB_LATENCY_TIMER 0x1b /* CardBus latency timer */ 
+#define PCI_CB_MEMORY_BASE_0 0x1c +#define PCI_CB_MEMORY_LIMIT_0 0x20 +#define PCI_CB_MEMORY_BASE_1 0x24 +#define PCI_CB_MEMORY_LIMIT_1 0x28 +#define PCI_CB_IO_BASE_0 0x2c +#define PCI_CB_IO_BASE_0_HI 0x2e +#define PCI_CB_IO_LIMIT_0 0x30 +#define PCI_CB_IO_LIMIT_0_HI 0x32 +#define PCI_CB_IO_BASE_1 0x34 +#define PCI_CB_IO_BASE_1_HI 0x36 +#define PCI_CB_IO_LIMIT_1 0x38 +#define PCI_CB_IO_LIMIT_1_HI 0x3a +#define PCI_CB_IO_RANGE_MASK (~0x03UL) +/* 0x3c-0x3d are same as for htype 0 */ +#define PCI_CB_BRIDGE_CONTROL 0x3e +#define PCI_CB_BRIDGE_CTL_PARITY 0x01 /* Similar to standard bridge control register */ +#define PCI_CB_BRIDGE_CTL_SERR 0x02 +#define PCI_CB_BRIDGE_CTL_ISA 0x04 +#define PCI_CB_BRIDGE_CTL_VGA 0x08 +#define PCI_CB_BRIDGE_CTL_MASTER_ABORT 0x20 +#define PCI_CB_BRIDGE_CTL_CB_RESET 0x40 /* CardBus reset */ +#define PCI_CB_BRIDGE_CTL_16BIT_INT 0x80 /* Enable interrupt for 16-bit cards */ +#define PCI_CB_BRIDGE_CTL_PREFETCH_MEM0 0x100 /* Prefetch enable for both memory regions */ +#define PCI_CB_BRIDGE_CTL_PREFETCH_MEM1 0x200 +#define PCI_CB_BRIDGE_CTL_POST_WRITES 0x400 +#define PCI_CB_SUBSYSTEM_VENDOR_ID 0x40 +#define PCI_CB_SUBSYSTEM_ID 0x42 +#define PCI_CB_LEGACY_MODE_BASE 0x44 /* 16-bit PC Card legacy mode base address (ExCa) */ +/* 0x48-0x7f reserved */ + +/* Capability lists */ + +#define PCI_CAP_LIST_ID 0 /* Capability ID */ +#define PCI_CAP_ID_PM 0x01 /* Power Management */ +#define PCI_CAP_ID_AGP 0x02 /* Accelerated Graphics Port */ +#define PCI_CAP_ID_VPD 0x03 /* Vital Product Data */ +#define PCI_CAP_ID_SLOTID 0x04 /* Slot Identification */ +#define PCI_CAP_ID_MSI 0x05 /* Message Signalled Interrupts */ +#define PCI_CAP_ID_CHSWP 0x06 /* CompactPCI HotSwap */ +#define PCI_CAP_ID_PCIX 0x07 /* PCI-X */ +#define PCI_CAP_ID_HT 0x08 /* HyperTransport */ +#define PCI_CAP_ID_VNDR 0x09 /* Vendor-Specific */ +#define PCI_CAP_ID_DBG 0x0A /* Debug port */ +#define PCI_CAP_ID_CCRC 0x0B /* CompactPCI Central Resource Control */ +#define PCI_CAP_ID_SHPC 0x0C /* 
PCI Standard Hot-Plug Controller */ +#define PCI_CAP_ID_SSVID 0x0D /* Bridge subsystem vendor/device ID */ +#define PCI_CAP_ID_AGP3 0x0E /* AGP Target PCI-PCI bridge */ +#define PCI_CAP_ID_SECDEV 0x0F /* Secure Device */ +#define PCI_CAP_ID_EXP 0x10 /* PCI Express */ +#define PCI_CAP_ID_MSIX 0x11 /* MSI-X */ +#define PCI_CAP_ID_SATA 0x12 /* SATA Data/Index Conf. */ +#define PCI_CAP_ID_AF 0x13 /* PCI Advanced Features */ +#define PCI_CAP_ID_EA 0x14 /* PCI Enhanced Allocation */ +#define PCI_CAP_ID_MAX PCI_CAP_ID_EA +#define PCI_CAP_LIST_NEXT 1 /* Next capability in the list */ +#define PCI_CAP_FLAGS 2 /* Capability defined flags (16 bits) */ +#define PCI_CAP_SIZEOF 4 + +/* Power Management Registers */ + +#define PCI_PM_PMC 2 /* PM Capabilities Register */ +#define PCI_PM_CAP_VER_MASK 0x0007 /* Version */ +#define PCI_PM_CAP_PME_CLOCK 0x0008 /* PME clock required */ +#define PCI_PM_CAP_RESERVED 0x0010 /* Reserved field */ +#define PCI_PM_CAP_DSI 0x0020 /* Device specific initialization */ +#define PCI_PM_CAP_AUX_POWER 0x01C0 /* Auxiliary power support mask */ +#define PCI_PM_CAP_D1 0x0200 /* D1 power state support */ +#define PCI_PM_CAP_D2 0x0400 /* D2 power state support */ +#define PCI_PM_CAP_PME 0x0800 /* PME pin supported */ +#define PCI_PM_CAP_PME_MASK 0xF800 /* PME Mask of all supported states */ +#define PCI_PM_CAP_PME_D0 0x0800 /* PME# from D0 */ +#define PCI_PM_CAP_PME_D1 0x1000 /* PME# from D1 */ +#define PCI_PM_CAP_PME_D2 0x2000 /* PME# from D2 */ +#define PCI_PM_CAP_PME_D3 0x4000 /* PME# from D3 (hot) */ +#define PCI_PM_CAP_PME_D3cold 0x8000 /* PME# from D3 (cold) */ +#define PCI_PM_CAP_PME_SHIFT 11 /* Start of the PME Mask in PMC */ +#define PCI_PM_CTRL 4 /* PM control and status register */ +#define PCI_PM_CTRL_STATE_MASK 0x0003 /* Current power state (D0 to D3) */ +#define PCI_PM_CTRL_NO_SOFT_RESET 0x0008 /* No reset for D3hot->D0 */ +#define PCI_PM_CTRL_PME_ENABLE 0x0100 /* PME pin enable */ +#define PCI_PM_CTRL_DATA_SEL_MASK 0x1e00 /* Data select 
(??) */ +#define PCI_PM_CTRL_DATA_SCALE_MASK 0x6000 /* Data scale (??) */ +#define PCI_PM_CTRL_PME_STATUS 0x8000 /* PME pin status */ +#define PCI_PM_PPB_EXTENSIONS 6 /* PPB support extensions (??) */ +#define PCI_PM_PPB_B2_B3 0x40 /* Stop clock when in D3hot (??) */ +#define PCI_PM_BPCC_ENABLE 0x80 /* Bus power/clock control enable (??) */ +#define PCI_PM_DATA_REGISTER 7 /* (??) */ +#define PCI_PM_SIZEOF 8 + +/* AGP registers */ + +#define PCI_AGP_VERSION 2 /* BCD version number */ +#define PCI_AGP_RFU 3 /* Rest of capability flags */ +#define PCI_AGP_STATUS 4 /* Status register */ +#define PCI_AGP_STATUS_RQ_MASK 0xff000000 /* Maximum number of requests - 1 */ +#define PCI_AGP_STATUS_SBA 0x0200 /* Sideband addressing supported */ +#define PCI_AGP_STATUS_64BIT 0x0020 /* 64-bit addressing supported */ +#define PCI_AGP_STATUS_FW 0x0010 /* FW transfers supported */ +#define PCI_AGP_STATUS_RATE4 0x0004 /* 4x transfer rate supported */ +#define PCI_AGP_STATUS_RATE2 0x0002 /* 2x transfer rate supported */ +#define PCI_AGP_STATUS_RATE1 0x0001 /* 1x transfer rate supported */ +#define PCI_AGP_COMMAND 8 /* Control register */ +#define PCI_AGP_COMMAND_RQ_MASK 0xff000000 /* Master: Maximum number of requests */ +#define PCI_AGP_COMMAND_SBA 0x0200 /* Sideband addressing enabled */ +#define PCI_AGP_COMMAND_AGP 0x0100 /* Allow processing of AGP transactions */ +#define PCI_AGP_COMMAND_64BIT 0x0020 /* Allow processing of 64-bit addresses */ +#define PCI_AGP_COMMAND_FW 0x0010 /* Force FW transfers */ +#define PCI_AGP_COMMAND_RATE4 0x0004 /* Use 4x rate */ +#define PCI_AGP_COMMAND_RATE2 0x0002 /* Use 2x rate */ +#define PCI_AGP_COMMAND_RATE1 0x0001 /* Use 1x rate */ +#define PCI_AGP_SIZEOF 12 + +/* Vital Product Data */ + +#define PCI_VPD_ADDR 2 /* Address to access (15 bits!) 
*/ +#define PCI_VPD_ADDR_MASK 0x7fff /* Address mask */ +#define PCI_VPD_ADDR_F 0x8000 /* Write 0, 1 indicates completion */ +#define PCI_VPD_DATA 4 /* 32-bits of data returned here */ +#define PCI_CAP_VPD_SIZEOF 8 + +/* Slot Identification */ + +#define PCI_SID_ESR 2 /* Expansion Slot Register */ +#define PCI_SID_ESR_NSLOTS 0x1f /* Number of expansion slots available */ +#define PCI_SID_ESR_FIC 0x20 /* First In Chassis Flag */ +#define PCI_SID_CHASSIS_NR 3 /* Chassis Number */ + +/* Message Signalled Interrupts registers */ + +#define PCI_MSI_FLAGS 2 /* Message Control */ +#define PCI_MSI_FLAGS_ENABLE 0x0001 /* MSI feature enabled */ +#define PCI_MSI_FLAGS_QMASK 0x000e /* Maximum queue size available */ +#define PCI_MSI_FLAGS_QSIZE 0x0070 /* Message queue size configured */ +#define PCI_MSI_FLAGS_64BIT 0x0080 /* 64-bit addresses allowed */ +#define PCI_MSI_FLAGS_MASKBIT 0x0100 /* Per-vector masking capable */ +#define PCI_MSI_RFU 3 /* Rest of capability flags */ +#define PCI_MSI_ADDRESS_LO 4 /* Lower 32 bits */ +#define PCI_MSI_ADDRESS_HI 8 /* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */ +#define PCI_MSI_DATA_32 8 /* 16 bits of data for 32-bit devices */ +#define PCI_MSI_MASK_32 12 /* Mask bits register for 32-bit devices */ +#define PCI_MSI_PENDING_32 16 /* Pending intrs for 32-bit devices */ +#define PCI_MSI_DATA_64 12 /* 16 bits of data for 64-bit devices */ +#define PCI_MSI_MASK_64 16 /* Mask bits register for 64-bit devices */ +#define PCI_MSI_PENDING_64 20 /* Pending intrs for 64-bit devices */ + +/* MSI-X registers */ +#define PCI_MSIX_FLAGS 2 /* Message Control */ +#define PCI_MSIX_FLAGS_QSIZE 0x07FF /* Table size */ +#define PCI_MSIX_FLAGS_MASKALL 0x4000 /* Mask all vectors for this function */ +#define PCI_MSIX_FLAGS_ENABLE 0x8000 /* MSI-X enable */ +#define PCI_MSIX_TABLE 4 /* Table offset */ +#define PCI_MSIX_TABLE_BIR 0x00000007 /* BAR index */ +#define PCI_MSIX_TABLE_OFFSET 0xfffffff8 /* Offset into specified BAR */ +#define PCI_MSIX_PBA 8 /* 
Pending Bit Array offset */ +#define PCI_MSIX_PBA_BIR 0x00000007 /* BAR index */ +#define PCI_MSIX_PBA_OFFSET 0xfffffff8 /* Offset into specified BAR */ +#define PCI_MSIX_FLAGS_BIRMASK PCI_MSIX_PBA_BIR /* deprecated */ +#define PCI_CAP_MSIX_SIZEOF 12 /* size of MSIX registers */ + +/* MSI-X Table entry format */ +#define PCI_MSIX_ENTRY_SIZE 16 +#define PCI_MSIX_ENTRY_LOWER_ADDR 0 +#define PCI_MSIX_ENTRY_UPPER_ADDR 4 +#define PCI_MSIX_ENTRY_DATA 8 +#define PCI_MSIX_ENTRY_VECTOR_CTRL 12 +#define PCI_MSIX_ENTRY_CTRL_MASKBIT 1 + +/* CompactPCI Hotswap Register */ + +#define PCI_CHSWP_CSR 2 /* Control and Status Register */ +#define PCI_CHSWP_DHA 0x01 /* Device Hiding Arm */ +#define PCI_CHSWP_EIM 0x02 /* ENUM# Signal Mask */ +#define PCI_CHSWP_PIE 0x04 /* Pending Insert or Extract */ +#define PCI_CHSWP_LOO 0x08 /* LED On / Off */ +#define PCI_CHSWP_PI 0x30 /* Programming Interface */ +#define PCI_CHSWP_EXT 0x40 /* ENUM# status - extraction */ +#define PCI_CHSWP_INS 0x80 /* ENUM# status - insertion */ + +/* PCI Advanced Feature registers */ + +#define PCI_AF_LENGTH 2 +#define PCI_AF_CAP 3 +#define PCI_AF_CAP_TP 0x01 +#define PCI_AF_CAP_FLR 0x02 +#define PCI_AF_CTRL 4 +#define PCI_AF_CTRL_FLR 0x01 +#define PCI_AF_STATUS 5 +#define PCI_AF_STATUS_TP 0x01 +#define PCI_CAP_AF_SIZEOF 6 /* size of AF registers */ + +/* PCI Enhanced Allocation registers */ + +#define PCI_EA_NUM_ENT 2 /* Number of Capability Entries */ +#define PCI_EA_NUM_ENT_MASK 0x3f /* Num Entries Mask */ +#define PCI_EA_FIRST_ENT 4 /* First EA Entry in List */ +#define PCI_EA_FIRST_ENT_BRIDGE 8 /* First EA Entry for Bridges */ +#define PCI_EA_ES 0x00000007 /* Entry Size */ +#define PCI_EA_BEI 0x000000f0 /* BAR Equivalent Indicator */ +/* 0-5 map to BARs 0-5 respectively */ +#define PCI_EA_BEI_BAR0 0 +#define PCI_EA_BEI_BAR5 5 +#define PCI_EA_BEI_BRIDGE 6 /* Resource behind bridge */ +#define PCI_EA_BEI_ENI 7 /* Equivalent Not Indicated */ +#define PCI_EA_BEI_ROM 8 /* Expansion ROM */ +/* 9-14 map to VF BARs 
0-5 respectively */ +#define PCI_EA_BEI_VF_BAR0 9 +#define PCI_EA_BEI_VF_BAR5 14 +#define PCI_EA_BEI_RESERVED 15 /* Reserved - Treat like ENI */ +#define PCI_EA_PP 0x0000ff00 /* Primary Properties */ +#define PCI_EA_SP 0x00ff0000 /* Secondary Properties */ +#define PCI_EA_P_MEM 0x00 /* Non-Prefetch Memory */ +#define PCI_EA_P_MEM_PREFETCH 0x01 /* Prefetchable Memory */ +#define PCI_EA_P_IO 0x02 /* I/O Space */ +#define PCI_EA_P_VF_MEM_PREFETCH 0x03 /* VF Prefetchable Memory */ +#define PCI_EA_P_VF_MEM 0x04 /* VF Non-Prefetch Memory */ +#define PCI_EA_P_BRIDGE_MEM 0x05 /* Bridge Non-Prefetch Memory */ +#define PCI_EA_P_BRIDGE_MEM_PREFETCH 0x06 /* Bridge Prefetchable Memory */ +#define PCI_EA_P_BRIDGE_IO 0x07 /* Bridge I/O Space */ +/* 0x08-0xfc reserved */ +#define PCI_EA_P_MEM_RESERVED 0xfd /* Reserved Memory */ +#define PCI_EA_P_IO_RESERVED 0xfe /* Reserved I/O Space */ +#define PCI_EA_P_UNAVAILABLE 0xff /* Entry Unavailable */ +#define PCI_EA_WRITABLE 0x40000000 /* Writable: 1 = RW, 0 = HwInit */ +#define PCI_EA_ENABLE 0x80000000 /* Enable for this entry */ +#define PCI_EA_BASE 4 /* Base Address Offset */ +#define PCI_EA_MAX_OFFSET 8 /* MaxOffset (resource length) */ +/* bit 0 is reserved */ +#define PCI_EA_IS_64 0x00000002 /* 64-bit field flag */ +#define PCI_EA_FIELD_MASK 0xfffffffc /* For Base & Max Offset */ + +/* PCI-X registers (Type 0 (non-bridge) devices) */ + +#define PCI_X_CMD 2 /* Modes & Features */ +#define PCI_X_CMD_DPERR_E 0x0001 /* Data Parity Error Recovery Enable */ +#define PCI_X_CMD_ERO 0x0002 /* Enable Relaxed Ordering */ +#define PCI_X_CMD_READ_512 0x0000 /* 512 byte maximum read byte count */ +#define PCI_X_CMD_READ_1K 0x0004 /* 1Kbyte maximum read byte count */ +#define PCI_X_CMD_READ_2K 0x0008 /* 2Kbyte maximum read byte count */ +#define PCI_X_CMD_READ_4K 0x000c /* 4Kbyte maximum read byte count */ +#define PCI_X_CMD_MAX_READ 0x000c /* Max Memory Read Byte Count */ + /* Max # of outstanding split transactions */ +#define 
PCI_X_CMD_SPLIT_1 0x0000 /* Max 1 */ +#define PCI_X_CMD_SPLIT_2 0x0010 /* Max 2 */ +#define PCI_X_CMD_SPLIT_3 0x0020 /* Max 3 */ +#define PCI_X_CMD_SPLIT_4 0x0030 /* Max 4 */ +#define PCI_X_CMD_SPLIT_8 0x0040 /* Max 8 */ +#define PCI_X_CMD_SPLIT_12 0x0050 /* Max 12 */ +#define PCI_X_CMD_SPLIT_16 0x0060 /* Max 16 */ +#define PCI_X_CMD_SPLIT_32 0x0070 /* Max 32 */ +#define PCI_X_CMD_MAX_SPLIT 0x0070 /* Max Outstanding Split Transactions */ +#define PCI_X_CMD_VERSION(x) (((x) >> 12) & 3) /* Version */ +#define PCI_X_STATUS 4 /* PCI-X capabilities */ +#define PCI_X_STATUS_DEVFN 0x000000ff /* A copy of devfn */ +#define PCI_X_STATUS_BUS 0x0000ff00 /* A copy of bus nr */ +#define PCI_X_STATUS_64BIT 0x00010000 /* 64-bit device */ +#define PCI_X_STATUS_133MHZ 0x00020000 /* 133 MHz capable */ +#define PCI_X_STATUS_SPL_DISC 0x00040000 /* Split Completion Discarded */ +#define PCI_X_STATUS_UNX_SPL 0x00080000 /* Unexpected Split Completion */ +#define PCI_X_STATUS_COMPLEX 0x00100000 /* Device Complexity */ +#define PCI_X_STATUS_MAX_READ 0x00600000 /* Designed Max Memory Read Count */ +#define PCI_X_STATUS_MAX_SPLIT 0x03800000 /* Designed Max Outstanding Split Transactions */ +#define PCI_X_STATUS_MAX_CUM 0x1c000000 /* Designed Max Cumulative Read Size */ +#define PCI_X_STATUS_SPL_ERR 0x20000000 /* Rcvd Split Completion Error Msg */ +#define PCI_X_STATUS_266MHZ 0x40000000 /* 266 MHz capable */ +#define PCI_X_STATUS_533MHZ 0x80000000 /* 533 MHz capable */ +#define PCI_X_ECC_CSR 8 /* ECC control and status */ +#define PCI_CAP_PCIX_SIZEOF_V0 8 /* size of registers for Version 0 */ +#define PCI_CAP_PCIX_SIZEOF_V1 24 /* size for Version 1 */ +#define PCI_CAP_PCIX_SIZEOF_V2 PCI_CAP_PCIX_SIZEOF_V1 /* Same for v2 */ + +/* PCI-X registers (Type 1 (bridge) devices) */ + +#define PCI_X_BRIDGE_SSTATUS 2 /* Secondary Status */ +#define PCI_X_SSTATUS_64BIT 0x0001 /* Secondary AD interface is 64 bits */ +#define PCI_X_SSTATUS_133MHZ 0x0002 /* 133 MHz capable */ +#define PCI_X_SSTATUS_FREQ 
0x03c0 /* Secondary Bus Mode and Frequency */ +#define PCI_X_SSTATUS_VERS 0x3000 /* PCI-X Capability Version */ +#define PCI_X_SSTATUS_V1 0x1000 /* Mode 2, not Mode 1 */ +#define PCI_X_SSTATUS_V2 0x2000 /* Mode 1 or Modes 1 and 2 */ +#define PCI_X_SSTATUS_266MHZ 0x4000 /* 266 MHz capable */ +#define PCI_X_SSTATUS_533MHZ 0x8000 /* 533 MHz capable */ +#define PCI_X_BRIDGE_STATUS 4 /* Bridge Status */ + +/* PCI Bridge Subsystem ID registers */ + +#define PCI_SSVID_VENDOR_ID 4 /* PCI Bridge subsystem vendor ID */ +#define PCI_SSVID_DEVICE_ID 6 /* PCI Bridge subsystem device ID */ + +/* PCI Express capability registers */ + +#define PCI_EXP_FLAGS 2 /* Capabilities register */ +#define PCI_EXP_FLAGS_VERS 0x000f /* Capability version */ +#define PCI_EXP_FLAGS_TYPE 0x00f0 /* Device/Port type */ +#define PCI_EXP_TYPE_ENDPOINT 0x0 /* Express Endpoint */ +#define PCI_EXP_TYPE_LEG_END 0x1 /* Legacy Endpoint */ +#define PCI_EXP_TYPE_ROOT_PORT 0x4 /* Root Port */ +#define PCI_EXP_TYPE_UPSTREAM 0x5 /* Upstream Port */ +#define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */ +#define PCI_EXP_TYPE_PCI_BRIDGE 0x7 /* PCIe to PCI/PCI-X Bridge */ +#define PCI_EXP_TYPE_PCIE_BRIDGE 0x8 /* PCI/PCI-X to PCIe Bridge */ +#define PCI_EXP_TYPE_RC_END 0x9 /* Root Complex Integrated Endpoint */ +#define PCI_EXP_TYPE_RC_EC 0xa /* Root Complex Event Collector */ +#define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */ +#define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */ +#define PCI_EXP_DEVCAP 4 /* Device capabilities */ +#define PCI_EXP_DEVCAP_PAYLOAD 0x00000007 /* Max_Payload_Size */ +#define PCI_EXP_DEVCAP_PHANTOM 0x00000018 /* Phantom functions */ +#define PCI_EXP_DEVCAP_EXT_TAG 0x00000020 /* Extended tags */ +#define PCI_EXP_DEVCAP_L0S 0x000001c0 /* L0s Acceptable Latency */ +#define PCI_EXP_DEVCAP_L1 0x00000e00 /* L1 Acceptable Latency */ +#define PCI_EXP_DEVCAP_ATN_BUT 0x00001000 /* Attention Button Present */ +#define PCI_EXP_DEVCAP_ATN_IND 0x00002000 /* Attention Indicator 
Present */ +#define PCI_EXP_DEVCAP_PWR_IND 0x00004000 /* Power Indicator Present */ +#define PCI_EXP_DEVCAP_RBER 0x00008000 /* Role-Based Error Reporting */ +#define PCI_EXP_DEVCAP_PWR_VAL 0x03fc0000 /* Slot Power Limit Value */ +#define PCI_EXP_DEVCAP_PWR_SCL 0x0c000000 /* Slot Power Limit Scale */ +#define PCI_EXP_DEVCAP_FLR 0x10000000 /* Function Level Reset */ +#define PCI_EXP_DEVCTL 8 /* Device Control */ +#define PCI_EXP_DEVCTL_CERE 0x0001 /* Correctable Error Reporting En. */ +#define PCI_EXP_DEVCTL_NFERE 0x0002 /* Non-Fatal Error Reporting Enable */ +#define PCI_EXP_DEVCTL_FERE 0x0004 /* Fatal Error Reporting Enable */ +#define PCI_EXP_DEVCTL_URRE 0x0008 /* Unsupported Request Reporting En. */ +#define PCI_EXP_DEVCTL_RELAX_EN 0x0010 /* Enable relaxed ordering */ +#define PCI_EXP_DEVCTL_PAYLOAD 0x00e0 /* Max_Payload_Size */ +#define PCI_EXP_DEVCTL_EXT_TAG 0x0100 /* Extended Tag Field Enable */ +#define PCI_EXP_DEVCTL_PHANTOM 0x0200 /* Phantom Functions Enable */ +#define PCI_EXP_DEVCTL_AUX_PME 0x0400 /* Auxiliary Power PM Enable */ +#define PCI_EXP_DEVCTL_NOSNOOP_EN 0x0800 /* Enable No Snoop */ +#define PCI_EXP_DEVCTL_READRQ 0x7000 /* Max_Read_Request_Size */ +#define PCI_EXP_DEVCTL_READRQ_128B 0x0000 /* 128 Bytes */ +#define PCI_EXP_DEVCTL_READRQ_256B 0x1000 /* 256 Bytes */ +#define PCI_EXP_DEVCTL_READRQ_512B 0x2000 /* 512 Bytes */ +#define PCI_EXP_DEVCTL_READRQ_1024B 0x3000 /* 1024 Bytes */ +#define PCI_EXP_DEVCTL_BCR_FLR 0x8000 /* Bridge Configuration Retry / FLR */ +#define PCI_EXP_DEVSTA 10 /* Device Status */ +#define PCI_EXP_DEVSTA_CED 0x0001 /* Correctable Error Detected */ +#define PCI_EXP_DEVSTA_NFED 0x0002 /* Non-Fatal Error Detected */ +#define PCI_EXP_DEVSTA_FED 0x0004 /* Fatal Error Detected */ +#define PCI_EXP_DEVSTA_URD 0x0008 /* Unsupported Request Detected */ +#define PCI_EXP_DEVSTA_AUXPD 0x0010 /* AUX Power Detected */ +#define PCI_EXP_DEVSTA_TRPND 0x0020 /* Transactions Pending */ +#define PCI_EXP_LNKCAP 12 /* Link Capabilities */ 
+#define PCI_EXP_LNKCAP_SLS 0x0000000f /* Supported Link Speeds */ +#define PCI_EXP_LNKCAP_SLS_2_5GB 0x00000001 /* LNKCAP2 SLS Vector bit 0 */ +#define PCI_EXP_LNKCAP_SLS_5_0GB 0x00000002 /* LNKCAP2 SLS Vector bit 1 */ +#define PCI_EXP_LNKCAP_MLW 0x000003f0 /* Maximum Link Width */ +#define PCI_EXP_LNKCAP_ASPMS 0x00000c00 /* ASPM Support */ +#define PCI_EXP_LNKCAP_L0SEL 0x00007000 /* L0s Exit Latency */ +#define PCI_EXP_LNKCAP_L1EL 0x00038000 /* L1 Exit Latency */ +#define PCI_EXP_LNKCAP_CLKPM 0x00040000 /* Clock Power Management */ +#define PCI_EXP_LNKCAP_SDERC 0x00080000 /* Surprise Down Error Reporting Capable */ +#define PCI_EXP_LNKCAP_DLLLARC 0x00100000 /* Data Link Layer Link Active Reporting Capable */ +#define PCI_EXP_LNKCAP_LBNC 0x00200000 /* Link Bandwidth Notification Capability */ +#define PCI_EXP_LNKCAP_PN 0xff000000 /* Port Number */ +#define PCI_EXP_LNKCTL 16 /* Link Control */ +#define PCI_EXP_LNKCTL_ASPMC 0x0003 /* ASPM Control */ +#define PCI_EXP_LNKCTL_ASPM_L0S 0x0001 /* L0s Enable */ +#define PCI_EXP_LNKCTL_ASPM_L1 0x0002 /* L1 Enable */ +#define PCI_EXP_LNKCTL_RCB 0x0008 /* Read Completion Boundary */ +#define PCI_EXP_LNKCTL_LD 0x0010 /* Link Disable */ +#define PCI_EXP_LNKCTL_RL 0x0020 /* Retrain Link */ +#define PCI_EXP_LNKCTL_CCC 0x0040 /* Common Clock Configuration */ +#define PCI_EXP_LNKCTL_ES 0x0080 /* Extended Synch */ +#define PCI_EXP_LNKCTL_CLKREQ_EN 0x0100 /* Enable clkreq */ +#define PCI_EXP_LNKCTL_HAWD 0x0200 /* Hardware Autonomous Width Disable */ +#define PCI_EXP_LNKCTL_LBMIE 0x0400 /* Link Bandwidth Management Interrupt Enable */ +#define PCI_EXP_LNKCTL_LABIE 0x0800 /* Link Autonomous Bandwidth Interrupt Enable */ +#define PCI_EXP_LNKSTA 18 /* Link Status */ +#define PCI_EXP_LNKSTA_CLS 0x000f /* Current Link Speed */ +#define PCI_EXP_LNKSTA_CLS_2_5GB 0x0001 /* Current Link Speed 2.5GT/s */ +#define PCI_EXP_LNKSTA_CLS_5_0GB 0x0002 /* Current Link Speed 5.0GT/s */ +#define PCI_EXP_LNKSTA_CLS_8_0GB 0x0003 /* Current Link Speed 
8.0GT/s */ +#define PCI_EXP_LNKSTA_NLW 0x03f0 /* Negotiated Link Width */ +#define PCI_EXP_LNKSTA_NLW_X1 0x0010 /* Current Link Width x1 */ +#define PCI_EXP_LNKSTA_NLW_X2 0x0020 /* Current Link Width x2 */ +#define PCI_EXP_LNKSTA_NLW_X4 0x0040 /* Current Link Width x4 */ +#define PCI_EXP_LNKSTA_NLW_X8 0x0080 /* Current Link Width x8 */ +#define PCI_EXP_LNKSTA_NLW_SHIFT 4 /* start of NLW mask in link status */ +#define PCI_EXP_LNKSTA_LT 0x0800 /* Link Training */ +#define PCI_EXP_LNKSTA_SLC 0x1000 /* Slot Clock Configuration */ +#define PCI_EXP_LNKSTA_DLLLA 0x2000 /* Data Link Layer Link Active */ +#define PCI_EXP_LNKSTA_LBMS 0x4000 /* Link Bandwidth Management Status */ +#define PCI_EXP_LNKSTA_LABS 0x8000 /* Link Autonomous Bandwidth Status */ +#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V1 20 /* v1 endpoints end here */ +#define PCI_EXP_SLTCAP 20 /* Slot Capabilities */ +#define PCI_EXP_SLTCAP_ABP 0x00000001 /* Attention Button Present */ +#define PCI_EXP_SLTCAP_PCP 0x00000002 /* Power Controller Present */ +#define PCI_EXP_SLTCAP_MRLSP 0x00000004 /* MRL Sensor Present */ +#define PCI_EXP_SLTCAP_AIP 0x00000008 /* Attention Indicator Present */ +#define PCI_EXP_SLTCAP_PIP 0x00000010 /* Power Indicator Present */ +#define PCI_EXP_SLTCAP_HPS 0x00000020 /* Hot-Plug Surprise */ +#define PCI_EXP_SLTCAP_HPC 0x00000040 /* Hot-Plug Capable */ +#define PCI_EXP_SLTCAP_SPLV 0x00007f80 /* Slot Power Limit Value */ +#define PCI_EXP_SLTCAP_SPLS 0x00018000 /* Slot Power Limit Scale */ +#define PCI_EXP_SLTCAP_EIP 0x00020000 /* Electromechanical Interlock Present */ +#define PCI_EXP_SLTCAP_NCCS 0x00040000 /* No Command Completed Support */ +#define PCI_EXP_SLTCAP_PSN 0xfff80000 /* Physical Slot Number */ +#define PCI_EXP_SLTCTL 24 /* Slot Control */ +#define PCI_EXP_SLTCTL_ABPE 0x0001 /* Attention Button Pressed Enable */ +#define PCI_EXP_SLTCTL_PFDE 0x0002 /* Power Fault Detected Enable */ +#define PCI_EXP_SLTCTL_MRLSCE 0x0004 /* MRL Sensor Changed Enable */ +#define PCI_EXP_SLTCTL_PDCE 
0x0008 /* Presence Detect Changed Enable */ +#define PCI_EXP_SLTCTL_CCIE 0x0010 /* Command Completed Interrupt Enable */ +#define PCI_EXP_SLTCTL_HPIE 0x0020 /* Hot-Plug Interrupt Enable */ +#define PCI_EXP_SLTCTL_AIC 0x00c0 /* Attention Indicator Control */ +#define PCI_EXP_SLTCTL_ATTN_IND_ON 0x0040 /* Attention Indicator on */ +#define PCI_EXP_SLTCTL_ATTN_IND_BLINK 0x0080 /* Attention Indicator blinking */ +#define PCI_EXP_SLTCTL_ATTN_IND_OFF 0x00c0 /* Attention Indicator off */ +#define PCI_EXP_SLTCTL_PIC 0x0300 /* Power Indicator Control */ +#define PCI_EXP_SLTCTL_PWR_IND_ON 0x0100 /* Power Indicator on */ +#define PCI_EXP_SLTCTL_PWR_IND_BLINK 0x0200 /* Power Indicator blinking */ +#define PCI_EXP_SLTCTL_PWR_IND_OFF 0x0300 /* Power Indicator off */ +#define PCI_EXP_SLTCTL_PCC 0x0400 /* Power Controller Control */ +#define PCI_EXP_SLTCTL_PWR_ON 0x0000 /* Power On */ +#define PCI_EXP_SLTCTL_PWR_OFF 0x0400 /* Power Off */ +#define PCI_EXP_SLTCTL_EIC 0x0800 /* Electromechanical Interlock Control */ +#define PCI_EXP_SLTCTL_DLLSCE 0x1000 /* Data Link Layer State Changed Enable */ +#define PCI_EXP_SLTSTA 26 /* Slot Status */ +#define PCI_EXP_SLTSTA_ABP 0x0001 /* Attention Button Pressed */ +#define PCI_EXP_SLTSTA_PFD 0x0002 /* Power Fault Detected */ +#define PCI_EXP_SLTSTA_MRLSC 0x0004 /* MRL Sensor Changed */ +#define PCI_EXP_SLTSTA_PDC 0x0008 /* Presence Detect Changed */ +#define PCI_EXP_SLTSTA_CC 0x0010 /* Command Completed */ +#define PCI_EXP_SLTSTA_MRLSS 0x0020 /* MRL Sensor State */ +#define PCI_EXP_SLTSTA_PDS 0x0040 /* Presence Detect State */ +#define PCI_EXP_SLTSTA_EIS 0x0080 /* Electromechanical Interlock Status */ +#define PCI_EXP_SLTSTA_DLLSC 0x0100 /* Data Link Layer State Changed */ +#define PCI_EXP_RTCTL 28 /* Root Control */ +#define PCI_EXP_RTCTL_SECEE 0x0001 /* System Error on Correctable Error */ +#define PCI_EXP_RTCTL_SENFEE 0x0002 /* System Error on Non-Fatal Error */ +#define PCI_EXP_RTCTL_SEFEE 0x0004 /* System Error on Fatal Error */ +#define 
PCI_EXP_RTCTL_PMEIE 0x0008 /* PME Interrupt Enable */ +#define PCI_EXP_RTCTL_CRSSVE 0x0010 /* CRS Software Visibility Enable */ +#define PCI_EXP_RTCAP 30 /* Root Capabilities */ +#define PCI_EXP_RTCAP_CRSVIS 0x0001 /* CRS Software Visibility capability */ +#define PCI_EXP_RTSTA 32 /* Root Status */ +#define PCI_EXP_RTSTA_PME 0x00010000 /* PME status */ +#define PCI_EXP_RTSTA_PENDING 0x00020000 /* PME pending */ +/* + * The Device Capabilities 2, Device Status 2, Device Control 2, + * Link Capabilities 2, Link Status 2, Link Control 2, + * Slot Capabilities 2, Slot Status 2, and Slot Control 2 registers + * are only present on devices with PCIe Capability version 2. + * Use pcie_capability_read_word() and similar interfaces to use them + * safely. + */ +#define PCI_EXP_DEVCAP2 36 /* Device Capabilities 2 */ +#define PCI_EXP_DEVCAP2_ARI 0x00000020 /* Alternative Routing-ID */ +#define PCI_EXP_DEVCAP2_LTR 0x00000800 /* Latency tolerance reporting */ +#define PCI_EXP_DEVCAP2_OBFF_MASK 0x000c0000 /* OBFF support mechanism */ +#define PCI_EXP_DEVCAP2_OBFF_MSG 0x00040000 /* New message signaling */ +#define PCI_EXP_DEVCAP2_OBFF_WAKE 0x00080000 /* Re-use WAKE# for OBFF */ +#define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ +#define PCI_EXP_DEVCTL2_COMP_TIMEOUT 0x000f /* Completion Timeout Value */ +#define PCI_EXP_DEVCTL2_ARI 0x0020 /* Alternative Routing-ID */ +#define PCI_EXP_DEVCTL2_IDO_REQ_EN 0x0100 /* Allow IDO for requests */ +#define PCI_EXP_DEVCTL2_IDO_CMP_EN 0x0200 /* Allow IDO for completions */ +#define PCI_EXP_DEVCTL2_LTR_EN 0x0400 /* Enable LTR mechanism */ +#define PCI_EXP_DEVCTL2_OBFF_MSGA_EN 0x2000 /* Enable OBFF Message type A */ +#define PCI_EXP_DEVCTL2_OBFF_MSGB_EN 0x4000 /* Enable OBFF Message type B */ +#define PCI_EXP_DEVCTL2_OBFF_WAKE_EN 0x6000 /* OBFF using WAKE# signaling */ +#define PCI_EXP_DEVSTA2 42 /* Device Status 2 */ +#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 44 /* v2 endpoints end here */ +#define PCI_EXP_LNKCAP2 44 /* Link Capabilities 2 */ 
+#define PCI_EXP_LNKCAP2_SLS_2_5GB 0x00000002 /* Supported Speed 2.5GT/s */ +#define PCI_EXP_LNKCAP2_SLS_5_0GB 0x00000004 /* Supported Speed 5.0GT/s */ +#define PCI_EXP_LNKCAP2_SLS_8_0GB 0x00000008 /* Supported Speed 8.0GT/s */ +#define PCI_EXP_LNKCAP2_CROSSLINK 0x00000100 /* Crosslink supported */ +#define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ +#define PCI_EXP_LNKSTA2 50 /* Link Status 2 */ +#define PCI_EXP_SLTCAP2 52 /* Slot Capabilities 2 */ +#define PCI_EXP_SLTCTL2 56 /* Slot Control 2 */ +#define PCI_EXP_SLTSTA2 58 /* Slot Status 2 */ + +/* Extended Capabilities (PCI-X 2.0 and Express) */ +#define PCI_EXT_CAP_ID(header) (header & 0x0000ffff) +#define PCI_EXT_CAP_VER(header) ((header >> 16) & 0xf) +#define PCI_EXT_CAP_NEXT(header) ((header >> 20) & 0xffc) + +#define PCI_EXT_CAP_ID_ERR 0x01 /* Advanced Error Reporting */ +#define PCI_EXT_CAP_ID_VC 0x02 /* Virtual Channel Capability */ +#define PCI_EXT_CAP_ID_DSN 0x03 /* Device Serial Number */ +#define PCI_EXT_CAP_ID_PWR 0x04 /* Power Budgeting */ +#define PCI_EXT_CAP_ID_RCLD 0x05 /* Root Complex Link Declaration */ +#define PCI_EXT_CAP_ID_RCILC 0x06 /* Root Complex Internal Link Control */ +#define PCI_EXT_CAP_ID_RCEC 0x07 /* Root Complex Event Collector */ +#define PCI_EXT_CAP_ID_MFVC 0x08 /* Multi-Function VC Capability */ +#define PCI_EXT_CAP_ID_VC9 0x09 /* same as _VC */ +#define PCI_EXT_CAP_ID_RCRB 0x0A /* Root Complex RB? 
*/ +#define PCI_EXT_CAP_ID_VNDR 0x0B /* Vendor-Specific */ +#define PCI_EXT_CAP_ID_CAC 0x0C /* Config Access - obsolete */ +#define PCI_EXT_CAP_ID_ACS 0x0D /* Access Control Services */ +#define PCI_EXT_CAP_ID_ARI 0x0E /* Alternate Routing ID */ +#define PCI_EXT_CAP_ID_ATS 0x0F /* Address Translation Services */ +#define PCI_EXT_CAP_ID_SRIOV 0x10 /* Single Root I/O Virtualization */ +#define PCI_EXT_CAP_ID_MRIOV 0x11 /* Multi Root I/O Virtualization */ +#define PCI_EXT_CAP_ID_MCAST 0x12 /* Multicast */ +#define PCI_EXT_CAP_ID_PRI 0x13 /* Page Request Interface */ +#define PCI_EXT_CAP_ID_AMD_XXX 0x14 /* Reserved for AMD */ +#define PCI_EXT_CAP_ID_REBAR 0x15 /* Resizable BAR */ +#define PCI_EXT_CAP_ID_DPA 0x16 /* Dynamic Power Allocation */ +#define PCI_EXT_CAP_ID_TPH 0x17 /* TPH Requester */ +#define PCI_EXT_CAP_ID_LTR 0x18 /* Latency Tolerance Reporting */ +#define PCI_EXT_CAP_ID_SECPCI 0x19 /* Secondary PCIe Capability */ +#define PCI_EXT_CAP_ID_PMUX 0x1A /* Protocol Multiplexing */ +#define PCI_EXT_CAP_ID_PASID 0x1B /* Process Address Space ID */ +#define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PASID + +#define PCI_EXT_CAP_DSN_SIZEOF 12 +#define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40 + +/* Advanced Error Reporting */ +#define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */ +#define PCI_ERR_UNC_UND 0x00000001 /* Undefined */ +#define PCI_ERR_UNC_DLP 0x00000010 /* Data Link Protocol */ +#define PCI_ERR_UNC_SURPDN 0x00000020 /* Surprise Down */ +#define PCI_ERR_UNC_POISON_TLP 0x00001000 /* Poisoned TLP */ +#define PCI_ERR_UNC_FCP 0x00002000 /* Flow Control Protocol */ +#define PCI_ERR_UNC_COMP_TIME 0x00004000 /* Completion Timeout */ +#define PCI_ERR_UNC_COMP_ABORT 0x00008000 /* Completer Abort */ +#define PCI_ERR_UNC_UNX_COMP 0x00010000 /* Unexpected Completion */ +#define PCI_ERR_UNC_RX_OVER 0x00020000 /* Receiver Overflow */ +#define PCI_ERR_UNC_MALF_TLP 0x00040000 /* Malformed TLP */ +#define PCI_ERR_UNC_ECRC 0x00080000 /* ECRC Error Status */ +#define 
PCI_ERR_UNC_UNSUP 0x00100000 /* Unsupported Request */ +#define PCI_ERR_UNC_ACSV 0x00200000 /* ACS Violation */ +#define PCI_ERR_UNC_INTN 0x00400000 /* internal error */ +#define PCI_ERR_UNC_MCBTLP 0x00800000 /* MC blocked TLP */ +#define PCI_ERR_UNC_ATOMEG 0x01000000 /* Atomic egress blocked */ +#define PCI_ERR_UNC_TLPPRE 0x02000000 /* TLP prefix blocked */ +#define PCI_ERR_UNCOR_MASK 8 /* Uncorrectable Error Mask */ + /* Same bits as above */ +#define PCI_ERR_UNCOR_SEVER 12 /* Uncorrectable Error Severity */ + /* Same bits as above */ +#define PCI_ERR_COR_STATUS 16 /* Correctable Error Status */ +#define PCI_ERR_COR_RCVR 0x00000001 /* Receiver Error Status */ +#define PCI_ERR_COR_BAD_TLP 0x00000040 /* Bad TLP Status */ +#define PCI_ERR_COR_BAD_DLLP 0x00000080 /* Bad DLLP Status */ +#define PCI_ERR_COR_REP_ROLL 0x00000100 /* REPLAY_NUM Rollover */ +#define PCI_ERR_COR_REP_TIMER 0x00001000 /* Replay Timer Timeout */ +#define PCI_ERR_COR_ADV_NFAT 0x00002000 /* Advisory Non-Fatal */ +#define PCI_ERR_COR_INTERNAL 0x00004000 /* Corrected Internal */ +#define PCI_ERR_COR_LOG_OVER 0x00008000 /* Header Log Overflow */ +#define PCI_ERR_COR_MASK 20 /* Correctable Error Mask */ + /* Same bits as above */ +#define PCI_ERR_CAP 24 /* Advanced Error Capabilities */ +#define PCI_ERR_CAP_FEP(x) ((x) & 31) /* First Error Pointer */ +#define PCI_ERR_CAP_ECRC_GENC 0x00000020 /* ECRC Generation Capable */ +#define PCI_ERR_CAP_ECRC_GENE 0x00000040 /* ECRC Generation Enable */ +#define PCI_ERR_CAP_ECRC_CHKC 0x00000080 /* ECRC Check Capable */ +#define PCI_ERR_CAP_ECRC_CHKE 0x00000100 /* ECRC Check Enable */ +#define PCI_ERR_HEADER_LOG 28 /* Header Log Register (16 bytes) */ +#define PCI_ERR_ROOT_COMMAND 44 /* Root Error Command */ +/* Correctable Err Reporting Enable */ +#define PCI_ERR_ROOT_CMD_COR_EN 0x00000001 +/* Non-fatal Err Reporting Enable */ +#define PCI_ERR_ROOT_CMD_NONFATAL_EN 0x00000002 +/* Fatal Err Reporting Enable */ +#define PCI_ERR_ROOT_CMD_FATAL_EN 0x00000004 +#define 
PCI_ERR_ROOT_STATUS 48 +#define PCI_ERR_ROOT_COR_RCV 0x00000001 /* ERR_COR Received */ +/* Multi ERR_COR Received */ +#define PCI_ERR_ROOT_MULTI_COR_RCV 0x00000002 +/* ERR_FATAL/NONFATAL Received */ +#define PCI_ERR_ROOT_UNCOR_RCV 0x00000004 +/* Multi ERR_FATAL/NONFATAL Received */ +#define PCI_ERR_ROOT_MULTI_UNCOR_RCV 0x00000008 +#define PCI_ERR_ROOT_FIRST_FATAL 0x00000010 /* First Fatal */ +#define PCI_ERR_ROOT_NONFATAL_RCV 0x00000020 /* Non-Fatal Received */ +#define PCI_ERR_ROOT_FATAL_RCV 0x00000040 /* Fatal Received */ +#define PCI_ERR_ROOT_ERR_SRC 52 /* Error Source Identification */ + +/* Virtual Channel */ +#define PCI_VC_PORT_CAP1 4 +#define PCI_VC_CAP1_EVCC 0x00000007 /* extended VC count */ +#define PCI_VC_CAP1_LPEVCC 0x00000070 /* low prio extended VC count */ +#define PCI_VC_CAP1_ARB_SIZE 0x00000c00 +#define PCI_VC_PORT_CAP2 8 +#define PCI_VC_CAP2_32_PHASE 0x00000002 +#define PCI_VC_CAP2_64_PHASE 0x00000004 +#define PCI_VC_CAP2_128_PHASE 0x00000008 +#define PCI_VC_CAP2_ARB_OFF 0xff000000 +#define PCI_VC_PORT_CTRL 12 +#define PCI_VC_PORT_CTRL_LOAD_TABLE 0x00000001 +#define PCI_VC_PORT_STATUS 14 +#define PCI_VC_PORT_STATUS_TABLE 0x00000001 +#define PCI_VC_RES_CAP 16 +#define PCI_VC_RES_CAP_32_PHASE 0x00000002 +#define PCI_VC_RES_CAP_64_PHASE 0x00000004 +#define PCI_VC_RES_CAP_128_PHASE 0x00000008 +#define PCI_VC_RES_CAP_128_PHASE_TB 0x00000010 +#define PCI_VC_RES_CAP_256_PHASE 0x00000020 +#define PCI_VC_RES_CAP_ARB_OFF 0xff000000 +#define PCI_VC_RES_CTRL 20 +#define PCI_VC_RES_CTRL_LOAD_TABLE 0x00010000 +#define PCI_VC_RES_CTRL_ARB_SELECT 0x000e0000 +#define PCI_VC_RES_CTRL_ID 0x07000000 +#define PCI_VC_RES_CTRL_ENABLE 0x80000000 +#define PCI_VC_RES_STATUS 26 +#define PCI_VC_RES_STATUS_TABLE 0x00000001 +#define PCI_VC_RES_STATUS_NEGO 0x00000002 +#define PCI_CAP_VC_BASE_SIZEOF 0x10 +#define PCI_CAP_VC_PER_VC_SIZEOF 0x0C + +/* Power Budgeting */ +#define PCI_PWR_DSR 4 /* Data Select Register */ +#define PCI_PWR_DATA 8 /* Data Register */ +#define 
PCI_PWR_DATA_BASE(x) ((x) & 0xff) /* Base Power */ +#define PCI_PWR_DATA_SCALE(x) (((x) >> 8) & 3) /* Data Scale */ +#define PCI_PWR_DATA_PM_SUB(x) (((x) >> 10) & 7) /* PM Sub State */ +#define PCI_PWR_DATA_PM_STATE(x) (((x) >> 13) & 3) /* PM State */ +#define PCI_PWR_DATA_TYPE(x) (((x) >> 15) & 7) /* Type */ +#define PCI_PWR_DATA_RAIL(x) (((x) >> 18) & 7) /* Power Rail */ +#define PCI_PWR_CAP 12 /* Capability */ +#define PCI_PWR_CAP_BUDGET(x) ((x) & 1) /* Included in system budget */ +#define PCI_EXT_CAP_PWR_SIZEOF 16 + +/* Vendor-Specific (VSEC, PCI_EXT_CAP_ID_VNDR) */ +#define PCI_VNDR_HEADER 4 /* Vendor-Specific Header */ +#define PCI_VNDR_HEADER_ID(x) ((x) & 0xffff) +#define PCI_VNDR_HEADER_REV(x) (((x) >> 16) & 0xf) +#define PCI_VNDR_HEADER_LEN(x) (((x) >> 20) & 0xfff) + +/* + * HyperTransport sub capability types + * + * Unfortunately there are both 3 bit and 5 bit capability types defined + * in the HT spec, catering for that is a little messy. You probably don't + * want to use these directly, just use pci_find_ht_capability() and it + * will do the right thing for you. 
+ */ +#define HT_3BIT_CAP_MASK 0xE0 +#define HT_CAPTYPE_SLAVE 0x00 /* Slave/Primary link configuration */ +#define HT_CAPTYPE_HOST 0x20 /* Host/Secondary link configuration */ + +#define HT_5BIT_CAP_MASK 0xF8 +#define HT_CAPTYPE_IRQ 0x80 /* IRQ Configuration */ +#define HT_CAPTYPE_REMAPPING_40 0xA0 /* 40 bit address remapping */ +#define HT_CAPTYPE_REMAPPING_64 0xA2 /* 64 bit address remapping */ +#define HT_CAPTYPE_UNITID_CLUMP 0x90 /* Unit ID clumping */ +#define HT_CAPTYPE_EXTCONF 0x98 /* Extended Configuration Space Access */ +#define HT_CAPTYPE_MSI_MAPPING 0xA8 /* MSI Mapping Capability */ +#define HT_MSI_FLAGS 0x02 /* Offset to flags */ +#define HT_MSI_FLAGS_ENABLE 0x1 /* Mapping enable */ +#define HT_MSI_FLAGS_FIXED 0x2 /* Fixed mapping only */ +#define HT_MSI_FIXED_ADDR 0x00000000FEE00000ULL /* Fixed addr */ +#define HT_MSI_ADDR_LO 0x04 /* Offset to low addr bits */ +#define HT_MSI_ADDR_LO_MASK 0xFFF00000 /* Low address bit mask */ +#define HT_MSI_ADDR_HI 0x08 /* Offset to high addr bits */ +#define HT_CAPTYPE_DIRECT_ROUTE 0xB0 /* Direct routing configuration */ +#define HT_CAPTYPE_VCSET 0xB8 /* Virtual Channel configuration */ +#define HT_CAPTYPE_ERROR_RETRY 0xC0 /* Retry on error configuration */ +#define HT_CAPTYPE_GEN3 0xD0 /* Generation 3 HyperTransport configuration */ +#define HT_CAPTYPE_PM 0xE0 /* HyperTransport power management configuration */ +#define HT_CAP_SIZEOF_LONG 28 /* slave & primary */ +#define HT_CAP_SIZEOF_SHORT 24 /* host & secondary */ + +/* Alternative Routing-ID Interpretation */ +#define PCI_ARI_CAP 0x04 /* ARI Capability Register */ +#define PCI_ARI_CAP_MFVC 0x0001 /* MFVC Function Groups Capability */ +#define PCI_ARI_CAP_ACS 0x0002 /* ACS Function Groups Capability */ +#define PCI_ARI_CAP_NFN(x) (((x) >> 8) & 0xff) /* Next Function Number */ +#define PCI_ARI_CTRL 0x06 /* ARI Control Register */ +#define PCI_ARI_CTRL_MFVC 0x0001 /* MFVC Function Groups Enable */ +#define PCI_ARI_CTRL_ACS 0x0002 /* ACS Function Groups Enable */ 
+#define PCI_ARI_CTRL_FG(x) (((x) >> 4) & 7) /* Function Group */ +#define PCI_EXT_CAP_ARI_SIZEOF 8 + +/* Address Translation Service */ +#define PCI_ATS_CAP 0x04 /* ATS Capability Register */ +#define PCI_ATS_CAP_QDEP(x) ((x) & 0x1f) /* Invalidate Queue Depth */ +#define PCI_ATS_MAX_QDEP 32 /* Max Invalidate Queue Depth */ +#define PCI_ATS_CTRL 0x06 /* ATS Control Register */ +#define PCI_ATS_CTRL_ENABLE 0x8000 /* ATS Enable */ +#define PCI_ATS_CTRL_STU(x) ((x) & 0x1f) /* Smallest Translation Unit */ +#define PCI_ATS_MIN_STU 12 /* shift of minimum STU block */ +#define PCI_EXT_CAP_ATS_SIZEOF 8 + +/* Page Request Interface */ +#define PCI_PRI_CTRL 0x04 /* PRI control register */ +#define PCI_PRI_CTRL_ENABLE 0x01 /* Enable */ +#define PCI_PRI_CTRL_RESET 0x02 /* Reset */ +#define PCI_PRI_STATUS 0x06 /* PRI status register */ +#define PCI_PRI_STATUS_RF 0x001 /* Response Failure */ +#define PCI_PRI_STATUS_UPRGI 0x002 /* Unexpected PRG index */ +#define PCI_PRI_STATUS_STOPPED 0x100 /* PRI Stopped */ +#define PCI_PRI_MAX_REQ 0x08 /* PRI max reqs supported */ +#define PCI_PRI_ALLOC_REQ 0x0c /* PRI max reqs allowed */ +#define PCI_EXT_CAP_PRI_SIZEOF 16 + +/* Process Address Space ID */ +#define PCI_PASID_CAP 0x04 /* PASID feature register */ +#define PCI_PASID_CAP_EXEC 0x02 /* Exec permissions Supported */ +#define PCI_PASID_CAP_PRIV 0x04 /* Privilege Mode Supported */ +#define PCI_PASID_CTRL 0x06 /* PASID control register */ +#define PCI_PASID_CTRL_ENABLE 0x01 /* Enable bit */ +#define PCI_PASID_CTRL_EXEC 0x02 /* Exec permissions Enable */ +#define PCI_PASID_CTRL_PRIV 0x04 /* Privilege Mode Enable */ +#define PCI_EXT_CAP_PASID_SIZEOF 8 + +/* Single Root I/O Virtualization */ +#define PCI_SRIOV_CAP 0x04 /* SR-IOV Capabilities */ +#define PCI_SRIOV_CAP_VFM 0x01 /* VF Migration Capable */ +#define PCI_SRIOV_CAP_INTR(x) ((x) >> 21) /* Interrupt Message Number */ +#define PCI_SRIOV_CTRL 0x08 /* SR-IOV Control */ +#define PCI_SRIOV_CTRL_VFE 0x01 /* VF Enable */ +#define 
PCI_SRIOV_CTRL_VFM 0x02 /* VF Migration Enable */ +#define PCI_SRIOV_CTRL_INTR 0x04 /* VF Migration Interrupt Enable */ +#define PCI_SRIOV_CTRL_MSE 0x08 /* VF Memory Space Enable */ +#define PCI_SRIOV_CTRL_ARI 0x10 /* ARI Capable Hierarchy */ +#define PCI_SRIOV_STATUS 0x0a /* SR-IOV Status */ +#define PCI_SRIOV_STATUS_VFM 0x01 /* VF Migration Status */ +#define PCI_SRIOV_INITIAL_VF 0x0c /* Initial VFs */ +#define PCI_SRIOV_TOTAL_VF 0x0e /* Total VFs */ +#define PCI_SRIOV_NUM_VF 0x10 /* Number of VFs */ +#define PCI_SRIOV_FUNC_LINK 0x12 /* Function Dependency Link */ +#define PCI_SRIOV_VF_OFFSET 0x14 /* First VF Offset */ +#define PCI_SRIOV_VF_STRIDE 0x16 /* Following VF Stride */ +#define PCI_SRIOV_VF_DID 0x1a /* VF Device ID */ +#define PCI_SRIOV_SUP_PGSIZE 0x1c /* Supported Page Sizes */ +#define PCI_SRIOV_SYS_PGSIZE 0x20 /* System Page Size */ +#define PCI_SRIOV_BAR 0x24 /* VF BAR0 */ +#define PCI_SRIOV_NUM_BARS 6 /* Number of VF BARs */ +#define PCI_SRIOV_VFM 0x3c /* VF Migration State Array Offset*/ +#define PCI_SRIOV_VFM_BIR(x) ((x) & 7) /* State BIR */ +#define PCI_SRIOV_VFM_OFFSET(x) ((x) & ~7) /* State Offset */ +#define PCI_SRIOV_VFM_UA 0x0 /* Inactive.Unavailable */ +#define PCI_SRIOV_VFM_MI 0x1 /* Dormant.MigrateIn */ +#define PCI_SRIOV_VFM_MO 0x2 /* Active.MigrateOut */ +#define PCI_SRIOV_VFM_AV 0x3 /* Active.Available */ +#define PCI_EXT_CAP_SRIOV_SIZEOF 64 + +#define PCI_LTR_MAX_SNOOP_LAT 0x4 +#define PCI_LTR_MAX_NOSNOOP_LAT 0x6 +#define PCI_LTR_VALUE_MASK 0x000003ff +#define PCI_LTR_SCALE_MASK 0x00001c00 +#define PCI_LTR_SCALE_SHIFT 10 +#define PCI_EXT_CAP_LTR_SIZEOF 8 + +/* Access Control Service */ +#define PCI_ACS_CAP 0x04 /* ACS Capability Register */ +#define PCI_ACS_SV 0x01 /* Source Validation */ +#define PCI_ACS_TB 0x02 /* Translation Blocking */ +#define PCI_ACS_RR 0x04 /* P2P Request Redirect */ +#define PCI_ACS_CR 0x08 /* P2P Completion Redirect */ +#define PCI_ACS_UF 0x10 /* Upstream Forwarding */ +#define PCI_ACS_EC 0x20 /* P2P Egress 
Control */ +#define PCI_ACS_DT 0x40 /* Direct Translated P2P */ +#define PCI_ACS_EGRESS_BITS 0x05 /* ACS Egress Control Vector Size */ +#define PCI_ACS_CTRL 0x06 /* ACS Control Register */ +#define PCI_ACS_EGRESS_CTL_V 0x08 /* ACS Egress Control Vector */ + +#define PCI_VSEC_HDR 4 /* extended cap - vendor-specific */ +#define PCI_VSEC_HDR_LEN_SHIFT 20 /* shift for length field */ + +/* SATA capability */ +#define PCI_SATA_REGS 4 /* SATA REGs specifier */ +#define PCI_SATA_REGS_MASK 0xF /* location - BAR#/inline */ +#define PCI_SATA_REGS_INLINE 0xF /* REGS in config space */ +#define PCI_SATA_SIZEOF_SHORT 8 +#define PCI_SATA_SIZEOF_LONG 16 + +/* Resizable BARs */ +#define PCI_REBAR_CTRL 8 /* control register */ +#define PCI_REBAR_CTRL_NBAR_MASK (7 << 5) /* mask for # bars */ +#define PCI_REBAR_CTRL_NBAR_SHIFT 5 /* shift for # bars */ + +/* Dynamic Power Allocation */ +#define PCI_DPA_CAP 4 /* capability register */ +#define PCI_DPA_CAP_SUBSTATE_MASK 0x1F /* # substates - 1 */ +#define PCI_DPA_BASE_SIZEOF 16 /* size with 0 substates */ + +/* TPH Requester */ +#define PCI_TPH_CAP 4 /* capability register */ +#define PCI_TPH_CAP_LOC_MASK 0x600 /* location mask */ +#define PCI_TPH_LOC_NONE 0x000 /* no location */ +#define PCI_TPH_LOC_CAP 0x200 /* in capability */ +#define PCI_TPH_LOC_MSIX 0x400 /* in MSI-X */ +#define PCI_TPH_CAP_ST_MASK 0x07FF0000 /* st table mask */ +#define PCI_TPH_CAP_ST_SHIFT 16 /* st table shift */ +#define PCI_TPH_BASE_SIZEOF 12 /* size with no st table */ + +#endif /* LINUX_PCI_REGS_H */ diff --git a/tests/kvm-unit-tests/lib/linux/psci.h b/tests/kvm-unit-tests/lib/linux/psci.h new file mode 100644 index 00000000..3d7a0fc0 --- /dev/null +++ b/tests/kvm-unit-tests/lib/linux/psci.h @@ -0,0 +1,108 @@ +/* + * ARM Power State and Coordination Interface (PSCI) header + * + * This header holds common PSCI defines and macros shared + * by: ARM kernel, ARM64 kernel, KVM ARM/ARM64 and user space. + * + * Copyright (C) 2014 Linaro Ltd. 
+ * Author: Anup Patel + */ + +#ifndef _UAPI_LINUX_PSCI_H +#define _UAPI_LINUX_PSCI_H + +/* + * PSCI v0.1 interface + * + * The PSCI v0.1 function numbers are implementation defined. + * + * Only PSCI return values such as: SUCCESS, NOT_SUPPORTED, + * INVALID_PARAMS, and DENIED defined below are applicable + * to PSCI v0.1. + */ + +/* PSCI v0.2 interface */ +#define PSCI_0_2_FN_BASE 0x84000000 +#define PSCI_0_2_FN(n) (PSCI_0_2_FN_BASE + (n)) +#define PSCI_0_2_64BIT 0x40000000 +#define PSCI_0_2_FN64_BASE \ + (PSCI_0_2_FN_BASE + PSCI_0_2_64BIT) +#define PSCI_0_2_FN64(n) (PSCI_0_2_FN64_BASE + (n)) + +#define PSCI_0_2_FN_PSCI_VERSION PSCI_0_2_FN(0) +#define PSCI_0_2_FN_CPU_SUSPEND PSCI_0_2_FN(1) +#define PSCI_0_2_FN_CPU_OFF PSCI_0_2_FN(2) +#define PSCI_0_2_FN_CPU_ON PSCI_0_2_FN(3) +#define PSCI_0_2_FN_AFFINITY_INFO PSCI_0_2_FN(4) +#define PSCI_0_2_FN_MIGRATE PSCI_0_2_FN(5) +#define PSCI_0_2_FN_MIGRATE_INFO_TYPE PSCI_0_2_FN(6) +#define PSCI_0_2_FN_MIGRATE_INFO_UP_CPU PSCI_0_2_FN(7) +#define PSCI_0_2_FN_SYSTEM_OFF PSCI_0_2_FN(8) +#define PSCI_0_2_FN_SYSTEM_RESET PSCI_0_2_FN(9) + +#define PSCI_0_2_FN64_CPU_SUSPEND PSCI_0_2_FN64(1) +#define PSCI_0_2_FN64_CPU_ON PSCI_0_2_FN64(3) +#define PSCI_0_2_FN64_AFFINITY_INFO PSCI_0_2_FN64(4) +#define PSCI_0_2_FN64_MIGRATE PSCI_0_2_FN64(5) +#define PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU PSCI_0_2_FN64(7) + +#define PSCI_1_0_FN_PSCI_FEATURES PSCI_0_2_FN(10) +#define PSCI_1_0_FN_SYSTEM_SUSPEND PSCI_0_2_FN(14) + +#define PSCI_1_0_FN64_SYSTEM_SUSPEND PSCI_0_2_FN64(14) + +/* PSCI v0.2 power state encoding for CPU_SUSPEND function */ +#define PSCI_0_2_POWER_STATE_ID_MASK 0xffff +#define PSCI_0_2_POWER_STATE_ID_SHIFT 0 +#define PSCI_0_2_POWER_STATE_TYPE_SHIFT 16 +#define PSCI_0_2_POWER_STATE_TYPE_MASK \ + (0x1 << PSCI_0_2_POWER_STATE_TYPE_SHIFT) +#define PSCI_0_2_POWER_STATE_AFFL_SHIFT 24 +#define PSCI_0_2_POWER_STATE_AFFL_MASK \ + (0x3 << PSCI_0_2_POWER_STATE_AFFL_SHIFT) + +/* PSCI extended power state encoding for CPU_SUSPEND function */ 
+#define PSCI_1_0_EXT_POWER_STATE_ID_MASK 0xfffffff +#define PSCI_1_0_EXT_POWER_STATE_ID_SHIFT 0 +#define PSCI_1_0_EXT_POWER_STATE_TYPE_SHIFT 30 +#define PSCI_1_0_EXT_POWER_STATE_TYPE_MASK \ + (0x1 << PSCI_1_0_EXT_POWER_STATE_TYPE_SHIFT) + +/* PSCI v0.2 affinity level state returned by AFFINITY_INFO */ +#define PSCI_0_2_AFFINITY_LEVEL_ON 0 +#define PSCI_0_2_AFFINITY_LEVEL_OFF 1 +#define PSCI_0_2_AFFINITY_LEVEL_ON_PENDING 2 + +/* PSCI v0.2 multicore support in Trusted OS returned by MIGRATE_INFO_TYPE */ +#define PSCI_0_2_TOS_UP_MIGRATE 0 +#define PSCI_0_2_TOS_UP_NO_MIGRATE 1 +#define PSCI_0_2_TOS_MP 2 + +/* PSCI version decoding (independent of PSCI version) */ +#define PSCI_VERSION_MAJOR_SHIFT 16 +#define PSCI_VERSION_MINOR_MASK \ + ((1U << PSCI_VERSION_MAJOR_SHIFT) - 1) +#define PSCI_VERSION_MAJOR_MASK ~PSCI_VERSION_MINOR_MASK +#define PSCI_VERSION_MAJOR(ver) \ + (((ver) & PSCI_VERSION_MAJOR_MASK) >> PSCI_VERSION_MAJOR_SHIFT) +#define PSCI_VERSION_MINOR(ver) \ + ((ver) & PSCI_VERSION_MINOR_MASK) + +/* PSCI features decoding (>=1.0) */ +#define PSCI_1_0_FEATURES_CPU_SUSPEND_PF_SHIFT 1 +#define PSCI_1_0_FEATURES_CPU_SUSPEND_PF_MASK \ + (0x1 << PSCI_1_0_FEATURES_CPU_SUSPEND_PF_SHIFT) + +/* PSCI return values (inclusive of all PSCI versions) */ +#define PSCI_RET_SUCCESS 0 +#define PSCI_RET_NOT_SUPPORTED -1 +#define PSCI_RET_INVALID_PARAMS -2 +#define PSCI_RET_DENIED -3 +#define PSCI_RET_ALREADY_ON -4 +#define PSCI_RET_ON_PENDING -5 +#define PSCI_RET_INTERNAL_FAILURE -6 +#define PSCI_RET_NOT_PRESENT -7 +#define PSCI_RET_DISABLED -8 +#define PSCI_RET_INVALID_ADDRESS -9 + +#endif /* _UAPI_LINUX_PSCI_H */ diff --git a/tests/kvm-unit-tests/lib/pci-edu.c b/tests/kvm-unit-tests/lib/pci-edu.c new file mode 100644 index 00000000..f94962f0 --- /dev/null +++ b/tests/kvm-unit-tests/lib/pci-edu.c @@ -0,0 +1,73 @@ +/* + * Edu PCI device. + * + * Copyright (C) 2016 Red Hat, Inc. 
+ * + * Authors: + * Peter Xu , + * + * This work is licensed under the terms of the GNU LGPL, version 2 or + * later. + */ + +#include "pci-edu.h" +#include "asm/barrier.h" + +/* Return true if alive */ +static inline bool edu_check_alive(struct pci_edu_dev *dev) +{ + static uint32_t live_count = 1; + uint32_t value; + + edu_reg_writel(dev, EDU_REG_ALIVE, live_count++); + value = edu_reg_readl(dev, EDU_REG_ALIVE); + return (live_count - 1 == ~value); +} + +bool edu_init(struct pci_edu_dev *dev) +{ + pcidevaddr_t dev_addr; + + dev_addr = pci_find_dev(PCI_VENDOR_ID_QEMU, PCI_DEVICE_ID_EDU); + if (dev_addr == PCIDEVADDR_INVALID) + return false; + + pci_dev_init(&dev->pci_dev, dev_addr); + pci_enable_defaults(&dev->pci_dev); + dev->reg_base = ioremap(dev->pci_dev.resource[EDU_BAR], PAGE_SIZE); + assert(edu_check_alive(dev)); + return true; +} + +void edu_dma(struct pci_edu_dev *dev, iova_t iova, + size_t size, unsigned int dev_offset, bool from_device) +{ + uint64_t from, to; + uint32_t cmd = EDU_CMD_DMA_START; + + assert(size <= EDU_DMA_SIZE_MAX); + assert(dev_offset < EDU_DMA_SIZE_MAX); + + printf("edu device DMA start %s addr 0x%" PRIx64 " size 0x%lu off 0x%x\n", + from_device ? "FROM" : "TO", + iova, (ulong)size, dev_offset); + + if (from_device) { + from = dev_offset + EDU_DMA_START; + to = iova; + cmd |= EDU_CMD_DMA_FROM; + } else { + from = iova; + to = EDU_DMA_START + dev_offset; + cmd |= EDU_CMD_DMA_TO; + } + + edu_reg_writeq(dev, EDU_REG_DMA_SRC, from); + edu_reg_writeq(dev, EDU_REG_DMA_DST, to); + edu_reg_writeq(dev, EDU_REG_DMA_COUNT, size); + edu_reg_writel(dev, EDU_REG_DMA_CMD, cmd); + + /* Wait until DMA finished */ + while (edu_reg_readl(dev, EDU_REG_DMA_CMD) & EDU_CMD_DMA_START) + cpu_relax(); +} diff --git a/tests/kvm-unit-tests/lib/pci-edu.h b/tests/kvm-unit-tests/lib/pci-edu.h new file mode 100644 index 00000000..44b4ba16 --- /dev/null +++ b/tests/kvm-unit-tests/lib/pci-edu.h @@ -0,0 +1,86 @@ +/* + * Edu PCI device header. 
+ * + * Copyright (C) 2016 Red Hat, Inc. + * + * Authors: + * Peter Xu , + * + * This work is licensed under the terms of the GNU LGPL, version 2 or + * later. + * + * Edu device is a virtualized device in QEMU. Please refer to + * docs/specs/edu.txt in QEMU repository for EDU device manual. + */ +#ifndef __PCI_EDU_H__ +#define __PCI_EDU_H__ + +#include "pci.h" +#include "asm/io.h" + +#define PCI_VENDOR_ID_QEMU 0x1234 +#define PCI_DEVICE_ID_EDU 0x11e8 + +/* The only bar used by EDU device */ +#define EDU_BAR 0 +#define EDU_MAGIC 0xed +#define EDU_VERSION 0x100 +#define EDU_DMA_BUF_SIZE (1 << 20) +#define EDU_INPUT_BUF_SIZE 256 + +#define EDU_REG_ID 0x0 +#define EDU_REG_ALIVE 0x4 +#define EDU_REG_FACTORIAL 0x8 +#define EDU_REG_STATUS 0x20 +#define EDU_REG_INTR_STATUS 0x24 +#define EDU_REG_INTR_RAISE 0x60 +#define EDU_REG_INTR_ACK 0x64 +#define EDU_REG_DMA_SRC 0x80 +#define EDU_REG_DMA_DST 0x88 +#define EDU_REG_DMA_COUNT 0x90 +#define EDU_REG_DMA_CMD 0x98 + +#define EDU_CMD_DMA_START 0x01 +#define EDU_CMD_DMA_FROM 0x02 +#define EDU_CMD_DMA_TO 0x00 + +#define EDU_STATUS_FACTORIAL 0x1 +#define EDU_STATUS_INT_ENABLE 0x80 + +#define EDU_DMA_START 0x40000 +#define EDU_DMA_SIZE_MAX 4096 + +struct pci_edu_dev { + struct pci_dev pci_dev; + volatile void *reg_base; +}; + +#define edu_reg(d, r) (volatile void *)((d)->reg_base + (r)) + +static inline uint64_t edu_reg_readq(struct pci_edu_dev *dev, int reg) +{ + return __raw_readq(edu_reg(dev, reg)); +} + +static inline uint32_t edu_reg_readl(struct pci_edu_dev *dev, int reg) +{ + return __raw_readl(edu_reg(dev, reg)); +} + +static inline void edu_reg_writeq(struct pci_edu_dev *dev, int reg, + uint64_t val) +{ + __raw_writeq(val, edu_reg(dev, reg)); +} + +static inline void edu_reg_writel(struct pci_edu_dev *dev, int reg, + uint32_t val) +{ + __raw_writel(val, edu_reg(dev, reg)); +} + +bool edu_init(struct pci_edu_dev *dev); +void edu_dma(struct pci_edu_dev *dev, iova_t iova, + size_t size, unsigned int dev_offset, bool 
from_device); + +#endif diff --git a/tests/kvm-unit-tests/lib/pci-host-generic.c b/tests/kvm-unit-tests/lib/pci-host-generic.c new file mode 100644 index 00000000..818150dc --- /dev/null +++ b/tests/kvm-unit-tests/lib/pci-host-generic.c @@ -0,0 +1,320 @@ +/* + * Generic PCI host controller as described in PCI Bus Binding to Open Firmware + * + * Copyright (C) 2016, Red Hat Inc, Alexander Gordeev + * + * This work is licensed under the terms of the GNU LGPL, version 2. + */ +#include "libcflat.h" +#include "devicetree.h" +#include "alloc.h" +#include "pci.h" +#include "asm/pci.h" +#include "asm/io.h" +#include "pci-host-generic.h" +#include + +static struct pci_host_bridge *pci_host_bridge; + +static int of_flags_to_pci_type(u32 of_flags) +{ + static int type_map[] = { + [1] = PCI_BASE_ADDRESS_SPACE_IO, + [2] = PCI_BASE_ADDRESS_MEM_TYPE_32, + [3] = PCI_BASE_ADDRESS_MEM_TYPE_64 + }; + int idx = (of_flags >> 24) & 0x03; + int res; + + assert(idx > 0); + res = type_map[idx]; + + if (of_flags & 0x40000000) + res |= PCI_BASE_ADDRESS_MEM_PREFETCH; + + return res; +} + +static int pci_bar_type(u32 bar) +{ + if (bar & PCI_BASE_ADDRESS_SPACE) + return PCI_BASE_ADDRESS_SPACE_IO; + else + return bar & (PCI_BASE_ADDRESS_MEM_TYPE_MASK | + PCI_BASE_ADDRESS_MEM_PREFETCH); +} + +/* + * Probe DT for a generic PCI host controller + * See kernel Documentation/devicetree/bindings/pci/host-generic-pci.txt + * and function gen_pci_probe() in drivers/pci/host/pci-host-generic.c + */ +static struct pci_host_bridge *pci_dt_probe(void) +{ + struct pci_host_bridge *host; + const void *fdt = dt_fdt(); + const struct fdt_property *prop; + struct dt_pbus_reg base; + struct dt_device dt_dev; + struct dt_bus dt_bus; + struct pci_addr_space *as; + fdt32_t *data; + u32 bus, bus_max; + u32 nac, nsc, nac_root, nsc_root; + int nr_range_cells, nr_addr_spaces; + int ret, node, len, i; + + if (!dt_available()) { + printf("No device tree found\n"); + return NULL; + } + + dt_bus_init_defaults(&dt_bus); + 
dt_device_init(&dt_dev, &dt_bus, NULL); + + node = fdt_path_offset(fdt, "/"); + assert(node >= 0); + + ret = dt_get_nr_cells(node, &nac_root, &nsc_root); + assert(ret == 0); + assert(nac_root == 1 || nac_root == 2); + + node = fdt_node_offset_by_compatible(fdt, node, + "pci-host-ecam-generic"); + if (node == -FDT_ERR_NOTFOUND) { + printf("No PCIe ECAM compatible controller found\n"); + return NULL; + } + assert(node >= 0); + + prop = fdt_get_property(fdt, node, "device_type", &len); + assert(prop && len == 4 && !strcmp((char *)prop->data, "pci")); + + dt_device_bind_node(&dt_dev, node); + ret = dt_pbus_get_base(&dt_dev, &base); + assert(ret == 0); + + prop = fdt_get_property(fdt, node, "bus-range", &len); + if (prop == NULL) { + assert(len == -FDT_ERR_NOTFOUND); + bus = 0x00; + bus_max = 0xff; + } else { + data = (fdt32_t *)prop->data; + bus = fdt32_to_cpu(data[0]); + bus_max = fdt32_to_cpu(data[1]); + assert(bus <= bus_max); + } + assert(bus_max < base.size / (1 << PCI_ECAM_BUS_SHIFT)); + + ret = dt_get_nr_cells(node, &nac, &nsc); + assert(ret == 0); + assert(nac == 3 && nsc == 2); + + prop = fdt_get_property(fdt, node, "ranges", &len); + assert(prop != NULL); + + nr_range_cells = nac + nsc + nac_root; + nr_addr_spaces = (len / 4) / nr_range_cells; + assert(nr_addr_spaces); + + host = malloc(sizeof(*host) + + sizeof(host->addr_space[0]) * nr_addr_spaces); + assert(host != NULL); + + host->start = base.addr; + host->size = base.size; + host->bus = bus; + host->bus_max = bus_max; + host->nr_addr_spaces = nr_addr_spaces; + + data = (fdt32_t *)prop->data; + as = &host->addr_space[0]; + + for (i = 0; i < nr_addr_spaces; i++) { + /* + * The PCI binding encodes the PCI address with three + * cells as follows: + * + * phys.hi cell: npt000ss bbbbbbbb dddddfff rrrrrrrr + * phys.mid cell: hhhhhhhh hhhhhhhh hhhhhhhh hhhhhhhh + * phys.lo cell: llllllll llllllll llllllll llllllll + * + * PCI device bus address and flags are encoded into phys.high + * PCI 64 bit address is 
encoded into phys.mid and phys.low + */ + as->type = of_flags_to_pci_type(fdt32_to_cpu(data[0])); + as->pci_start = ((u64)fdt32_to_cpu(data[1]) << 32) | + fdt32_to_cpu(data[2]); + + if (nr_range_cells == 6) { + as->start = fdt32_to_cpu(data[3]); + as->size = ((u64)fdt32_to_cpu(data[4]) << 32) | + fdt32_to_cpu(data[5]); + } else { + as->start = ((u64)fdt32_to_cpu(data[3]) << 32) | + fdt32_to_cpu(data[4]); + as->size = ((u64)fdt32_to_cpu(data[5]) << 32) | + fdt32_to_cpu(data[6]); + } + + data += nr_range_cells; + as++; + } + + return host; +} + +static bool pci_alloc_resource(struct pci_dev *dev, int bar_num, u64 *addr) +{ + struct pci_host_bridge *host = pci_host_bridge; + struct pci_addr_space *as = &host->addr_space[0]; + u32 bar; + u64 size, pci_addr; + int type, i; + + *addr = INVALID_PHYS_ADDR; + + size = pci_bar_size(dev, bar_num); + if (!size) + return false; + + bar = pci_bar_get(dev, bar_num); + type = pci_bar_type(bar); + if (type & PCI_BASE_ADDRESS_MEM_TYPE_MASK) + type &= ~PCI_BASE_ADDRESS_MEM_PREFETCH; + + for (i = 0; i < host->nr_addr_spaces; i++) { + if (as->type == type) + break; + as++; + } + + if (i >= host->nr_addr_spaces) { + printf("%s: warning: can't satisfy request for ", __func__); + pci_dev_print_id(dev); + printf(" "); + pci_bar_print(dev, bar_num); + printf("\n"); + return false; + } + + pci_addr = ALIGN(as->pci_start + as->allocated, size); + size += pci_addr - (as->pci_start + as->allocated); + assert(as->allocated + size <= as->size); + *addr = pci_addr; + as->allocated += size; + + return true; +} + +bool pci_probe(void) +{ + struct pci_dev pci_dev; + pcidevaddr_t dev; + u8 header; + u32 cmd; + int i; + + assert(!pci_host_bridge); + pci_host_bridge = pci_dt_probe(); + if (!pci_host_bridge) + return false; + + for (dev = 0; dev < PCI_DEVFN_MAX; dev++) { + if (!pci_dev_exists(dev)) + continue; + + pci_dev_init(&pci_dev, dev); + + /* We are only interested in normal PCI devices */ + header = pci_config_readb(dev, PCI_HEADER_TYPE); + if 
((header & PCI_HEADER_TYPE_MASK) != PCI_HEADER_TYPE_NORMAL) + continue; + + cmd = PCI_COMMAND_SERR | PCI_COMMAND_PARITY; + + for (i = 0; i < PCI_BAR_NUM; i++) { + u64 addr; + + if (pci_alloc_resource(&pci_dev, i, &addr)) { + pci_bar_set_addr(&pci_dev, i, addr); + + if (pci_bar_is_memory(&pci_dev, i)) + cmd |= PCI_COMMAND_MEMORY; + else + cmd |= PCI_COMMAND_IO; + } + + if (pci_bar_is64(&pci_dev, i)) + i++; + } + + pci_config_writew(dev, PCI_COMMAND, cmd); + } + + return true; +} + +/* + * This function is to be called from pci_translate_addr() to provide + * mapping between this host bridge's PCI busses address and CPU physical + * address. + */ +phys_addr_t pci_host_bridge_get_paddr(u64 pci_addr) +{ + struct pci_host_bridge *host = pci_host_bridge; + struct pci_addr_space *as = &host->addr_space[0]; + int i; + + for (i = 0; i < host->nr_addr_spaces; i++) { + if (pci_addr >= as->pci_start && + pci_addr < as->pci_start + as->size) + return as->start + (pci_addr - as->pci_start); + as++; + } + + return INVALID_PHYS_ADDR; +} + +static void __iomem *pci_get_dev_conf(struct pci_host_bridge *host, int devfn) +{ + return (void __iomem *)(unsigned long) + host->start + (devfn << PCI_ECAM_DEVFN_SHIFT); +} + +u8 pci_config_readb(pcidevaddr_t dev, u8 off) +{ + void __iomem *conf = pci_get_dev_conf(pci_host_bridge, dev); + return readb(conf + off); +} + +u16 pci_config_readw(pcidevaddr_t dev, u8 off) +{ + void __iomem *conf = pci_get_dev_conf(pci_host_bridge, dev); + return readw(conf + off); +} + +u32 pci_config_readl(pcidevaddr_t dev, u8 off) +{ + void __iomem *conf = pci_get_dev_conf(pci_host_bridge, dev); + return readl(conf + off); +} + +void pci_config_writeb(pcidevaddr_t dev, u8 off, u8 val) +{ + void __iomem *conf = pci_get_dev_conf(pci_host_bridge, dev); + writeb(val, conf + off); +} + +void pci_config_writew(pcidevaddr_t dev, u8 off, u16 val) +{ + void __iomem *conf = pci_get_dev_conf(pci_host_bridge, dev); + writew(val, conf + off); +} + +void 
pci_config_writel(pcidevaddr_t dev, u8 off, u32 val) +{ + void __iomem *conf = pci_get_dev_conf(pci_host_bridge, dev); + writel(val, conf + off); +} diff --git a/tests/kvm-unit-tests/lib/pci-host-generic.h b/tests/kvm-unit-tests/lib/pci-host-generic.h new file mode 100644 index 00000000..fd30e7c7 --- /dev/null +++ b/tests/kvm-unit-tests/lib/pci-host-generic.h @@ -0,0 +1,46 @@ +#ifndef PCI_HOST_GENERIC_H +#define PCI_HOST_GENERIC_H +/* + * PCI host bridge supporting structures and constants + * + * Copyright (C) 2016, Red Hat Inc, Alexander Gordeev + * + * This work is licensed under the terms of the GNU LGPL, version 2. + */ +#include "libcflat.h" + +struct pci_addr_space { + phys_addr_t pci_start; + phys_addr_t start; + phys_addr_t size; + phys_addr_t allocated; + int type; +}; + +struct pci_host_bridge { + phys_addr_t start; + phys_addr_t size; + int bus; + int bus_max; + int nr_addr_spaces; + struct pci_addr_space addr_space[]; +}; + +/* + * The following constants are derived from Linux, see this source: + * + * drivers/pci/host/pci-host-generic.c + * struct gen_pci_cfg_bus_ops::bus_shift + * int gen_pci_parse_map_cfg_windows(struct gen_pci *pci) + * + * Documentation/devicetree/bindings/pci/host-generic-pci.txt describes + * ECAM Configuration Space is be memory-mapped by concatenating the various + * components to form an offset: + * + * cfg_offset(bus, device, function, register) = + * bus << 20 | device << 15 | function << 12 | register + */ +#define PCI_ECAM_BUS_SHIFT 20 +#define PCI_ECAM_DEVFN_SHIFT 12 + +#endif diff --git a/tests/kvm-unit-tests/lib/pci-testdev.c b/tests/kvm-unit-tests/lib/pci-testdev.c new file mode 100644 index 00000000..7d298e69 --- /dev/null +++ b/tests/kvm-unit-tests/lib/pci-testdev.c @@ -0,0 +1,194 @@ +/* + * QEMU "pci-testdev" PCI test device + * + * Copyright (C) 2016, Red Hat Inc, Alexander Gordeev + * + * This work is licensed under the terms of the GNU LGPL, version 2. 
+ */ +#include "pci.h" +#include "asm/io.h" + +struct pci_testdev_ops { + u8 (*io_readb)(const volatile void *addr); + u16 (*io_readw)(const volatile void *addr); + u32 (*io_readl)(const volatile void *addr); + void (*io_writeb)(u8 value, volatile void *addr); + void (*io_writew)(u16 value, volatile void *addr); + void (*io_writel)(u32 value, volatile void *addr); +}; + +static u8 pio_readb(const volatile void *addr) +{ + return inb((unsigned long)addr); +} + +static u16 pio_readw(const volatile void *addr) +{ + return inw((unsigned long)addr); +} + +static u32 pio_readl(const volatile void *addr) +{ + return inl((unsigned long)addr); +} + +static void pio_writeb(u8 value, volatile void *addr) +{ + outb(value, (unsigned long)addr); +} + +static void pio_writew(u16 value, volatile void *addr) +{ + outw(value, (unsigned long)addr); +} + +static void pio_writel(u32 value, volatile void *addr) +{ + outl(value, (unsigned long)addr); +} + +static struct pci_testdev_ops pci_testdev_io_ops = { + .io_readb = pio_readb, + .io_readw = pio_readw, + .io_readl = pio_readl, + .io_writeb = pio_writeb, + .io_writew = pio_writew, + .io_writel = pio_writel +}; + +static u8 mmio_readb(const volatile void *addr) +{ + return *(const volatile u8 __force *)addr; +} + +static u16 mmio_readw(const volatile void *addr) +{ + return *(const volatile u16 __force *)addr; +} + +static u32 mmio_readl(const volatile void *addr) +{ + return *(const volatile u32 __force *)addr; +} + +static void mmio_writeb(u8 value, volatile void *addr) +{ + *(volatile u8 __force *)addr = value; +} + +static void mmio_writew(u16 value, volatile void *addr) +{ + *(volatile u16 __force *)addr = value; +} + +static void mmio_writel(u32 value, volatile void *addr) +{ + *(volatile u32 __force *)addr = value; +} + +static struct pci_testdev_ops pci_testdev_mem_ops = { + .io_readb = mmio_readb, + .io_readw = mmio_readw, + .io_readl = mmio_readl, + .io_writeb = mmio_writeb, + .io_writew = mmio_writew, + .io_writel = 
mmio_writel +}; + +static bool pci_testdev_one(struct pci_test_dev_hdr *test, + int test_nr, + struct pci_testdev_ops *ops) +{ + u8 width; + u32 count, sig, off; + const int nr_writes = 16; + int i; + + ops->io_writeb(test_nr, &test->test); + count = ops->io_readl(&test->count); + if (count != 0) + return false; + + width = ops->io_readb(&test->width); + if (width != 1 && width != 2 && width != 4) + return false; + + sig = ops->io_readl(&test->data); + off = ops->io_readl(&test->offset); + + for (i = 0; i < nr_writes; i++) { + switch (width) { + case 1: ops->io_writeb(sig, (void *)test + off); break; + case 2: ops->io_writew(sig, (void *)test + off); break; + case 4: ops->io_writel(sig, (void *)test + off); break; + } + } + + count = ops->io_readl(&test->count); + if (!count) + return true; + + return (int)count == nr_writes; +} + +void pci_testdev_print(struct pci_test_dev_hdr *test, + struct pci_testdev_ops *ops) +{ + bool io = (ops == &pci_testdev_io_ops); + int i; + + printf("pci-testdev %3s: ", io ? 
"io" : "mem"); + for (i = 0;; ++i) { + char c = ops->io_readb(&test->name[i]); + if (!c) + break; + printf("%c", c); + } + printf("\n"); +} + +static int pci_testdev_all(struct pci_test_dev_hdr *test, + struct pci_testdev_ops *ops) +{ + int i; + + for (i = 0;; i++) { + if (!pci_testdev_one(test, i, ops)) + break; + pci_testdev_print(test, ops); + } + + return i; +} + +int pci_testdev(void) +{ + struct pci_dev pci_dev; + pcidevaddr_t dev; + phys_addr_t addr; + void __iomem *mem, *io; + int nr_tests = 0; + bool ret; + + dev = pci_find_dev(PCI_VENDOR_ID_REDHAT, PCI_DEVICE_ID_REDHAT_TEST); + if (dev == PCIDEVADDR_INVALID) { + printf("'pci-testdev' device is not found, " + "check QEMU '-device pci-testdev' parameter\n"); + return -1; + } + pci_dev_init(&pci_dev, dev); + + ret = pci_bar_is_valid(&pci_dev, 0) && pci_bar_is_valid(&pci_dev, 1); + assert(ret); + + addr = pci_bar_get_addr(&pci_dev, 0); + mem = ioremap(addr, PAGE_SIZE); + + addr = pci_bar_get_addr(&pci_dev, 1); + io = (void *)(unsigned long)addr; + + nr_tests += pci_testdev_all(mem, &pci_testdev_mem_ops); + nr_tests += pci_testdev_all(io, &pci_testdev_io_ops); + + return nr_tests; +} diff --git a/tests/kvm-unit-tests/lib/pci.c b/tests/kvm-unit-tests/lib/pci.c new file mode 100644 index 00000000..daf39810 --- /dev/null +++ b/tests/kvm-unit-tests/lib/pci.c @@ -0,0 +1,374 @@ +/* + * Copyright (C) 2013, Red Hat Inc, Michael S. Tsirkin + * + * This work is licensed under the terms of the GNU LGPL, version 2. 
+ */ +#include +#include "pci.h" +#include "asm/pci.h" + +void pci_cap_walk(struct pci_dev *dev, pci_cap_handler_t handler) +{ + uint8_t cap_offset; + uint8_t cap_id; + int count = 0; + + cap_offset = pci_config_readb(dev->bdf, PCI_CAPABILITY_LIST); + while (cap_offset) { + cap_id = pci_config_readb(dev->bdf, cap_offset); + assert(cap_id < PCI_CAP_ID_MAX + 1); + handler(dev, cap_offset, cap_id); + cap_offset = pci_config_readb(dev->bdf, cap_offset + 1); + /* Avoid dead loop during cap walk */ + assert(++count <= 255); + } +} + +void pci_msi_set_enable(struct pci_dev *dev, bool enabled) +{ + uint16_t msi_control; + uint16_t offset; + + offset = dev->msi_offset; + msi_control = pci_config_readw(dev->bdf, offset + PCI_MSI_FLAGS); + + if (enabled) + msi_control |= PCI_MSI_FLAGS_ENABLE; + else + msi_control &= ~PCI_MSI_FLAGS_ENABLE; + + pci_config_writew(dev->bdf, offset + PCI_MSI_FLAGS, msi_control); +} + +bool pci_setup_msi(struct pci_dev *dev, uint64_t msi_addr, uint32_t msi_data) +{ + uint16_t msi_control; + uint16_t offset; + pcidevaddr_t addr; + + assert(dev); + + if (!dev->msi_offset) { + printf("MSI: dev 0x%x does not support MSI.\n", dev->bdf); + return false; + } + + addr = dev->bdf; + offset = dev->msi_offset; + msi_control = pci_config_readw(addr, offset + PCI_MSI_FLAGS); + pci_config_writel(addr, offset + PCI_MSI_ADDRESS_LO, + msi_addr & 0xffffffff); + + if (msi_control & PCI_MSI_FLAGS_64BIT) { + pci_config_writel(addr, offset + PCI_MSI_ADDRESS_HI, + (uint32_t)(msi_addr >> 32)); + pci_config_writel(addr, offset + PCI_MSI_DATA_64, msi_data); + } else { + pci_config_writel(addr, offset + PCI_MSI_DATA_32, msi_data); + } + + pci_msi_set_enable(dev, true); + + return true; +} + +void pci_cmd_set_clr(struct pci_dev *dev, uint16_t set, uint16_t clr) +{ + uint16_t val = pci_config_readw(dev->bdf, PCI_COMMAND); + + /* No overlap is allowed */ + assert((set & clr) == 0); + val |= set; + val &= ~clr; + + pci_config_writew(dev->bdf, PCI_COMMAND, val); +} + +bool 
pci_dev_exists(pcidevaddr_t dev) +{ + return (pci_config_readw(dev, PCI_VENDOR_ID) != 0xffff && + pci_config_readw(dev, PCI_DEVICE_ID) != 0xffff); +} + +/* Scan bus look for a specific device. Only bus 0 scanned for now. */ +pcidevaddr_t pci_find_dev(uint16_t vendor_id, uint16_t device_id) +{ + pcidevaddr_t dev; + + for (dev = 0; dev < PCI_DEVFN_MAX; ++dev) { + if (pci_config_readw(dev, PCI_VENDOR_ID) == vendor_id && + pci_config_readw(dev, PCI_DEVICE_ID) == device_id) + return dev; + } + + return PCIDEVADDR_INVALID; +} + +uint32_t pci_bar_mask(uint32_t bar) +{ + return (bar & PCI_BASE_ADDRESS_SPACE_IO) ? + PCI_BASE_ADDRESS_IO_MASK : PCI_BASE_ADDRESS_MEM_MASK; +} + +uint32_t pci_bar_get(struct pci_dev *dev, int bar_num) +{ + return pci_config_readl(dev->bdf, PCI_BASE_ADDRESS_0 + + bar_num * 4); +} + +static phys_addr_t __pci_bar_get_addr(struct pci_dev *dev, int bar_num) +{ + uint32_t bar = pci_bar_get(dev, bar_num); + uint32_t mask = pci_bar_mask(bar); + uint64_t addr = bar & mask; + phys_addr_t phys_addr; + + if (pci_bar_is64(dev, bar_num)) + addr |= (uint64_t)pci_bar_get(dev, bar_num + 1) << 32; + + phys_addr = pci_translate_addr(dev->bdf, addr); + assert(phys_addr != INVALID_PHYS_ADDR); + + return phys_addr; +} + +phys_addr_t pci_bar_get_addr(struct pci_dev *dev, int bar_num) +{ + return dev->resource[bar_num]; +} + +void pci_bar_set_addr(struct pci_dev *dev, int bar_num, phys_addr_t addr) +{ + int off = PCI_BASE_ADDRESS_0 + bar_num * 4; + + pci_config_writel(dev->bdf, off, (uint32_t)addr); + dev->resource[bar_num] = addr; + + if (pci_bar_is64(dev, bar_num)) { + assert(bar_num + 1 < PCI_BAR_NUM); + pci_config_writel(dev->bdf, off + 4, (uint32_t)(addr >> 32)); + dev->resource[bar_num + 1] = dev->resource[bar_num]; + } +} + +/* + * To determine the amount of address space needed by a PCI device, + * one must save the original value of the BAR, write a value of + * all 1's to the register, and then read it back. 
The amount of + * memory can be then determined by masking the information bits, + * performing a bitwise NOT, and incrementing the value by 1. + * + * The following pci_bar_size_helper() and pci_bar_size() functions + * implement the algorithm. + */ +static uint32_t pci_bar_size_helper(struct pci_dev *dev, int bar_num) +{ + int off = PCI_BASE_ADDRESS_0 + bar_num * 4; + uint16_t bdf = dev->bdf; + uint32_t bar, val; + + bar = pci_config_readl(bdf, off); + pci_config_writel(bdf, off, ~0u); + val = pci_config_readl(bdf, off); + pci_config_writel(bdf, off, bar); + + return val; +} + +phys_addr_t pci_bar_size(struct pci_dev *dev, int bar_num) +{ + uint32_t bar, size; + + size = pci_bar_size_helper(dev, bar_num); + if (!size) + return 0; + + bar = pci_bar_get(dev, bar_num); + size &= pci_bar_mask(bar); + + if (pci_bar_is64(dev, bar_num)) { + phys_addr_t size64 = pci_bar_size_helper(dev, bar_num + 1); + size64 = (size64 << 32) | size; + + return ~size64 + 1; + } else { + return ~size + 1; + } +} + +bool pci_bar_is_memory(struct pci_dev *dev, int bar_num) +{ + uint32_t bar = pci_bar_get(dev, bar_num); + + return !(bar & PCI_BASE_ADDRESS_SPACE_IO); +} + +bool pci_bar_is_valid(struct pci_dev *dev, int bar_num) +{ + return dev->resource[bar_num] != INVALID_PHYS_ADDR; +} + +bool pci_bar_is64(struct pci_dev *dev, int bar_num) +{ + uint32_t bar = pci_bar_get(dev, bar_num); + + if (bar & PCI_BASE_ADDRESS_SPACE_IO) + return false; + + return (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == + PCI_BASE_ADDRESS_MEM_TYPE_64; +} + +void pci_bar_print(struct pci_dev *dev, int bar_num) +{ + phys_addr_t size, start, end; + uint32_t bar; + + if (!pci_bar_is_valid(dev, bar_num)) + return; + + bar = pci_bar_get(dev, bar_num); + size = pci_bar_size(dev, bar_num); + start = pci_bar_get_addr(dev, bar_num); + end = start + size - 1; + + if (pci_bar_is64(dev, bar_num)) { + printf("BAR#%d,%d [%" PRIx64 "-%" PRIx64 " ", + bar_num, bar_num + 1, start, end); + } else { + printf("BAR#%d [%02x-%02x ", + 
bar_num, (uint32_t)start, (uint32_t)end); + } + + if (bar & PCI_BASE_ADDRESS_SPACE_IO) { + printf("PIO"); + } else { + printf("MEM"); + switch (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) { + case PCI_BASE_ADDRESS_MEM_TYPE_32: + printf("32"); + break; + case PCI_BASE_ADDRESS_MEM_TYPE_1M: + printf("1M"); + break; + case PCI_BASE_ADDRESS_MEM_TYPE_64: + printf("64"); + break; + default: + assert(0); + } + } + + if (bar & PCI_BASE_ADDRESS_MEM_PREFETCH) + printf("/p"); + + printf("]"); +} + +void pci_dev_print_id(struct pci_dev *dev) +{ + pcidevaddr_t bdf = dev->bdf; + + printf("00.%02x.%1x %04x:%04x", bdf / 8, bdf % 8, + pci_config_readw(bdf, PCI_VENDOR_ID), + pci_config_readw(bdf, PCI_DEVICE_ID)); +} + +static void pci_cap_print(struct pci_dev *dev, int cap_offset, int cap_id) +{ + switch (cap_id) { + case PCI_CAP_ID_MSI: { + uint16_t control = pci_config_readw(dev->bdf, cap_offset + PCI_MSI_FLAGS); + printf("\tMSI,%s-bit capability ", control & PCI_MSI_FLAGS_64BIT ? "64" : "32"); + break; + } + default: + printf("\tcapability 0x%02x ", cap_id); + break; + } + printf("at offset 0x%02x\n", cap_offset); +} + +void pci_dev_print(struct pci_dev *dev) +{ + pcidevaddr_t bdf = dev->bdf; + uint8_t header = pci_config_readb(bdf, PCI_HEADER_TYPE); + uint8_t progif = pci_config_readb(bdf, PCI_CLASS_PROG); + uint8_t subclass = pci_config_readb(bdf, PCI_CLASS_DEVICE); + uint8_t class = pci_config_readb(bdf, PCI_CLASS_DEVICE + 1); + int i; + + pci_dev_print_id(dev); + printf(" type %02x progif %02x class %02x subclass %02x\n", + header, progif, class, subclass); + + pci_cap_walk(dev, pci_cap_print); + + if ((header & PCI_HEADER_TYPE_MASK) != PCI_HEADER_TYPE_NORMAL) + return; + + for (i = 0; i < PCI_BAR_NUM; i++) { + if (pci_bar_is_valid(dev, i)) { + printf("\t"); + pci_bar_print(dev, i); + printf("\n"); + } + if (pci_bar_is64(dev, i)) + i++; + } +} + +void pci_print(void) +{ + pcidevaddr_t devfn; + struct pci_dev pci_dev; + + for (devfn = 0; devfn < PCI_DEVFN_MAX; ++devfn) { + if 
(pci_dev_exists(devfn)) { + pci_dev_init(&pci_dev, devfn); + pci_dev_print(&pci_dev); + } + } +} + +void pci_dev_init(struct pci_dev *dev, pcidevaddr_t bdf) +{ + int i; + + memset(dev, 0, sizeof(*dev)); + dev->bdf = bdf; + + for (i = 0; i < PCI_BAR_NUM; i++) { + if (pci_bar_size(dev, i)) { + dev->resource[i] = __pci_bar_get_addr(dev, i); + if (pci_bar_is64(dev, i)) { + assert(i + 1 < PCI_BAR_NUM); + dev->resource[i + 1] = dev->resource[i]; + i++; + } + } else { + dev->resource[i] = INVALID_PHYS_ADDR; + } + } +} + +uint8_t pci_intx_line(struct pci_dev *dev) +{ + return pci_config_readb(dev->bdf, PCI_INTERRUPT_LINE); +} + +static void pci_cap_setup(struct pci_dev *dev, int cap_offset, int cap_id) +{ + switch (cap_id) { + case PCI_CAP_ID_MSI: + dev->msi_offset = cap_offset; + break; + } +} + +void pci_enable_defaults(struct pci_dev *dev) +{ + /* Enable device DMA operations */ + pci_cmd_set_clr(dev, PCI_COMMAND_MASTER, 0); + pci_cap_walk(dev, pci_cap_setup); +} diff --git a/tests/kvm-unit-tests/lib/pci.h b/tests/kvm-unit-tests/lib/pci.h new file mode 100644 index 00000000..03cc0a72 --- /dev/null +++ b/tests/kvm-unit-tests/lib/pci.h @@ -0,0 +1,102 @@ +#ifndef PCI_H +#define PCI_H +/* + * API for scanning a PCI bus for a given device, as well to access + * BAR registers. + * + * Copyright (C) 2013, Red Hat Inc, Michael S. Tsirkin + * + * This work is licensed under the terms of the GNU LGPL, version 2. 
+ */ +#include "libcflat.h" + +typedef uint16_t pcidevaddr_t; +enum { + PCIDEVADDR_INVALID = 0xffff, +}; + +#define PCI_BAR_NUM 6 +#define PCI_DEVFN_MAX 256 + +#define PCI_BDF_GET_DEVFN(x) ((x) & 0xff) +#define PCI_BDF_GET_BUS(x) (((x) >> 8) & 0xff) + +struct pci_dev { + uint16_t bdf; + uint16_t msi_offset; + phys_addr_t resource[PCI_BAR_NUM]; +}; + +extern void pci_dev_init(struct pci_dev *dev, pcidevaddr_t bdf); +extern void pci_cmd_set_clr(struct pci_dev *dev, uint16_t set, uint16_t clr); +typedef void (*pci_cap_handler_t)(struct pci_dev *dev, int cap_offset, int cap_id); +extern void pci_cap_walk(struct pci_dev *dev, pci_cap_handler_t handler); +extern void pci_enable_defaults(struct pci_dev *dev); +extern bool pci_setup_msi(struct pci_dev *dev, uint64_t msi_addr, + uint32_t msi_data); + +typedef phys_addr_t iova_t; + +extern bool pci_probe(void); +extern void pci_print(void); +extern bool pci_dev_exists(pcidevaddr_t dev); +extern pcidevaddr_t pci_find_dev(uint16_t vendor_id, uint16_t device_id); + +/* + * @bar_num in all BAR access functions below is the index of the 32-bit + * register starting from the PCI_BASE_ADDRESS_0 offset. + * + * In cases where the BAR size is 64-bit, a caller should still provide + * @bar_num in terms of 32-bit words. For example, if a device has a 64-bit + * BAR#0 and a 32-bit BAR#1, then caller should provide 2 to address BAR#1, + * not 1. + * + * It is expected the caller is aware of the device BAR layout and never + * tries to address the middle of a 64-bit register. 
+ */ +extern phys_addr_t pci_bar_get_addr(struct pci_dev *dev, int bar_num); +extern void pci_bar_set_addr(struct pci_dev *dev, int bar_num, phys_addr_t addr); +extern phys_addr_t pci_bar_size(struct pci_dev *dev, int bar_num); +extern uint32_t pci_bar_get(struct pci_dev *dev, int bar_num); +extern uint32_t pci_bar_mask(uint32_t bar); +extern bool pci_bar_is64(struct pci_dev *dev, int bar_num); +extern bool pci_bar_is_memory(struct pci_dev *dev, int bar_num); +extern bool pci_bar_is_valid(struct pci_dev *dev, int bar_num); +extern void pci_bar_print(struct pci_dev *dev, int bar_num); +extern void pci_dev_print_id(struct pci_dev *dev); +extern void pci_dev_print(struct pci_dev *dev); +extern uint8_t pci_intx_line(struct pci_dev *dev); +void pci_msi_set_enable(struct pci_dev *dev, bool enabled); + +extern int pci_testdev(void); + +/* + * pci-testdev is a driver for the pci-testdev qemu pci device. The + * device enables testing mmio and portio exits, and measuring their + * speed. + */ +#define PCI_VENDOR_ID_REDHAT 0x1b36 +#define PCI_DEVICE_ID_REDHAT_TEST 0x0005 + +/* + * pci-testdev supports at least three types of tests (via mmio and + * portio BARs): no-eventfd, wildcard-eventfd and datamatch-eventfd + */ +#define PCI_TESTDEV_BAR_MEM 0 +#define PCI_TESTDEV_BAR_IO 1 +#define PCI_TESTDEV_NUM_BARS 2 +#define PCI_TESTDEV_NUM_TESTS 3 + +struct pci_test_dev_hdr { + uint8_t test; + uint8_t width; + uint8_t pad0[2]; + uint32_t offset; + uint32_t data; + uint32_t count; + uint8_t name[]; +}; + +#define PCI_HEADER_TYPE_MASK 0x7f + +#endif /* PCI_H */ diff --git a/tests/kvm-unit-tests/lib/printf.c b/tests/kvm-unit-tests/lib/printf.c new file mode 100644 index 00000000..2aec59aa --- /dev/null +++ b/tests/kvm-unit-tests/lib/printf.c @@ -0,0 +1,261 @@ +#include "libcflat.h" + +#define BUFSZ 2000 + +typedef struct pstream { + char *buffer; + int remain; + int added; +} pstream_t; + +typedef struct strprops { + char pad; + int npad; +} strprops_t; + +static void 
addchar(pstream_t *p, char c) +{ + if (p->remain) { + *p->buffer++ = c; + --p->remain; + } + ++p->added; +} + +void print_str(pstream_t *p, const char *s, strprops_t props) +{ + const char *s_orig = s; + int npad = props.npad; + + if (npad > 0) { + npad -= strlen(s_orig); + while (npad > 0) { + addchar(p, props.pad); + --npad; + } + } + + while (*s) + addchar(p, *s++); + + if (npad < 0) { + props.pad = ' '; /* ignore '0' flag with '-' flag */ + npad += strlen(s_orig); + while (npad < 0) { + addchar(p, props.pad); + ++npad; + } + } +} + +static char digits[16] = "0123456789abcdef"; + +void print_int(pstream_t *ps, long long n, int base, strprops_t props) +{ + char buf[sizeof(long) * 3 + 2], *p = buf; + int s = 0, i; + + if (n < 0) { + n = -n; + s = 1; + } + + while (n) { + *p++ = digits[n % base]; + n /= base; + } + + if (s) + *p++ = '-'; + + if (p == buf) + *p++ = '0'; + + for (i = 0; i < (p - buf) / 2; ++i) { + char tmp; + + tmp = buf[i]; + buf[i] = p[-1-i]; + p[-1-i] = tmp; + } + + *p = 0; + + print_str(ps, buf, props); +} + +void print_unsigned(pstream_t *ps, unsigned long long n, int base, + strprops_t props) +{ + char buf[sizeof(long) * 3 + 1], *p = buf; + int i; + + while (n) { + *p++ = digits[n % base]; + n /= base; + } + + if (p == buf) + *p++ = '0'; + + for (i = 0; i < (p - buf) / 2; ++i) { + char tmp; + + tmp = buf[i]; + buf[i] = p[-1-i]; + p[-1-i] = tmp; + } + + *p = 0; + + print_str(ps, buf, props); +} + +static int fmtnum(const char **fmt) +{ + const char *f = *fmt; + int len = 0, num; + + if (*f == '-') + ++f, ++len; + + while (*f >= '0' && *f <= '9') + ++f, ++len; + + num = atol(*fmt); + *fmt += len; + return num; +} + +int vsnprintf(char *buf, int size, const char *fmt, va_list va) +{ + pstream_t s; + + s.buffer = buf; + s.remain = size - 1; + s.added = 0; + while (*fmt) { + char f = *fmt++; + int nlong = 0; + strprops_t props; + memset(&props, 0, sizeof(props)); + props.pad = ' '; + + if (f != '%') { + addchar(&s, f); + continue; + } + morefmt: + f 
= *fmt++; + switch (f) { + case '%': + addchar(&s, '%'); + break; + case 'c': + addchar(&s, va_arg(va, int)); + break; + case '\0': + --fmt; + break; + case '0': + props.pad = '0'; + ++fmt; + /* fall through */ + case '1'...'9': + case '-': + --fmt; + props.npad = fmtnum(&fmt); + goto morefmt; + case 'l': + ++nlong; + goto morefmt; + case 'd': + switch (nlong) { + case 0: + print_int(&s, va_arg(va, int), 10, props); + break; + case 1: + print_int(&s, va_arg(va, long), 10, props); + break; + default: + print_int(&s, va_arg(va, long long), 10, props); + break; + } + break; + case 'u': + switch (nlong) { + case 0: + print_unsigned(&s, va_arg(va, unsigned), 10, props); + break; + case 1: + print_unsigned(&s, va_arg(va, unsigned long), 10, props); + break; + default: + print_unsigned(&s, va_arg(va, unsigned long long), 10, props); + break; + } + break; + case 'x': + switch (nlong) { + case 0: + print_unsigned(&s, va_arg(va, unsigned), 16, props); + break; + case 1: + print_unsigned(&s, va_arg(va, unsigned long), 16, props); + break; + default: + print_unsigned(&s, va_arg(va, unsigned long long), 16, props); + break; + } + break; + case 'p': + print_str(&s, "0x", props); + print_unsigned(&s, (unsigned long)va_arg(va, void *), 16, props); + break; + case 's': + print_str(&s, va_arg(va, const char *), props); + break; + default: + addchar(&s, f); + break; + } + } + *s.buffer = 0; + ++s.added; + return s.added; +} + + +int snprintf(char *buf, int size, const char *fmt, ...) +{ + va_list va; + int r; + + va_start(va, fmt); + r = vsnprintf(buf, size, fmt, va); + va_end(va); + return r; +} + +int vprintf(const char *fmt, va_list va) +{ + char buf[BUFSZ]; + int r; + + r = vsnprintf(buf, sizeof(buf), fmt, va); + puts(buf); + return r; +} + +int printf(const char *fmt, ...) 
+{ + va_list va; + char buf[BUFSZ]; + int r; + + va_start(va, fmt); + r = vsnprintf(buf, sizeof buf, fmt, va); + va_end(va); + puts(buf); + return r; +} diff --git a/tests/kvm-unit-tests/lib/report.c b/tests/kvm-unit-tests/lib/report.c new file mode 100644 index 00000000..e24e8138 --- /dev/null +++ b/tests/kvm-unit-tests/lib/report.c @@ -0,0 +1,145 @@ +/* + * Test result reporting + * + * Copyright (c) Siemens AG, 2014 + * + * Authors: + * Jan Kiszka + * Andrew Jones + * + * This work is licensed under the terms of the GNU LGPL, version 2. + */ + +#include "libcflat.h" +#include "asm/spinlock.h" + +static unsigned int tests, failures, xfailures, skipped; +static char prefixes[256]; +static struct spinlock lock; + +void report_prefix_push(const char *prefix) +{ + spin_lock(&lock); + strcat(prefixes, prefix); + strcat(prefixes, ": "); + spin_unlock(&lock); +} + +void report_prefix_pop(void) +{ + char *p, *q; + + spin_lock(&lock); + + if (!*prefixes) + return; + + for (p = prefixes, q = strstr(p, ": ") + 2; + *q; + p = q, q = strstr(p, ": ") + 2) + ; + *p = '\0'; + + spin_unlock(&lock); +} + +static void va_report(const char *msg_fmt, + bool pass, bool xfail, bool skip, va_list va) +{ + char *prefix = skip ? "SKIP" + : xfail ? (pass ? "XPASS" : "XFAIL") + : (pass ? "PASS" : "FAIL"); + + spin_lock(&lock); + + tests++; + printf("%s: ", prefix); + puts(prefixes); + vprintf(msg_fmt, va); + puts("\n"); + if (skip) + skipped++; + else if (xfail && !pass) + xfailures++; + else if (xfail || !pass) + failures++; + + spin_unlock(&lock); +} + +void report(const char *msg_fmt, bool pass, ...) +{ + va_list va; + va_start(va, pass); + va_report(msg_fmt, pass, false, false, va); + va_end(va); +} + +void report_xfail(const char *msg_fmt, bool xfail, bool pass, ...) +{ + va_list va; + va_start(va, pass); + va_report(msg_fmt, pass, xfail, false, va); + va_end(va); +} + +void report_skip(const char *msg_fmt, ...) 
+{ + va_list va; + va_start(va, msg_fmt); + va_report(msg_fmt, false, false, true, va); + va_end(va); +} + +void report_info(const char *msg_fmt, ...) +{ + va_list va; + + spin_lock(&lock); + puts("INFO: "); + puts(prefixes); + va_start(va, msg_fmt); + vprintf(msg_fmt, va); + va_end(va); + puts("\n"); + spin_unlock(&lock); +} + +int report_summary(void) +{ + spin_lock(&lock); + + printf("SUMMARY: %d tests", tests); + if (failures) + printf(", %d unexpected failures", failures); + if (xfailures) + printf(", %d expected failures", xfailures); + if (skipped) + printf(", %d skipped", skipped); + printf("\n"); + + if (tests == skipped) + /* Blame AUTOTOOLS for using 77 for skipped test and QEMU for + * mangling error codes in a way that gets 77 if we ... */ + return 77 >> 1; + + return failures > 0 ? 1 : 0; + + spin_unlock(&lock); +} + +void report_abort(const char *msg_fmt, ...) +{ + va_list va; + + spin_lock(&lock); + puts("ABORT: "); + puts(prefixes); + va_start(va, msg_fmt); + vprintf(msg_fmt, va); + va_end(va); + puts("\n"); + spin_unlock(&lock); + report_summary(); + abort(); +} diff --git a/tests/kvm-unit-tests/lib/setjmp.h b/tests/kvm-unit-tests/lib/setjmp.h new file mode 100644 index 00000000..334f466f --- /dev/null +++ b/tests/kvm-unit-tests/lib/setjmp.h @@ -0,0 +1,12 @@ +#ifndef LIBCFLAT_SETJMP_H +#define LIBCFLAT_SETJMP_H 1 + +typedef struct jmp_buf_tag { + long int regs[8]; +} jmp_buf[1]; + +extern int setjmp (struct jmp_buf_tag env[1]); +extern void longjmp (struct jmp_buf_tag env[1], int val) + __attribute__ ((__noreturn__)); + +#endif /* setjmp.h */ diff --git a/tests/kvm-unit-tests/lib/stack.c b/tests/kvm-unit-tests/lib/stack.c new file mode 100644 index 00000000..b0a02950 --- /dev/null +++ b/tests/kvm-unit-tests/lib/stack.c @@ -0,0 +1,96 @@ +#include +#include + +#define MAX_DEPTH 20 + +static void print_stack(const void **return_addrs, int depth, + bool top_is_return_address) +{ + int i = 0; + + printf("\tSTACK:"); + + /* @addr indicates a non-return 
address, as expected by the stack + * pretty printer script. */ + if (depth > 0 && !top_is_return_address) { + printf(" @%lx", (unsigned long) return_addrs[0]); + i++; + } + + for (; i < depth; i++) { + printf(" %lx", (unsigned long) return_addrs[i]); + } + printf("\n"); +} + +void dump_stack(void) +{ + const void *return_addrs[MAX_DEPTH]; + int depth; + + depth = backtrace(return_addrs, MAX_DEPTH); + print_stack(&return_addrs[1], depth ? depth - 1 : 0, true); +} + +void dump_frame_stack(const void *instruction, const void *frame) +{ + const void *return_addrs[MAX_DEPTH]; + int depth; + + return_addrs[0] = instruction; + depth = backtrace_frame(frame, &return_addrs[1], MAX_DEPTH - 1); + print_stack(return_addrs, depth + 1, false); +} + +#ifndef HAVE_ARCH_BACKTRACE +int backtrace(const void **return_addrs, int max_depth) +{ + static int walking; + int depth = 0; + void *addr; + + if (walking) { + printf("RECURSIVE STACK WALK!!!\n"); + return 0; + } + walking = 1; + + /* __builtin_return_address requires a compile-time constant argument */ +#define GET_RETURN_ADDRESS(i) \ + if (max_depth == i) \ + goto done; \ + addr = __builtin_return_address(i); \ + if (!addr) \ + goto done; \ + return_addrs[i] = __builtin_extract_return_addr(addr); \ + depth = i + 1; \ + + GET_RETURN_ADDRESS(0) + GET_RETURN_ADDRESS(1) + GET_RETURN_ADDRESS(2) + GET_RETURN_ADDRESS(3) + GET_RETURN_ADDRESS(4) + GET_RETURN_ADDRESS(5) + GET_RETURN_ADDRESS(6) + GET_RETURN_ADDRESS(7) + GET_RETURN_ADDRESS(8) + GET_RETURN_ADDRESS(9) + GET_RETURN_ADDRESS(10) + GET_RETURN_ADDRESS(11) + GET_RETURN_ADDRESS(12) + GET_RETURN_ADDRESS(13) + GET_RETURN_ADDRESS(14) + GET_RETURN_ADDRESS(15) + GET_RETURN_ADDRESS(16) + GET_RETURN_ADDRESS(17) + GET_RETURN_ADDRESS(18) + GET_RETURN_ADDRESS(19) + GET_RETURN_ADDRESS(20) + +#undef GET_RETURN_ADDRESS + +done: + walking = 0; + return depth; +} +#endif /* HAVE_ARCH_BACKTRACE */ diff --git a/tests/kvm-unit-tests/lib/stack.h b/tests/kvm-unit-tests/lib/stack.h new file mode 100644 
/* Minimal freestanding string routines; no libc is available in tests. */

unsigned long strlen(const char *buf)
{
	const char *p = buf;

	while (*p)
		p++;
	return p - buf;
}

char *strcat(char *dest, const char *src)
{
	char *out = dest + strlen(dest);

	while ((*out++ = *src++))
		;
	return dest;
}

char *strcpy(char *dest, const char *src)
{
	char *out = dest;

	while ((*out++ = *src++))
		;
	return dest;
}

int strncmp(const char *a, const char *b, size_t n)
{
	while (n--) {
		if (*a != *b || *a == '\0')
			return *a - *b;
		++a, ++b;
	}
	return 0;
}

int strcmp(const char *a, const char *b)
{
	/* No string exceeds SIZE_MAX, so this is an unbounded compare. */
	return strncmp(a, b, SIZE_MAX);
}

char *strchr(const char *s, int c)
{
	/* Searching for '\0' returns a pointer to the terminator. */
	for (;; ++s) {
		if (*s == (char)c)
			return (char *)s;
		if (*s == '\0')
			return NULL;
	}
}

char *strstr(const char *s1, const char *s2)
{
	size_t i, l1, l2;

	l2 = strlen(s2);
	if (!l2)
		return (char *)s1;
	/* Try every position where s2 could still fit inside s1. */
	for (l1 = strlen(s1); l1 >= l2; ++s1, --l1) {
		for (i = 0; i < l2 && s1[i] == s2[i]; ++i)
			;
		if (i == l2)
			return (char *)s1;
	}
	return NULL;
}

void *memset(void *s, int c, size_t n)
{
	char *p = s;

	while (n--)
		*p++ = c;
	return s;
}

void *memcpy(void *dest, const void *src, size_t n)
{
	char *d = dest;
	const char *s = src;

	while (n--)
		*d++ = *s++;
	return dest;
}
/*
 * Compare the first n bytes of s1 and s2 as unsigned chars.
 * Returns <0, 0, >0 like the standard memcmp.
 */
int memcmp(const void *s1, const void *s2, size_t n)
{
	const unsigned char *a = s1, *b = s2;
	int ret = 0;

	while (n--) {
		ret = *a - *b;
		if (ret)
			break;
		++a, ++b;
	}
	return ret;
}

/* Copy n bytes; safe for overlapping regions (direction chosen by layout). */
void *memmove(void *dest, const void *src, size_t n)
{
	const unsigned char *s = src;
	unsigned char *d = dest;

	if (d <= s) {
		while (n--)
			*d++ = *s++;
	} else {
		/* Copy backwards so an overlapping tail isn't clobbered. */
		d += n, s += n;
		while (n--)
			*--d = *--s;
	}
	return dest;
}

/* Return a pointer to the first occurrence of c in s[0..n), or NULL. */
void *memchr(const void *s, int c, size_t n)
{
	const unsigned char *str = s, chr = (unsigned char)c;

	while (n--)
		if (*str++ == chr)
			return (void *)(str - 1);
	return NULL;
}

/*
 * Minimal atol: optional leading spaces/tabs, optional sign, decimal
 * digits; stops at the first non-digit.  No overflow detection.
 */
long atol(const char *ptr)
{
	long acc = 0;
	const char *s = ptr;
	int neg, c;

	while (*s == ' ' || *s == '\t')
		s++;
	if (*s == '-') {
		neg = 1;
		s++;
	} else {
		neg = 0;
		if (*s == '+')
			s++;
	}

	while (*s) {
		if (*s < '0' || *s > '9')
			break;
		c = *s - '0';
		acc = acc * 10 + c;
		s++;
	}

	if (neg)
		acc = -acc;

	return acc;
}

extern char **environ;

/*
 * Look up name in the environment.  Returns a pointer to the value (the
 * text after '='), or NULL if the variable is not present.
 *
 * Fix: the previous implementation compared only the first
 * (delim - *envp) characters of name, so a longer name that shares a
 * prefix with an existing variable matched wrongly — e.g.
 * getenv("FOOX") returned the value of "FOO=bar".  The entire name must
 * match up to the '=' separator.
 */
char *getenv(const char *name)
{
	char **envp = environ;

	while (*envp) {
		char *e = *envp;
		size_t i;

		for (i = 0; name[i] && name[i] == e[i]; ++i)
			;
		if (name[i] == '\0' && e[i] == '=')
			return e + i + 1;
		++envp;
	}
	return NULL;
}
/*
 * Copyright (C) 2016, Red Hat Inc, Andrew Jones
 *
 * This work is licensed under the terms of the GNU LGPL, version 2.
 */
#include <string.h>

/*
 * Split a "key=value" string: store the integer value in *val and
 * return the offset of the '=' (i.e. the key length), or -1 when the
 * string contains no '='.
 */
int parse_keyval(char *s, long *val)
{
	char *eq = strchr(s, '=');

	if (eq == NULL)
		return -1;

	*val = atol(eq + 1);
	return (int)(eq - s);
}
+ * + * Copyright (C) 2014, Red Hat Inc, Andrew Jones + * + * This work is licensed under the terms of the GNU LGPL, version 2. + */ +#include "libcflat.h" +#include "devicetree.h" +#include "alloc.h" +#include "asm/page.h" +#include "asm/io.h" +#include "virtio.h" +#include "virtio-mmio.h" + +static void vm_get(struct virtio_device *vdev, unsigned offset, + void *buf, unsigned len) +{ + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); + u8 *p = buf; + unsigned i; + + for (i = 0; i < len; ++i) + p[i] = readb(vm_dev->base + VIRTIO_MMIO_CONFIG + offset + i); +} + +static void vm_set(struct virtio_device *vdev, unsigned offset, + const void *buf, unsigned len) +{ + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); + const u8 *p = buf; + unsigned i; + + for (i = 0; i < len; ++i) + writeb(p[i], vm_dev->base + VIRTIO_MMIO_CONFIG + offset + i); +} + +static bool vm_notify(struct virtqueue *vq) +{ + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev); + writel(vq->index, vm_dev->base + VIRTIO_MMIO_QUEUE_NOTIFY); + return true; +} + +static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, + unsigned index, + void (*callback)(struct virtqueue *vq), + const char *name) +{ + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); + struct vring_virtqueue *vq; + void *queue; + unsigned num = VIRTIO_MMIO_QUEUE_NUM_MIN; + + vq = calloc(1, sizeof(*vq)); + queue = memalign(PAGE_SIZE, VIRTIO_MMIO_QUEUE_SIZE_MIN); + assert(vq && queue); + + writel(index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL); + + assert(readl(vm_dev->base + VIRTIO_MMIO_QUEUE_NUM_MAX) >= num); + + if (readl(vm_dev->base + VIRTIO_MMIO_QUEUE_PFN) != 0) { + printf("%s: virtqueue %d already setup! 
base=%p\n", + __func__, index, vm_dev->base); + return NULL; + } + + writel(num, vm_dev->base + VIRTIO_MMIO_QUEUE_NUM); + writel(VIRTIO_MMIO_VRING_ALIGN, + vm_dev->base + VIRTIO_MMIO_QUEUE_ALIGN); + writel(virt_to_pfn(queue), vm_dev->base + VIRTIO_MMIO_QUEUE_PFN); + + vring_init_virtqueue(vq, index, num, VIRTIO_MMIO_VRING_ALIGN, + vdev, queue, vm_notify, callback, name); + + return &vq->vq; +} + +static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char *names[]) +{ + unsigned i; + + for (i = 0; i < nvqs; ++i) { + vqs[i] = vm_setup_vq(vdev, i, + callbacks ? callbacks[i] : NULL, + names ? names[i] : ""); + if (vqs[i] == NULL) + return -1; + } + + return 0; +} + +static const struct virtio_config_ops vm_config_ops = { + .get = vm_get, + .set = vm_set, + .find_vqs = vm_find_vqs, +}; + +static void vm_device_init(struct virtio_mmio_device *vm_dev) +{ + vm_dev->vdev.id.device = readl(vm_dev->base + VIRTIO_MMIO_DEVICE_ID); + vm_dev->vdev.id.vendor = readl(vm_dev->base + VIRTIO_MMIO_VENDOR_ID); + vm_dev->vdev.config = &vm_config_ops; + + writel(PAGE_SIZE, vm_dev->base + VIRTIO_MMIO_GUEST_PAGE_SIZE); +} + +/****************************************************** + * virtio-mmio device tree support + ******************************************************/ + +struct vm_dt_info { + u32 devid; + void *base; +}; + +static int vm_dt_match(const struct dt_device *dev, int fdtnode) +{ + struct vm_dt_info *info = (struct vm_dt_info *)dev->info; + struct dt_pbus_reg base; + u32 magic; + int ret; + + dt_device_bind_node((struct dt_device *)dev, fdtnode); + + ret = dt_pbus_get_base(dev, &base); + assert(ret == 0); + info->base = ioremap(base.addr, base.size); + + magic = readl(info->base + VIRTIO_MMIO_MAGIC_VALUE); + if (magic != ('v' | 'i' << 8 | 'r' << 16 | 't' << 24)) + return false; + + return readl(info->base + VIRTIO_MMIO_DEVICE_ID) == info->devid; +} + +static struct virtio_device 
*virtio_mmio_dt_bind(u32 devid) +{ + struct virtio_mmio_device *vm_dev; + struct dt_device dt_dev; + struct dt_bus dt_bus; + struct vm_dt_info info; + int node; + + if (!dt_available()) + return NULL; + + dt_bus_init_defaults(&dt_bus); + dt_bus.match = vm_dt_match; + + info.devid = devid; + + dt_device_init(&dt_dev, &dt_bus, &info); + + node = dt_device_find_compatible(&dt_dev, "virtio,mmio"); + assert(node >= 0 || node == -FDT_ERR_NOTFOUND); + + if (node == -FDT_ERR_NOTFOUND) + return NULL; + + vm_dev = calloc(1, sizeof(*vm_dev)); + assert(vm_dev != NULL); + + vm_dev->base = info.base; + vm_device_init(vm_dev); + + return &vm_dev->vdev; +} + +struct virtio_device *virtio_mmio_bind(u32 devid) +{ + return virtio_mmio_dt_bind(devid); +} diff --git a/tests/kvm-unit-tests/lib/virtio-mmio.h b/tests/kvm-unit-tests/lib/virtio-mmio.h new file mode 100644 index 00000000..8046a474 --- /dev/null +++ b/tests/kvm-unit-tests/lib/virtio-mmio.h @@ -0,0 +1,65 @@ +#ifndef _VIRTIO_MMIO_H_ +#define _VIRTIO_MMIO_H_ +/* + * A minimal implementation of virtio-mmio. Adapted from the Linux Kernel. + * + * Copyright (C) 2014, Red Hat Inc, Andrew Jones + * + * This work is licensed under the terms of the GNU LGPL, version 2. 
+ */ +#include "libcflat.h" +#include "asm/page.h" +#include "virtio.h" + +#define VIRTIO_MMIO_MAGIC_VALUE 0x000 +#define VIRTIO_MMIO_VERSION 0x004 +#define VIRTIO_MMIO_DEVICE_ID 0x008 +#define VIRTIO_MMIO_VENDOR_ID 0x00c +#define VIRTIO_MMIO_HOST_FEATURES 0x010 +#define VIRTIO_MMIO_HOST_FEATURES_SEL 0x014 +#define VIRTIO_MMIO_GUEST_FEATURES 0x020 +#define VIRTIO_MMIO_GUEST_FEATURES_SEL 0x024 +#define VIRTIO_MMIO_GUEST_PAGE_SIZE 0x028 +#define VIRTIO_MMIO_QUEUE_SEL 0x030 +#define VIRTIO_MMIO_QUEUE_NUM_MAX 0x034 +#define VIRTIO_MMIO_QUEUE_NUM 0x038 +#define VIRTIO_MMIO_QUEUE_ALIGN 0x03c +#define VIRTIO_MMIO_QUEUE_PFN 0x040 +#define VIRTIO_MMIO_QUEUE_NOTIFY 0x050 +#define VIRTIO_MMIO_INTERRUPT_STATUS 0x060 +#define VIRTIO_MMIO_INTERRUPT_ACK 0x064 +#define VIRTIO_MMIO_STATUS 0x070 +#define VIRTIO_MMIO_CONFIG 0x100 + +#define VIRTIO_MMIO_INT_VRING (1 << 0) +#define VIRTIO_MMIO_INT_CONFIG (1 << 1) + +#define VIRTIO_MMIO_VRING_ALIGN PAGE_SIZE + +/* + * The minimum queue size is 2*VIRTIO_MMIO_VRING_ALIGN, which + * means the largest queue num for the minimum queue size is 128, i.e. 
+ * 2*VIRTIO_MMIO_VRING_ALIGN = vring_size(128, VIRTIO_MMIO_VRING_ALIGN), + * where vring_size is + * + * unsigned vring_size(unsigned num, unsigned long align) + * { + * return ((sizeof(struct vring_desc) * num + sizeof(u16) * (3 + num) + * + align - 1) & ~(align - 1)) + * + sizeof(u16) * 3 + sizeof(struct vring_used_elem) * num; + * } + */ +#define VIRTIO_MMIO_QUEUE_SIZE_MIN (2*VIRTIO_MMIO_VRING_ALIGN) +#define VIRTIO_MMIO_QUEUE_NUM_MIN 128 + +#define to_virtio_mmio_device(vdev_ptr) \ + container_of(vdev_ptr, struct virtio_mmio_device, vdev) + +struct virtio_mmio_device { + struct virtio_device vdev; + void *base; +}; + +extern struct virtio_device *virtio_mmio_bind(u32 devid); + +#endif /* _VIRTIO_MMIO_H_ */ diff --git a/tests/kvm-unit-tests/lib/virtio.c b/tests/kvm-unit-tests/lib/virtio.c new file mode 100644 index 00000000..9532d1ae --- /dev/null +++ b/tests/kvm-unit-tests/lib/virtio.c @@ -0,0 +1,130 @@ +/* + * virtqueue support adapted from the Linux kernel. + * + * Copyright (C) 2014, Red Hat Inc, Andrew Jones + * + * This work is licensed under the terms of the GNU LGPL, version 2. 
+ */ +#include "libcflat.h" +#include "asm/io.h" +#include "virtio.h" +#include "virtio-mmio.h" + +void vring_init(struct vring *vr, unsigned int num, void *p, + unsigned long align) +{ + vr->num = num; + vr->desc = p; + vr->avail = p + num*sizeof(struct vring_desc); + vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + sizeof(u16) + + align-1) & ~(align - 1)); +} + +void vring_init_virtqueue(struct vring_virtqueue *vq, unsigned index, + unsigned num, unsigned vring_align, + struct virtio_device *vdev, void *pages, + bool (*notify)(struct virtqueue *), + void (*callback)(struct virtqueue *), + const char *name) +{ + unsigned i; + + vring_init(&vq->vring, num, pages, vring_align); + vq->vq.callback = callback; + vq->vq.vdev = vdev; + vq->vq.name = name; + vq->vq.num_free = num; + vq->vq.index = index; + vq->notify = notify; + vq->last_used_idx = 0; + vq->num_added = 0; + vq->free_head = 0; + + for (i = 0; i < num-1; i++) { + vq->vring.desc[i].next = i+1; + vq->data[i] = NULL; + } + vq->data[i] = NULL; +} + +int virtqueue_add_outbuf(struct virtqueue *_vq, char *buf, unsigned int len) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + unsigned avail; + int head; + + assert(buf != NULL); + assert(len != 0); + + if (!vq->vq.num_free) + return -1; + + --vq->vq.num_free; + + head = vq->free_head; + + vq->vring.desc[head].flags = 0; + vq->vring.desc[head].addr = virt_to_phys(buf); + vq->vring.desc[head].len = len; + + vq->free_head = vq->vring.desc[head].next; + + vq->data[head] = buf; + + avail = (vq->vring.avail->idx & (vq->vring.num-1)); + vq->vring.avail->ring[avail] = head; + wmb(); + vq->vring.avail->idx++; + vq->num_added++; + + return 0; +} + +bool virtqueue_kick(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + mb(); + return vq->notify(_vq); +} + +void detach_buf(struct vring_virtqueue *vq, unsigned head) +{ + unsigned i = head; + + vq->data[head] = NULL; + + while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) { + i = 
vq->vring.desc[i].next; + vq->vq.num_free++; + } + + vq->vring.desc[i].next = vq->free_head; + vq->free_head = head; + vq->vq.num_free++; +} + +void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + u16 last_used; + unsigned i; + void *ret; + + rmb(); + + last_used = (vq->last_used_idx & (vq->vring.num-1)); + i = vq->vring.used->ring[last_used].id; + *len = vq->vring.used->ring[last_used].len; + + ret = vq->data[i]; + detach_buf(vq, i); + + vq->last_used_idx++; + + return ret; +} + +struct virtio_device *virtio_bind(u32 devid) +{ + return virtio_mmio_bind(devid); +} diff --git a/tests/kvm-unit-tests/lib/virtio.h b/tests/kvm-unit-tests/lib/virtio.h new file mode 100644 index 00000000..4801e204 --- /dev/null +++ b/tests/kvm-unit-tests/lib/virtio.h @@ -0,0 +1,150 @@ +#ifndef _VIRTIO_H_ +#define _VIRTIO_H_ +/* + * A minimal implementation of virtio. + * Structures adapted from the Linux Kernel. + * + * Copyright (C) 2014, Red Hat Inc, Andrew Jones + * + * This work is licensed under the terms of the GNU LGPL, version 2. 
+ */ +#include "libcflat.h" + +#define VIRTIO_ID_CONSOLE 3 + +struct virtio_device_id { + u32 device; + u32 vendor; +}; + +struct virtio_device { + struct virtio_device_id id; + const struct virtio_config_ops *config; +}; + +struct virtqueue { + void (*callback)(struct virtqueue *vq); + const char *name; + struct virtio_device *vdev; + unsigned int index; + unsigned int num_free; + void *priv; +}; + +typedef void vq_callback_t(struct virtqueue *); +struct virtio_config_ops { + void (*get)(struct virtio_device *vdev, unsigned offset, + void *buf, unsigned len); + void (*set)(struct virtio_device *vdev, unsigned offset, + const void *buf, unsigned len); + int (*find_vqs)(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], + const char *names[]); +}; + +static inline u8 +virtio_config_readb(struct virtio_device *vdev, unsigned offset) +{ + u8 val; + vdev->config->get(vdev, offset, &val, 1); + return val; +} + +static inline u16 +virtio_config_readw(struct virtio_device *vdev, unsigned offset) +{ + u16 val; + vdev->config->get(vdev, offset, &val, 2); + return val; +} + +static inline u32 +virtio_config_readl(struct virtio_device *vdev, unsigned offset) +{ + u32 val; + vdev->config->get(vdev, offset, &val, 4); + return val; +} + +static inline void +virtio_config_writeb(struct virtio_device *vdev, unsigned offset, u8 val) +{ + vdev->config->set(vdev, offset, &val, 1); +} + +static inline void +virtio_config_writew(struct virtio_device *vdev, unsigned offset, u16 val) +{ + vdev->config->set(vdev, offset, &val, 2); +} + +static inline void +virtio_config_writel(struct virtio_device *vdev, unsigned offset, u32 val) +{ + vdev->config->set(vdev, offset, &val, 4); +} + +#define VRING_DESC_F_NEXT 1 +#define VRING_DESC_F_WRITE 2 + +struct vring_desc { + u64 addr; + u32 len; + u16 flags; + u16 next; +}; + +struct vring_avail { + u16 flags; + u16 idx; + u16 ring[]; +}; + +struct vring_used_elem { + u32 id; + u32 len; +}; + +struct 
vring_used { + u16 flags; + u16 idx; + struct vring_used_elem ring[]; +}; + +struct vring { + unsigned int num; + struct vring_desc *desc; + struct vring_avail *avail; + struct vring_used *used; +}; + +struct vring_virtqueue { + struct virtqueue vq; + struct vring vring; + unsigned int free_head; + unsigned int num_added; + u16 last_used_idx; + bool (*notify)(struct virtqueue *vq); + void *data[]; +}; + +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +extern void vring_init(struct vring *vr, unsigned int num, void *p, + unsigned long align); +extern void vring_init_virtqueue(struct vring_virtqueue *vq, unsigned index, + unsigned num, unsigned vring_align, + struct virtio_device *vdev, void *pages, + bool (*notify)(struct virtqueue *), + void (*callback)(struct virtqueue *), + const char *name); +extern int virtqueue_add_outbuf(struct virtqueue *vq, char *buf, + unsigned int len); +extern bool virtqueue_kick(struct virtqueue *vq); +extern void detach_buf(struct vring_virtqueue *vq, unsigned head); +extern void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len); + +extern struct virtio_device *virtio_bind(u32 devid); + +#endif /* _VIRTIO_H_ */ diff --git a/tests/kvm-unit-tests/lib/x86/acpi.c b/tests/kvm-unit-tests/lib/x86/acpi.c new file mode 100644 index 00000000..43731062 --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/acpi.c @@ -0,0 +1,52 @@ +#include "libcflat.h" +#include "acpi.h" + +void* find_acpi_table_addr(u32 sig) +{ + unsigned long addr; + struct rsdp_descriptor *rsdp; + struct rsdt_descriptor_rev1 *rsdt; + void *end; + int i; + + /* FACS is special... 
*/ + if (sig == FACS_SIGNATURE) { + struct fadt_descriptor_rev1 *fadt; + fadt = find_acpi_table_addr(FACP_SIGNATURE); + if (!fadt) { + return NULL; + } + return (void*)(ulong)fadt->firmware_ctrl; + } + + for(addr = 0xf0000; addr < 0x100000; addr += 16) { + rsdp = (void*)addr; + if (rsdp->signature == 0x2052545020445352LL) + break; + } + if (addr == 0x100000) { + printf("Can't find RSDP\n"); + return 0; + } + + if (sig == RSDP_SIGNATURE) { + return rsdp; + } + + rsdt = (void*)(ulong)rsdp->rsdt_physical_address; + if (!rsdt || rsdt->signature != RSDT_SIGNATURE) + return 0; + + if (sig == RSDT_SIGNATURE) { + return rsdt; + } + + end = (void*)rsdt + rsdt->length; + for (i=0; (void*)&rsdt->table_offset_entry[i] < end; i++) { + struct acpi_table *t = (void*)(ulong)rsdt->table_offset_entry[i]; + if (t && t->signature == sig) { + return t; + } + } + return NULL; +} diff --git a/tests/kvm-unit-tests/lib/x86/acpi.h b/tests/kvm-unit-tests/lib/x86/acpi.h new file mode 100644 index 00000000..08aaf57a --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/acpi.h @@ -0,0 +1,104 @@ +#ifndef KVM_ACPI_H +#define KVM_ACPI_H 1 + +#include "libcflat.h" + +#define ACPI_SIGNATURE(c1, c2, c3, c4) \ + ((c1) | ((c2) << 8) | ((c3) << 16) | ((c4) << 24)) + +#define RSDP_SIGNATURE ACPI_SIGNATURE('R','S','D','P') +#define RSDT_SIGNATURE ACPI_SIGNATURE('R','S','D','T') +#define FACP_SIGNATURE ACPI_SIGNATURE('F','A','C','P') +#define FACS_SIGNATURE ACPI_SIGNATURE('F','A','C','S') + +struct rsdp_descriptor { /* Root System Descriptor Pointer */ + u64 signature; /* ACPI signature, contains "RSD PTR " */ + u8 checksum; /* To make sum of struct == 0 */ + u8 oem_id [6]; /* OEM identification */ + u8 revision; /* Must be 0 for 1.0, 2 for 2.0 */ + u32 rsdt_physical_address; /* 32-bit physical address of RSDT */ + u32 length; /* XSDT Length in bytes including hdr */ + u64 xsdt_physical_address; /* 64-bit physical address of XSDT */ + u8 extended_checksum; /* Checksum of entire table */ + u8 reserved [3]; /* 
Reserved field must be 0 */ +}; + +#define ACPI_TABLE_HEADER_DEF /* ACPI common table header */ \ + u32 signature; /* ACPI signature (4 ASCII characters) */ \ + u32 length; /* Length of table, in bytes, including header */ \ + u8 revision; /* ACPI Specification minor version # */ \ + u8 checksum; /* To make sum of entire table == 0 */ \ + u8 oem_id [6]; /* OEM identification */ \ + u8 oem_table_id [8]; /* OEM table identification */ \ + u32 oem_revision; /* OEM revision number */ \ + u8 asl_compiler_id [4]; /* ASL compiler vendor ID */ \ + u32 asl_compiler_revision; /* ASL compiler revision number */ + +struct acpi_table { + ACPI_TABLE_HEADER_DEF + char data[0]; +}; + +struct rsdt_descriptor_rev1 { + ACPI_TABLE_HEADER_DEF + u32 table_offset_entry[0]; +}; + +struct fadt_descriptor_rev1 +{ + ACPI_TABLE_HEADER_DEF /* ACPI common table header */ + u32 firmware_ctrl; /* Physical address of FACS */ + u32 dsdt; /* Physical address of DSDT */ + u8 model; /* System Interrupt Model */ + u8 reserved1; /* Reserved */ + u16 sci_int; /* System vector of SCI interrupt */ + u32 smi_cmd; /* Port address of SMI command port */ + u8 acpi_enable; /* Value to write to smi_cmd to enable ACPI */ + u8 acpi_disable; /* Value to write to smi_cmd to disable ACPI */ + u8 S4bios_req; /* Value to write to SMI CMD to enter S4BIOS state */ + u8 reserved2; /* Reserved - must be zero */ + u32 pm1a_evt_blk; /* Port address of Power Mgt 1a acpi_event Reg Blk */ + u32 pm1b_evt_blk; /* Port address of Power Mgt 1b acpi_event Reg Blk */ + u32 pm1a_cnt_blk; /* Port address of Power Mgt 1a Control Reg Blk */ + u32 pm1b_cnt_blk; /* Port address of Power Mgt 1b Control Reg Blk */ + u32 pm2_cnt_blk; /* Port address of Power Mgt 2 Control Reg Blk */ + u32 pm_tmr_blk; /* Port address of Power Mgt Timer Ctrl Reg Blk */ + u32 gpe0_blk; /* Port addr of General Purpose acpi_event 0 Reg Blk */ + u32 gpe1_blk; /* Port addr of General Purpose acpi_event 1 Reg Blk */ + u8 pm1_evt_len; /* Byte length of ports at 
pm1_x_evt_blk */ + u8 pm1_cnt_len; /* Byte length of ports at pm1_x_cnt_blk */ + u8 pm2_cnt_len; /* Byte Length of ports at pm2_cnt_blk */ + u8 pm_tmr_len; /* Byte Length of ports at pm_tm_blk */ + u8 gpe0_blk_len; /* Byte Length of ports at gpe0_blk */ + u8 gpe1_blk_len; /* Byte Length of ports at gpe1_blk */ + u8 gpe1_base; /* Offset in gpe model where gpe1 events start */ + u8 reserved3; /* Reserved */ + u16 plvl2_lat; /* Worst case HW latency to enter/exit C2 state */ + u16 plvl3_lat; /* Worst case HW latency to enter/exit C3 state */ + u16 flush_size; /* Size of area read to flush caches */ + u16 flush_stride; /* Stride used in flushing caches */ + u8 duty_offset; /* Bit location of duty cycle field in p_cnt reg */ + u8 duty_width; /* Bit width of duty cycle field in p_cnt reg */ + u8 day_alrm; /* Index to day-of-month alarm in RTC CMOS RAM */ + u8 mon_alrm; /* Index to month-of-year alarm in RTC CMOS RAM */ + u8 century; /* Index to century in RTC CMOS RAM */ + u8 reserved4; /* Reserved */ + u8 reserved4a; /* Reserved */ + u8 reserved4b; /* Reserved */ +}; + +struct facs_descriptor_rev1 +{ + u32 signature; /* ACPI Signature */ + u32 length; /* Length of structure, in bytes */ + u32 hardware_signature; /* Hardware configuration signature */ + u32 firmware_waking_vector; /* ACPI OS waking vector */ + u32 global_lock; /* Global Lock */ + u32 S4bios_f : 1; /* Indicates if S4BIOS support is present */ + u32 reserved1 : 31; /* Must be 0 */ + u8 reserved3 [40]; /* Reserved - must be zero */ +}; + +void* find_acpi_table_addr(u32 sig); + +#endif diff --git a/tests/kvm-unit-tests/lib/x86/apic-defs.h b/tests/kvm-unit-tests/lib/x86/apic-defs.h new file mode 100644 index 00000000..e0c3ccac --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/apic-defs.h @@ -0,0 +1,138 @@ +#ifndef _ASM_X86_APICDEF_H +#define _ASM_X86_APICDEF_H + +/* + * Constants for various Intel APICs. (local APIC, IOAPIC, etc.) + * + * Alan Cox , 1995. 
+ * Ingo Molnar , 1999, 2000 + */ + +#define APIC_DEFAULT_PHYS_BASE 0xfee00000 +#define APIC_BSP (1UL << 8) +#define APIC_EXTD (1UL << 10) +#define APIC_EN (1UL << 11) + +#define APIC_ID 0x20 + +#define APIC_LVR 0x30 +#define APIC_LVR_MASK 0xFF00FF +#define GET_APIC_VERSION(x) ((x) & 0xFFu) +#define GET_APIC_MAXLVT(x) (((x) >> 16) & 0xFFu) +#ifdef CONFIG_X86_32 +# define APIC_INTEGRATED(x) ((x) & 0xF0u) +#else +# define APIC_INTEGRATED(x) (1) +#endif +#define APIC_XAPIC(x) ((x) >= 0x14) +#define APIC_TASKPRI 0x80 +#define APIC_TPRI_MASK 0xFFu +#define APIC_ARBPRI 0x90 +#define APIC_ARBPRI_MASK 0xFFu +#define APIC_PROCPRI 0xA0 +#define APIC_EOI 0xB0 +#define APIC_EIO_ACK 0x0 +#define APIC_RRR 0xC0 +#define APIC_LDR 0xD0 +#define APIC_LDR_MASK (0xFFu << 24) +#define GET_APIC_LOGICAL_ID(x) (((x) >> 24) & 0xFFu) +#define SET_APIC_LOGICAL_ID(x) (((x) << 24)) +#define APIC_ALL_CPUS 0xFFu +#define APIC_DFR 0xE0 +#define APIC_DFR_CLUSTER 0x0FFFFFFFul +#define APIC_DFR_FLAT 0xFFFFFFFFul +#define APIC_SPIV 0xF0 +#define APIC_SPIV_FOCUS_DISABLED (1 << 9) +#define APIC_SPIV_APIC_ENABLED (1 << 8) +#define APIC_ISR 0x100 +#define APIC_ISR_NR 0x8 /* Number of 32 bit ISR registers. 
*/ +#define APIC_TMR 0x180 +#define APIC_IRR 0x200 +#define APIC_ESR 0x280 +#define APIC_ESR_SEND_CS 0x00001 +#define APIC_ESR_RECV_CS 0x00002 +#define APIC_ESR_SEND_ACC 0x00004 +#define APIC_ESR_RECV_ACC 0x00008 +#define APIC_ESR_SENDILL 0x00020 +#define APIC_ESR_RECVILL 0x00040 +#define APIC_ESR_ILLREGA 0x00080 +#define APIC_ICR 0x300 +#define APIC_DEST_SELF 0x40000 +#define APIC_DEST_ALLINC 0x80000 +#define APIC_DEST_ALLBUT 0xC0000 +#define APIC_ICR_RR_MASK 0x30000 +#define APIC_ICR_RR_INVALID 0x00000 +#define APIC_ICR_RR_INPROG 0x10000 +#define APIC_ICR_RR_VALID 0x20000 +#define APIC_INT_LEVELTRIG 0x08000 +#define APIC_INT_ASSERT 0x04000 +#define APIC_ICR_BUSY 0x01000 +#define APIC_DEST_LOGICAL 0x00800 +#define APIC_DEST_PHYSICAL 0x00000 +#define APIC_DM_FIXED 0x00000 +#define APIC_DM_LOWEST 0x00100 +#define APIC_DM_SMI 0x00200 +#define APIC_DM_REMRD 0x00300 +#define APIC_DM_NMI 0x00400 +#define APIC_DM_INIT 0x00500 +#define APIC_DM_STARTUP 0x00600 +#define APIC_DM_EXTINT 0x00700 +#define APIC_VECTOR_MASK 0x000FF +#define APIC_ICR2 0x310 +#define GET_APIC_DEST_FIELD(x) (((x) >> 24) & 0xFF) +#define SET_APIC_DEST_FIELD(x) ((x) << 24) +#define APIC_LVTT 0x320 +#define APIC_LVTTHMR 0x330 +#define APIC_LVTPC 0x340 +#define APIC_LVT0 0x350 +#define APIC_LVT_TIMER_BASE_MASK (0x3 << 18) +#define GET_APIC_TIMER_BASE(x) (((x) >> 18) & 0x3) +#define SET_APIC_TIMER_BASE(x) (((x) << 18)) +#define APIC_TIMER_BASE_CLKIN 0x0 +#define APIC_TIMER_BASE_TMBASE 0x1 +#define APIC_TIMER_BASE_DIV 0x2 +#define APIC_LVT_TIMER_ONESHOT (0 << 17) +#define APIC_LVT_TIMER_PERIODIC (1 << 17) +#define APIC_LVT_TIMER_TSCDEADLINE (2 << 17) +#define APIC_LVT_MASKED (1 << 16) +#define APIC_LVT_LEVEL_TRIGGER (1 << 15) +#define APIC_LVT_REMOTE_IRR (1 << 14) +#define APIC_INPUT_POLARITY (1 << 13) +#define APIC_SEND_PENDING (1 << 12) +#define APIC_MODE_MASK 0x700 +#define GET_APIC_DELIVERY_MODE(x) (((x) >> 8) & 0x7) +#define SET_APIC_DELIVERY_MODE(x, y) (((x) & ~0x700) | ((y) << 8)) +#define 
APIC_MODE_FIXED 0x0 +#define APIC_MODE_NMI 0x4 +#define APIC_MODE_EXTINT 0x7 +#define APIC_LVT1 0x360 +#define APIC_LVTERR 0x370 +#define APIC_TMICT 0x380 +#define APIC_TMCCT 0x390 +#define APIC_TDCR 0x3E0 +#define APIC_SELF_IPI 0x3F0 +#define APIC_TDR_DIV_TMBASE (1 << 2) +#define APIC_TDR_DIV_1 0xB +#define APIC_TDR_DIV_2 0x0 +#define APIC_TDR_DIV_4 0x1 +#define APIC_TDR_DIV_8 0x2 +#define APIC_TDR_DIV_16 0x3 +#define APIC_TDR_DIV_32 0x8 +#define APIC_TDR_DIV_64 0x9 +#define APIC_TDR_DIV_128 0xA +#define APIC_EILVT0 0x500 +#define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */ +#define APIC_EILVT_NR_AMD_10H 4 +#define APIC_EILVT_LVTOFF(x) (((x) >> 4) & 0xF) +#define APIC_EILVT_MSG_FIX 0x0 +#define APIC_EILVT_MSG_SMI 0x2 +#define APIC_EILVT_MSG_NMI 0x4 +#define APIC_EILVT_MSG_EXT 0x7 +#define APIC_EILVT_MASKED (1 << 16) +#define APIC_EILVT1 0x510 +#define APIC_EILVT2 0x520 +#define APIC_EILVT3 0x530 + +#define APIC_BASE_MSR 0x800 + +#endif /* _ASM_X86_APICDEF_H */ diff --git a/tests/kvm-unit-tests/lib/x86/apic.c b/tests/kvm-unit-tests/lib/x86/apic.c new file mode 100644 index 00000000..6b5b0680 --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/apic.c @@ -0,0 +1,208 @@ +#include "libcflat.h" +#include "apic.h" +#include "msr.h" +#include "processor.h" + +static void *g_apic = (void *)0xfee00000; +static void *g_ioapic = (void *)0xfec00000; + +struct apic_ops { + u32 (*reg_read)(unsigned reg); + void (*reg_write)(unsigned reg, u32 val); + void (*icr_write)(u32 val, u32 dest); + u32 (*id)(void); +}; + +static void outb(unsigned char data, unsigned short port) +{ + asm volatile ("out %0, %1" : : "a"(data), "d"(port)); +} + +void eoi(void) +{ + apic_write(APIC_EOI, 0); +} + +static u32 xapic_read(unsigned reg) +{ + return *(volatile u32 *)(g_apic + reg); +} + +static void xapic_write(unsigned reg, u32 val) +{ + *(volatile u32 *)(g_apic + reg) = val; +} + +static void xapic_icr_write(u32 val, u32 dest) +{ + while (xapic_read(APIC_ICR) & APIC_ICR_BUSY) + ; + 
xapic_write(APIC_ICR2, dest << 24); + xapic_write(APIC_ICR, val); +} + +static uint32_t xapic_id(void) +{ + return xapic_read(APIC_ID) >> 24; +} + +static const struct apic_ops xapic_ops = { + .reg_read = xapic_read, + .reg_write = xapic_write, + .icr_write = xapic_icr_write, + .id = xapic_id, +}; + +static const struct apic_ops *apic_ops = &xapic_ops; + +static u32 x2apic_read(unsigned reg) +{ + unsigned a, d; + + asm volatile ("rdmsr" : "=a"(a), "=d"(d) : "c"(APIC_BASE_MSR + reg/16)); + return a | (u64)d << 32; +} + +static void x2apic_write(unsigned reg, u32 val) +{ + asm volatile ("wrmsr" : : "a"(val), "d"(0), "c"(APIC_BASE_MSR + reg/16)); +} + +static void x2apic_icr_write(u32 val, u32 dest) +{ + asm volatile ("wrmsr" : : "a"(val), "d"(dest), + "c"(APIC_BASE_MSR + APIC_ICR/16)); +} + +static uint32_t x2apic_id(void) +{ + return x2apic_read(APIC_ID); +} + +static const struct apic_ops x2apic_ops = { + .reg_read = x2apic_read, + .reg_write = x2apic_write, + .icr_write = x2apic_icr_write, + .id = x2apic_id, +}; + +u32 apic_read(unsigned reg) +{ + return apic_ops->reg_read(reg); +} + +void apic_write(unsigned reg, u32 val) +{ + apic_ops->reg_write(reg, val); +} + +bool apic_read_bit(unsigned reg, int n) +{ + reg += (n >> 5) << 4; + n &= 31; + return (apic_read(reg) & (1 << n)) != 0; +} + +void apic_icr_write(u32 val, u32 dest) +{ + apic_ops->icr_write(val, dest); +} + +uint32_t apic_id(void) +{ + return apic_ops->id(); +} + +uint8_t apic_get_tpr(void) +{ + unsigned long tpr; + +#ifdef __x86_64__ + asm volatile ("mov %%cr8, %0" : "=r"(tpr)); +#else + tpr = apic_read(APIC_TASKPRI) >> 4; +#endif + return tpr; +} + +void apic_set_tpr(uint8_t tpr) +{ +#ifdef __x86_64__ + asm volatile ("mov %0, %%cr8" : : "r"((unsigned long) tpr)); +#else + apic_write(APIC_TASKPRI, tpr << 4); +#endif +} + +int enable_x2apic(void) +{ + unsigned a, b, c, d; + + asm ("cpuid" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(1)); + + if (c & (1 << 21)) { + asm ("rdmsr" : "=a"(a), "=d"(d) : 
"c"(MSR_IA32_APICBASE)); + a |= 1 << 10; + asm ("wrmsr" : : "a"(a), "d"(d), "c"(MSR_IA32_APICBASE)); + apic_ops = &x2apic_ops; + return 1; + } else { + return 0; + } +} + +void reset_apic(void) +{ + u64 disabled = rdmsr(MSR_IA32_APICBASE) & ~(APIC_EN | APIC_EXTD); + wrmsr(MSR_IA32_APICBASE, disabled); + apic_ops = &xapic_ops; + wrmsr(MSR_IA32_APICBASE, disabled | APIC_EN); +} + +u32 ioapic_read_reg(unsigned reg) +{ + *(volatile u32 *)g_ioapic = reg; + return *(volatile u32 *)(g_ioapic + 0x10); +} + +void ioapic_write_reg(unsigned reg, u32 value) +{ + *(volatile u32 *)g_ioapic = reg; + *(volatile u32 *)(g_ioapic + 0x10) = value; +} + +void ioapic_write_redir(unsigned line, ioapic_redir_entry_t e) +{ + ioapic_write_reg(0x10 + line * 2 + 0, ((u32 *)&e)[0]); + ioapic_write_reg(0x10 + line * 2 + 1, ((u32 *)&e)[1]); +} + +ioapic_redir_entry_t ioapic_read_redir(unsigned line) +{ + ioapic_redir_entry_t e; + + ((u32 *)&e)[0] = ioapic_read_reg(0x10 + line * 2 + 0); + ((u32 *)&e)[1] = ioapic_read_reg(0x10 + line * 2 + 1); + return e; + +} + +void set_mask(unsigned line, int mask) +{ + ioapic_redir_entry_t e = ioapic_read_redir(line); + + e.mask = mask; + ioapic_write_redir(line, e); +} + +void enable_apic(void) +{ + printf("enabling apic\n"); + xapic_write(0xf0, 0x1ff); /* spurious vector register */ +} + +void mask_pic_interrupts(void) +{ + outb(0xff, 0x21); + outb(0xff, 0xa1); +} diff --git a/tests/kvm-unit-tests/lib/x86/apic.h b/tests/kvm-unit-tests/lib/x86/apic.h new file mode 100644 index 00000000..699102cb --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/apic.h @@ -0,0 +1,50 @@ +#ifndef CFLAT_APIC_H +#define CFLAT_APIC_H + +#include +#include "apic-defs.h" + +typedef struct { + uint8_t vector; + uint8_t delivery_mode:3; + uint8_t dest_mode:1; + uint8_t delivery_status:1; + uint8_t polarity:1; + uint8_t remote_irr:1; + uint8_t trig_mode:1; + uint8_t mask:1; + uint8_t reserve:7; + uint8_t reserved[4]; + uint8_t dest_id; +} ioapic_redir_entry_t; + +typedef enum 
trigger_mode { + TRIGGER_EDGE = 0, + TRIGGER_LEVEL, + TRIGGER_MAX, +} trigger_mode_t; + +void mask_pic_interrupts(void); + +void eoi(void); +uint8_t apic_get_tpr(void); +void apic_set_tpr(uint8_t tpr); + +void ioapic_write_redir(unsigned line, ioapic_redir_entry_t e); +void ioapic_write_reg(unsigned reg, uint32_t value); +ioapic_redir_entry_t ioapic_read_redir(unsigned line); +uint32_t ioapic_read_reg(unsigned reg); + +void set_mask(unsigned line, int mask); + +void enable_apic(void); +uint32_t apic_read(unsigned reg); +bool apic_read_bit(unsigned reg, int n); +void apic_write(unsigned reg, uint32_t val); +void apic_icr_write(uint32_t val, uint32_t dest); +uint32_t apic_id(void); + +int enable_x2apic(void); +void reset_apic(void); + +#endif diff --git a/tests/kvm-unit-tests/lib/x86/asm/barrier.h b/tests/kvm-unit-tests/lib/x86/asm/barrier.h new file mode 100644 index 00000000..193fb4c2 --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/asm/barrier.h @@ -0,0 +1,27 @@ +#ifndef _ASM_X86_BARRIER_H_ +#define _ASM_X86_BARRIER_H_ +/* + * Copyright (C) 2016, Red Hat Inc, Alexander Gordeev + * + * This work is licensed under the terms of the GNU LGPL, version 2. + */ + +#define mb() asm volatile("mfence":::"memory") +#define rmb() asm volatile("lfence":::"memory") +#define wmb() asm volatile("sfence":::"memory") + +#define smp_rmb() barrier() +#define smp_wmb() barrier() + +/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. 
*/ +static inline void rep_nop(void) +{ + asm volatile("rep; nop" ::: "memory"); +} + +static inline void cpu_relax(void) +{ + rep_nop(); +} + +#endif diff --git a/tests/kvm-unit-tests/lib/x86/asm/bitops.h b/tests/kvm-unit-tests/lib/x86/asm/bitops.h new file mode 100644 index 00000000..eb4aaa9f --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/asm/bitops.h @@ -0,0 +1,14 @@ +#ifndef _ASMX86_BITOPS_H_ +#define _ASMX86_BITOPS_H_ + +#ifndef _BITOPS_H_ +#error only can be included directly +#endif + +#ifdef __x86_64__ +#define BITS_PER_LONG 64 +#else +#define BITS_PER_LONG 32 +#endif + +#endif diff --git a/tests/kvm-unit-tests/lib/x86/asm/io.h b/tests/kvm-unit-tests/lib/x86/asm/io.h new file mode 100644 index 00000000..35a5c734 --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/asm/io.h @@ -0,0 +1,65 @@ +#ifndef _ASM_X86_IO_H_ +#define _ASM_X86_IO_H_ + +#define __iomem + +#define inb inb +static inline uint8_t inb(unsigned long port) +{ + unsigned char value; + asm volatile("inb %w1, %0" : "=a" (value) : "Nd" ((unsigned short)port)); + return value; +} + +#define inw inw +static inline uint16_t inw(unsigned long port) +{ + unsigned short value; + asm volatile("inw %w1, %0" : "=a" (value) : "Nd" ((unsigned short)port)); + return value; +} + +#define inl inl +static inline uint32_t inl(unsigned long port) +{ + unsigned int value; + asm volatile("inl %w1, %0" : "=a" (value) : "Nd" ((unsigned short)port)); + return value; +} + +#define outb outb +static inline void outb(uint8_t value, unsigned long port) +{ + asm volatile("outb %b0, %w1" : : "a"(value), "Nd"((unsigned short)port)); +} + +#define outw outw +static inline void outw(uint16_t value, unsigned long port) +{ + asm volatile("outw %w0, %w1" : : "a"(value), "Nd"((unsigned short)port)); +} + +#define outl outl +static inline void outl(uint32_t value, unsigned long port) +{ + asm volatile("outl %0, %w1" : : "a"(value), "Nd"((unsigned short)port)); +} + +#define virt_to_phys virt_to_phys +static inline unsigned long 
virt_to_phys(const void *virt) +{ + return (unsigned long)virt; +} + +#define phys_to_virt phys_to_virt +static inline void *phys_to_virt(unsigned long phys) +{ + return (void *)phys; +} + +#define ioremap ioremap +void __iomem *ioremap(phys_addr_t phys_addr, size_t size); + +#include + +#endif diff --git a/tests/kvm-unit-tests/lib/x86/asm/page.h b/tests/kvm-unit-tests/lib/x86/asm/page.h new file mode 100644 index 00000000..c43bab28 --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/asm/page.h @@ -0,0 +1,48 @@ +#ifndef _ASM_X86_PAGE_H_ +#define _ASM_X86_PAGE_H_ +/* + * Copyright (C) 2016, Red Hat Inc, Alexander Gordeev + * + * This work is licensed under the terms of the GNU LGPL, version 2. + */ + + +#include +#include + +#define PAGE_SHIFT 12 +#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) + +#ifndef __ASSEMBLY__ + +#ifdef __x86_64__ +#define LARGE_PAGE_SIZE (512 * PAGE_SIZE) +#else +#define LARGE_PAGE_SIZE (1024 * PAGE_SIZE) +#endif + +#define PT_PRESENT_MASK (1ull << 0) +#define PT_WRITABLE_MASK (1ull << 1) +#define PT_USER_MASK (1ull << 2) +#define PT_ACCESSED_MASK (1ull << 5) +#define PT_DIRTY_MASK (1ull << 6) +#define PT_PAGE_SIZE_MASK (1ull << 7) +#define PT64_NX_MASK (1ull << 63) +#define PT_ADDR_MASK GENMASK_ULL(51, 12) + +#ifdef __x86_64__ +#define PAGE_LEVEL 4 +#define PGDIR_WIDTH 9 +#define PGDIR_MASK 511 +#else +#define PAGE_LEVEL 2 +#define PGDIR_WIDTH 10 +#define PGDIR_MASK 1023 +#endif + +#define PGDIR_BITS(lvl) (((lvl) - 1) * PGDIR_WIDTH + PAGE_SHIFT) +#define PGDIR_OFFSET(va, lvl) (((va) >> PGDIR_BITS(lvl)) & PGDIR_MASK) + +#endif /* !__ASSEMBLY__ */ +#endif diff --git a/tests/kvm-unit-tests/lib/x86/asm/pci.h b/tests/kvm-unit-tests/lib/x86/asm/pci.h new file mode 100644 index 00000000..c937e5cd --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/asm/pci.h @@ -0,0 +1,59 @@ +#ifndef ASM_PCI_H +#define ASM_PCI_H +/* + * Copyright (C) 2013, Red Hat Inc, Michael S. 
Tsirkin + * + * This work is licensed under the terms of the GNU LGPL, version 2. + */ +#include "libcflat.h" +#include "pci.h" +#include "x86/asm/io.h" + +#define PCI_CONF1_ADDRESS(dev, reg) ((0x1 << 31) | (dev << 8) | reg) + +static inline uint8_t pci_config_readb(pcidevaddr_t dev, uint8_t reg) +{ + outl(PCI_CONF1_ADDRESS(dev, reg), 0xCF8); + return inb(0xCFC); +} + +static inline uint16_t pci_config_readw(pcidevaddr_t dev, uint8_t reg) +{ + outl(PCI_CONF1_ADDRESS(dev, reg), 0xCF8); + return inw(0xCFC); +} + +static inline uint32_t pci_config_readl(pcidevaddr_t dev, uint8_t reg) +{ + outl(PCI_CONF1_ADDRESS(dev, reg), 0xCF8); + return inl(0xCFC); +} + +static inline void pci_config_writeb(pcidevaddr_t dev, uint8_t reg, + uint8_t val) +{ + outl(PCI_CONF1_ADDRESS(dev, reg), 0xCF8); + outb(val, 0xCFC); +} + +static inline void pci_config_writew(pcidevaddr_t dev, uint8_t reg, + uint16_t val) +{ + outl(PCI_CONF1_ADDRESS(dev, reg), 0xCF8); + outw(val, 0xCFC); +} + +static inline void pci_config_writel(pcidevaddr_t dev, uint8_t reg, + uint32_t val) +{ + outl(PCI_CONF1_ADDRESS(dev, reg), 0xCF8); + outl(val, 0xCFC); +} + +static inline +phys_addr_t pci_translate_addr(pcidevaddr_t dev __unused, uint64_t addr) +{ + return addr; +} + +#endif diff --git a/tests/kvm-unit-tests/lib/x86/asm/spinlock.h b/tests/kvm-unit-tests/lib/x86/asm/spinlock.h new file mode 100644 index 00000000..4b0cb331 --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/asm/spinlock.h @@ -0,0 +1,11 @@ +#ifndef __ASM_SPINLOCK_H +#define __ASM_SPINLOCK_H + +struct spinlock { + int v; +}; + +void spin_lock(struct spinlock *lock); +void spin_unlock(struct spinlock *lock); + +#endif diff --git a/tests/kvm-unit-tests/lib/x86/asm/stack.h b/tests/kvm-unit-tests/lib/x86/asm/stack.h new file mode 100644 index 00000000..b14e2c0f --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/asm/stack.h @@ -0,0 +1,11 @@ +#ifndef _X86ASM_STACK_H_ +#define _X86ASM_STACK_H_ + +#ifndef _STACK_H_ +#error Do not directly include . Just use . 
+#endif + +#define HAVE_ARCH_BACKTRACE_FRAME +#define HAVE_ARCH_BACKTRACE + +#endif diff --git a/tests/kvm-unit-tests/lib/x86/atomic.c b/tests/kvm-unit-tests/lib/x86/atomic.c new file mode 100644 index 00000000..da74ff21 --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/atomic.c @@ -0,0 +1,37 @@ +#include +#include "atomic.h" + +#ifdef __i386__ + +u64 atomic64_cmpxchg(atomic64_t *v, u64 old, u64 new) +{ + u32 low = new; + u32 high = new >> 32; + + asm volatile("lock cmpxchg8b %1\n" + : "+A" (old), + "+m" (*(volatile long long *)&v->counter) + : "b" (low), "c" (high) + : "memory" + ); + + return old; +} + +#else + +u64 atomic64_cmpxchg(atomic64_t *v, u64 old, u64 new) +{ + u64 ret; + u64 _old = old; + u64 _new = new; + + asm volatile("lock cmpxchgq %2,%1" + : "=a" (ret), "+m" (*(volatile long *)&v->counter) + : "r" (_new), "0" (_old) + : "memory" + ); + return ret; +} + +#endif diff --git a/tests/kvm-unit-tests/lib/x86/atomic.h b/tests/kvm-unit-tests/lib/x86/atomic.h new file mode 100644 index 00000000..c9ce489d --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/atomic.h @@ -0,0 +1,166 @@ +#ifndef __ATOMIC_H +#define __ATOMIC_H + +#include "asm-generic/atomic.h" + +typedef struct { + volatile int counter; +} atomic_t; + +#ifdef __i386__ + +/** + * atomic_read - read atomic variable + * @v: pointer of type atomic_t + * + * Atomically reads the value of @v. + */ +static inline int atomic_read(const atomic_t *v) +{ + return v->counter; +} + +/** + * atomic_set - set atomic variable + * @v: pointer of type atomic_t + * @i: required value + * + * Atomically sets the value of @v to @i. + */ +static inline void atomic_set(atomic_t *v, int i) +{ + v->counter = i; +} + +/** + * atomic_inc - increment atomic variable + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1. 
+ */ +static inline void atomic_inc(atomic_t *v) +{ + asm volatile("lock incl %0" + : "+m" (v->counter)); +} + +/** + * atomic_dec - decrement atomic variable + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1. + */ +static inline void atomic_dec(atomic_t *v) +{ + asm volatile("lock decl %0" + : "+m" (v->counter)); +} + +typedef struct { + u64 __attribute__((aligned(8))) counter; +} atomic64_t; + +#define ATOMIC64_INIT(val) { (val) } + +/** + * atomic64_read - read atomic64 variable + * @ptr: pointer to type atomic64_t + * + * Atomically reads the value of @ptr and returns it. + */ +static inline u64 atomic64_read(atomic64_t *ptr) +{ + u64 res; + + /* + * Note, we inline this atomic64_t primitive because + * it only clobbers EAX/EDX and leaves the others + * untouched. We also (somewhat subtly) rely on the + * fact that cmpxchg8b returns the current 64-bit value + * of the memory location we are touching: + */ + asm volatile("mov %%ebx, %%eax\n\t" + "mov %%ecx, %%edx\n\t" + "lock cmpxchg8b %1\n" + : "=&A" (res) + : "m" (*ptr) + ); + return res; +} + +u64 atomic64_cmpxchg(atomic64_t *v, u64 old, u64 new); + +#elif defined(__x86_64__) + +/** + * atomic_read - read atomic variable + * @v: pointer of type atomic_t + * + * Atomically reads the value of @v. + */ +static inline int atomic_read(const atomic_t *v) +{ + return v->counter; +} + +/** + * atomic_set - set atomic variable + * @v: pointer of type atomic_t + * @i: required value + * + * Atomically sets the value of @v to @i. + */ +static inline void atomic_set(atomic_t *v, int i) +{ + v->counter = i; +} + +/** + * atomic_inc - increment atomic variable + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1. + */ +static inline void atomic_inc(atomic_t *v) +{ + asm volatile("lock incl %0" + : "=m" (v->counter) + : "m" (v->counter)); +} + +/** + * atomic_dec - decrement atomic variable + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1. 
+ */ +static inline void atomic_dec(atomic_t *v) +{ + asm volatile("lock decl %0" + : "=m" (v->counter) + : "m" (v->counter)); +} + +typedef struct { + long long counter; +} atomic64_t; + +#define ATOMIC64_INIT(i) { (i) } + +/** + * atomic64_read - read atomic64 variable + * @v: pointer of type atomic64_t + * + * Atomically reads the value of @v. + * Doesn't imply a read memory barrier. + */ +static inline long atomic64_read(const atomic64_t *v) +{ + return v->counter; +} + +u64 atomic64_cmpxchg(atomic64_t *v, u64 old, u64 new); + +#endif + +#endif diff --git a/tests/kvm-unit-tests/lib/x86/desc.c b/tests/kvm-unit-tests/lib/x86/desc.c new file mode 100644 index 00000000..402204dd --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/desc.c @@ -0,0 +1,407 @@ +#include "libcflat.h" +#include "desc.h" +#include "processor.h" +#include + +void set_idt_entry(int vec, void *addr, int dpl) +{ + idt_entry_t *e = &boot_idt[vec]; + memset(e, 0, sizeof *e); + e->offset0 = (unsigned long)addr; + e->selector = read_cs(); + e->ist = 0; + e->type = 14; + e->dpl = dpl; + e->p = 1; + e->offset1 = (unsigned long)addr >> 16; +#ifdef __x86_64__ + e->offset2 = (unsigned long)addr >> 32; +#endif +} + +void set_idt_dpl(int vec, u16 dpl) +{ + idt_entry_t *e = &boot_idt[vec]; + e->dpl = dpl; +} + +void set_idt_sel(int vec, u16 sel) +{ + idt_entry_t *e = &boot_idt[vec]; + e->selector = sel; +} + +struct ex_record { + unsigned long rip; + unsigned long handler; +}; + +extern struct ex_record exception_table_start, exception_table_end; + +static const char* exception_mnemonic(int vector) +{ + switch(vector) { + case 0: return "#DE"; + case 1: return "#DB"; + case 2: return "#NMI"; + case 3: return "#BP"; + case 4: return "#OF"; + case 5: return "#BR"; + case 6: return "#UD"; + case 7: return "#NM"; + case 8: return "#DF"; + case 10: return "#TS"; + case 11: return "#NP"; + case 12: return "#SS"; + case 13: return "#GP"; + case 14: return "#PF"; + case 16: return "#MF"; + case 17: return "#AC"; + 
case 18: return "#MC"; + case 19: return "#XM"; + default: return "#??"; + } +} + +static void unhandled_exception(struct ex_regs *regs, bool cpu) +{ + printf("Unhandled %sexception %ld %s at ip %016lx\n", + cpu ? "cpu " : "", regs->vector, + exception_mnemonic(regs->vector), regs->rip); + if (regs->vector == 14) + printf("PF at 0x%lx addr 0x%lx\n", regs->rip, read_cr2()); + + printf("error_code=%04lx rflags=%08lx cs=%08lx\n" + "rax=%016lx rcx=%016lx rdx=%016lx rbx=%016lx\n" + "rbp=%016lx rsi=%016lx rdi=%016lx\n" +#ifdef __x86_64__ + " r8=%016lx r9=%016lx r10=%016lx r11=%016lx\n" + "r12=%016lx r13=%016lx r14=%016lx r15=%016lx\n" +#endif + "cr0=%016lx cr2=%016lx cr3=%016lx cr4=%016lx\n" +#ifdef __x86_64__ + "cr8=%016lx\n" +#endif + , + regs->error_code, regs->rflags, regs->cs, + regs->rax, regs->rcx, regs->rdx, regs->rbx, + regs->rbp, regs->rsi, regs->rdi, +#ifdef __x86_64__ + regs->r8, regs->r9, regs->r10, regs->r11, + regs->r12, regs->r13, regs->r14, regs->r15, +#endif + read_cr0(), read_cr2(), read_cr3(), read_cr4() +#ifdef __x86_64__ + , read_cr8() +#endif + ); + dump_frame_stack((void*) regs->rip, (void*) regs->rbp); + abort(); +} + +static void check_exception_table(struct ex_regs *regs) +{ + struct ex_record *ex; + unsigned ex_val; + + ex_val = regs->vector | (regs->error_code << 16) | + (((regs->rflags >> 16) & 1) << 8); + asm("mov %0, %%gs:4" : : "r"(ex_val)); + + for (ex = &exception_table_start; ex != &exception_table_end; ++ex) { + if (ex->rip == regs->rip) { + regs->rip = ex->handler; + return; + } + } + unhandled_exception(regs, false); +} + +static void (*exception_handlers[32])(struct ex_regs *regs); + + +void handle_exception(u8 v, void (*func)(struct ex_regs *regs)) +{ + if (v < 32) + exception_handlers[v] = func; +} + +#ifndef __x86_64__ +__attribute__((regparm(1))) +#endif +void do_handle_exception(struct ex_regs *regs) +{ + if (regs->vector < 32 && exception_handlers[regs->vector]) { + exception_handlers[regs->vector](regs); + return; + } + 
unhandled_exception(regs, true); +} + +#define EX(NAME, N) extern char NAME##_fault; \ + asm (".pushsection .text \n\t" \ + #NAME"_fault: \n\t" \ + "push"W" $0 \n\t" \ + "push"W" $"#N" \n\t" \ + "jmp __handle_exception \n\t" \ + ".popsection") + +#define EX_E(NAME, N) extern char NAME##_fault; \ + asm (".pushsection .text \n\t" \ + #NAME"_fault: \n\t" \ + "push"W" $"#N" \n\t" \ + "jmp __handle_exception \n\t" \ + ".popsection") + +EX(de, 0); +EX(db, 1); +EX(nmi, 2); +EX(bp, 3); +EX(of, 4); +EX(br, 5); +EX(ud, 6); +EX(nm, 7); +EX_E(df, 8); +EX_E(ts, 10); +EX_E(np, 11); +EX_E(ss, 12); +EX_E(gp, 13); +EX_E(pf, 14); +EX(mf, 16); +EX_E(ac, 17); +EX(mc, 18); +EX(xm, 19); + +asm (".pushsection .text \n\t" + "__handle_exception: \n\t" +#ifdef __x86_64__ + "push %r15; push %r14; push %r13; push %r12 \n\t" + "push %r11; push %r10; push %r9; push %r8 \n\t" +#endif + "push %"R "di; push %"R "si; push %"R "bp; sub $"S", %"R "sp \n\t" + "push %"R "bx; push %"R "dx; push %"R "cx; push %"R "ax \n\t" +#ifdef __x86_64__ + "mov %"R "sp, %"R "di \n\t" +#else + "mov %"R "sp, %"R "ax \n\t" +#endif + "call do_handle_exception \n\t" + "pop %"R "ax; pop %"R "cx; pop %"R "dx; pop %"R "bx \n\t" + "add $"S", %"R "sp; pop %"R "bp; pop %"R "si; pop %"R "di \n\t" +#ifdef __x86_64__ + "pop %r8; pop %r9; pop %r10; pop %r11 \n\t" + "pop %r12; pop %r13; pop %r14; pop %r15 \n\t" +#endif + "add $"S", %"R "sp \n\t" + "add $"S", %"R "sp \n\t" + "iret"W" \n\t" + ".popsection"); + +static void *idt_handlers[32] = { + [0] = &de_fault, + [1] = &db_fault, + [2] = &nmi_fault, + [3] = &bp_fault, + [4] = &of_fault, + [5] = &br_fault, + [6] = &ud_fault, + [7] = &nm_fault, + [8] = &df_fault, + [10] = &ts_fault, + [11] = &np_fault, + [12] = &ss_fault, + [13] = &gp_fault, + [14] = &pf_fault, + [16] = &mf_fault, + [17] = &ac_fault, + [18] = &mc_fault, + [19] = &xm_fault, +}; + +void setup_idt(void) +{ + int i; + static bool idt_initialized = false; + + if (idt_initialized) { + return; + } + idt_initialized = true; + 
for (i = 0; i < 32; i++) + if (idt_handlers[i]) + set_idt_entry(i, idt_handlers[i], 0); + handle_exception(0, check_exception_table); + handle_exception(6, check_exception_table); + handle_exception(13, check_exception_table); +} + +unsigned exception_vector(void) +{ + unsigned char vector; + + asm("movb %%gs:4, %0" : "=q"(vector)); + return vector; +} + +unsigned exception_error_code(void) +{ + unsigned short error_code; + + asm("mov %%gs:6, %0" : "=rm"(error_code)); + return error_code; +} + +bool exception_rflags_rf(void) +{ + unsigned char rf_flag; + + asm("movb %%gs:5, %b0" : "=q"(rf_flag)); + return rf_flag & 1; +} + +static char intr_alt_stack[4096]; + +#ifndef __x86_64__ +/* + * GDT, with 6 entries: + * 0x00 - NULL descriptor + * 0x08 - Code segment (ring 0) + * 0x10 - Data segment (ring 0) + * 0x18 - Not present code segment (ring 0) + * 0x20 - Code segment (ring 3) + * 0x28 - Data segment (ring 3) + * 0x30 - Interrupt task + * 0x38 to 0x78 - Free to use for test cases + * 0x80 - Primary task (CPU 0) + */ + +void set_gdt_entry(int sel, u32 base, u32 limit, u8 access, u8 gran) +{ + int num = sel >> 3; + + /* Setup the descriptor base address */ + gdt32[num].base_low = (base & 0xFFFF); + gdt32[num].base_middle = (base >> 16) & 0xFF; + gdt32[num].base_high = (base >> 24) & 0xFF; + + /* Setup the descriptor limits */ + gdt32[num].limit_low = (limit & 0xFFFF); + gdt32[num].granularity = ((limit >> 16) & 0x0F); + + /* Finally, set up the granularity and access flags */ + gdt32[num].granularity |= (gran & 0xF0); + gdt32[num].access = access; +} + +void set_gdt_task_gate(u16 sel, u16 tss_sel) +{ + set_gdt_entry(sel, tss_sel, 0, 0x85, 0); // task, present +} + +void set_idt_task_gate(int vec, u16 sel) +{ + idt_entry_t *e = &boot_idt[vec]; + + memset(e, 0, sizeof *e); + + e->selector = sel; + e->ist = 0; + e->type = 5; + e->dpl = 0; + e->p = 1; +} + +/* + * 0 - main task + * 1 - interrupt task + */ + +tss32_t tss_intr; + +void setup_tss32(void) +{ + u16 desc_size = 
sizeof(tss32_t); + + tss.cr3 = read_cr3(); + tss_intr.cr3 = read_cr3(); + tss_intr.ss0 = tss_intr.ss1 = tss_intr.ss2 = 0x10; + tss_intr.esp = tss_intr.esp0 = tss_intr.esp1 = tss_intr.esp2 = + (u32)intr_alt_stack + 4096; + tss_intr.cs = 0x08; + tss_intr.ds = tss_intr.es = tss_intr.fs = tss_intr.gs = tss_intr.ss = 0x10; + tss_intr.iomap_base = (u16)desc_size; + set_gdt_entry(TSS_INTR, (u32)&tss_intr, desc_size - 1, 0x89, 0x0f); +} + +void set_intr_task_gate(int e, void *fn) +{ + tss_intr.eip = (u32)fn; + set_idt_task_gate(e, TSS_INTR); +} + +void setup_alt_stack(void) +{ + setup_tss32(); +} + +void set_intr_alt_stack(int e, void *fn) +{ + set_intr_task_gate(e, fn); +} + +void print_current_tss_info(void) +{ + u16 tr = str(); + + if (tr != TSS_MAIN && tr != TSS_INTR) + printf("Unknown TSS %x\n", tr); + else + printf("TR=%x (%s) Main TSS back link %x. Intr TSS back link %x\n", + tr, tr ? "interrupt" : "main", tss.prev, tss_intr.prev); +} +#else +void set_intr_alt_stack(int e, void *addr) +{ + set_idt_entry(e, addr, 0); + boot_idt[e].ist = 1; +} + +void setup_alt_stack(void) +{ + tss.ist1 = (u64)intr_alt_stack + 4096; +} +#endif + +static bool exception; +static jmp_buf *exception_jmpbuf; + +static void exception_handler_longjmp(void) +{ + longjmp(*exception_jmpbuf, 1); +} + +static void exception_handler(struct ex_regs *regs) +{ + /* longjmp must happen after iret, so do not do it now. 
*/ + exception = true; + regs->rip = (unsigned long)&exception_handler_longjmp; +} + +bool test_for_exception(unsigned int ex, void (*trigger_func)(void *data), + void *data) +{ + jmp_buf jmpbuf; + int ret; + + handle_exception(ex, exception_handler); + ret = set_exception_jmpbuf(jmpbuf); + if (ret == 0) + trigger_func(data); + handle_exception(ex, NULL); + return ret; +} + +void __set_exception_jmpbuf(jmp_buf *addr) +{ + exception_jmpbuf = addr; +} diff --git a/tests/kvm-unit-tests/lib/x86/desc.h b/tests/kvm-unit-tests/lib/x86/desc.h new file mode 100644 index 00000000..be52fd4e --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/desc.h @@ -0,0 +1,164 @@ +#ifndef __IDT_TEST__ +#define __IDT_TEST__ + +#include + +void setup_idt(void); +void setup_alt_stack(void); + +struct ex_regs { + unsigned long rax, rcx, rdx, rbx; + unsigned long dummy, rbp, rsi, rdi; +#ifdef __x86_64__ + unsigned long r8, r9, r10, r11; + unsigned long r12, r13, r14, r15; +#endif + unsigned long vector; + unsigned long error_code; + unsigned long rip; + unsigned long cs; + unsigned long rflags; +}; + +typedef struct { + u16 prev; + u16 res1; + u32 esp0; + u16 ss0; + u16 res2; + u32 esp1; + u16 ss1; + u16 res3; + u32 esp2; + u16 ss2; + u16 res4; + u32 cr3; + u32 eip; + u32 eflags; + u32 eax, ecx, edx, ebx, esp, ebp, esi, edi; + u16 es; + u16 res5; + u16 cs; + u16 res6; + u16 ss; + u16 res7; + u16 ds; + u16 res8; + u16 fs; + u16 res9; + u16 gs; + u16 res10; + u16 ldt; + u16 res11; + u16 t:1; + u16 res12:15; + u16 iomap_base; +} tss32_t; + +typedef struct __attribute__((packed)) { + u32 res1; + u64 rsp0; + u64 rsp1; + u64 rsp2; + u64 res2; + u64 ist1; + u64 ist2; + u64 ist3; + u64 ist4; + u64 ist5; + u64 ist6; + u64 ist7; + u64 res3; + u16 res4; + u16 iomap_base; +} tss64_t; + +#define ASM_TRY(catch) \ + "movl $0, %%gs:4 \n\t" \ + ".pushsection .data.ex \n\t" \ + ".quad 1111f, " catch "\n\t" \ + ".popsection \n\t" \ + "1111:" + +#define DB_VECTOR 1 +#define BP_VECTOR 3 +#define UD_VECTOR 6 +#define 
GP_VECTOR 13 + +#define KERNEL_CS 0x08 +#define KERNEL_DS 0x10 +#define NP_SEL 0x18 +#define USER_CS 0x23 +#define USER_DS 0x2b +#ifdef __x86_64__ +#define KERNEL_CS64 KERNEL_CS +#define KERNEL_DS64 KERNEL_DS +#define KERNEL_CS32 0x30 +#define KERNEL_DS32 0x38 +#define KERNEL_CS16 0x40 +#define KERNEL_DS16 0x48 +#else +#define KERNEL_CS32 KERNEL_CS +#define KERNEL_DS32 KERNEL_DS +#endif +#define TSS_INTR 0x50 +#define FIRST_SPARE_SEL 0x58 +#define TSS_MAIN 0x80 + +typedef struct { + unsigned short offset0; + unsigned short selector; + unsigned short ist : 3; + unsigned short : 5; + unsigned short type : 4; + unsigned short : 1; + unsigned short dpl : 2; + unsigned short p : 1; + unsigned short offset1; +#ifdef __x86_64__ + unsigned offset2; + unsigned reserved; +#endif +} idt_entry_t; + +typedef struct { + u16 limit_low; + u16 base_low; + u8 base_middle; + u8 access; + u8 granularity; + u8 base_high; +} gdt_entry_t; + +extern idt_entry_t boot_idt[256]; + +#ifndef __x86_64__ +extern gdt_entry_t gdt32[]; +extern tss32_t tss; +extern tss32_t tss_intr; +void set_gdt_task_gate(u16 tss_sel, u16 sel); +void set_idt_task_gate(int vec, u16 sel); +void set_intr_task_gate(int vec, void *fn); +void setup_tss32(void); +#else +extern tss64_t tss; +#endif + +unsigned exception_vector(void); +unsigned exception_error_code(void); +bool exception_rflags_rf(void); +void set_idt_entry(int vec, void *addr, int dpl); +void set_idt_sel(int vec, u16 sel); +void set_idt_dpl(int vec, u16 dpl); +void set_gdt_entry(int sel, u32 base, u32 limit, u8 access, u8 gran); +void set_intr_alt_stack(int e, void *fn); +void print_current_tss_info(void); +void handle_exception(u8 v, void (*func)(struct ex_regs *regs)); + +bool test_for_exception(unsigned int ex, void (*trigger_func)(void *data), + void *data); +void __set_exception_jmpbuf(jmp_buf *addr); +#define set_exception_jmpbuf(jmpbuf) \ + (setjmp(jmpbuf) ? 
: (__set_exception_jmpbuf(&(jmpbuf)), 0)) + +#endif diff --git a/tests/kvm-unit-tests/lib/x86/fake-apic.h b/tests/kvm-unit-tests/lib/x86/fake-apic.h new file mode 100644 index 00000000..eed63bae --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/fake-apic.h @@ -0,0 +1,14 @@ +#ifndef SILLY_APIC_H +#define SILLY_APIC_H + +#define APIC_BASE 0x1000 +#define APIC_SIZE 0x100 + +#define APIC_REG_NCPU 0x00 +#define APIC_REG_ID 0x04 +#define APIC_REG_SIPI_ADDR 0x08 +#define APIC_REG_SEND_SIPI 0x0c +#define APIC_REG_IPI_VECTOR 0x10 +#define APIC_REG_SEND_IPI 0x14 + +#endif diff --git a/tests/kvm-unit-tests/lib/x86/fwcfg.c b/tests/kvm-unit-tests/lib/x86/fwcfg.c new file mode 100644 index 00000000..e2cdd157 --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/fwcfg.c @@ -0,0 +1,45 @@ +#include "fwcfg.h" +#include "smp.h" + +static struct spinlock lock; + +uint64_t fwcfg_get_u(uint16_t index, int bytes) +{ + uint64_t r = 0; + uint8_t b; + int i; + + spin_lock(&lock); + asm volatile ("out %0, %1" : : "a"(index), "d"((uint16_t)BIOS_CFG_IOPORT)); + for (i = 0; i < bytes; ++i) { + asm volatile ("in %1, %0" : "=a"(b) : "d"((uint16_t)(BIOS_CFG_IOPORT + 1))); + r |= (uint64_t)b << (i * 8); + } + spin_unlock(&lock); + return r; +} + +uint8_t fwcfg_get_u8(unsigned index) +{ + return fwcfg_get_u(index, 1); +} + +uint16_t fwcfg_get_u16(unsigned index) +{ + return fwcfg_get_u(index, 2); +} + +uint32_t fwcfg_get_u32(unsigned index) +{ + return fwcfg_get_u(index, 4); +} + +uint64_t fwcfg_get_u64(unsigned index) +{ + return fwcfg_get_u(index, 8); +} + +unsigned fwcfg_get_nb_cpus(void) +{ + return fwcfg_get_u16(FW_CFG_NB_CPUS); +} diff --git a/tests/kvm-unit-tests/lib/x86/fwcfg.h b/tests/kvm-unit-tests/lib/x86/fwcfg.h new file mode 100644 index 00000000..e0836ca4 --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/fwcfg.h @@ -0,0 +1,44 @@ +#ifndef FWCFG_H +#define FWCFG_H + +#include + +#define FW_CFG_SIGNATURE 0x00 +#define FW_CFG_ID 0x01 +#define FW_CFG_UUID 0x02 +#define FW_CFG_RAM_SIZE 0x03 
+#define FW_CFG_NOGRAPHIC 0x04 +#define FW_CFG_NB_CPUS 0x05 +#define FW_CFG_MACHINE_ID 0x06 +#define FW_CFG_KERNEL_ADDR 0x07 +#define FW_CFG_KERNEL_SIZE 0x08 +#define FW_CFG_KERNEL_CMDLINE 0x09 +#define FW_CFG_INITRD_ADDR 0x0a +#define FW_CFG_INITRD_SIZE 0x0b +#define FW_CFG_BOOT_DEVICE 0x0c +#define FW_CFG_NUMA 0x0d +#define FW_CFG_BOOT_MENU 0x0e +#define FW_CFG_MAX_CPUS 0x0f +#define FW_CFG_MAX_ENTRY 0x10 + +#define FW_CFG_WRITE_CHANNEL 0x4000 +#define FW_CFG_ARCH_LOCAL 0x8000 +#define FW_CFG_ENTRY_MASK ~(FW_CFG_WRITE_CHANNEL | FW_CFG_ARCH_LOCAL) + +#define FW_CFG_INVALID 0xffff + +#define BIOS_CFG_IOPORT 0x510 + +#define FW_CFG_ACPI_TABLES (FW_CFG_ARCH_LOCAL + 0) +#define FW_CFG_SMBIOS_ENTRIES (FW_CFG_ARCH_LOCAL + 1) +#define FW_CFG_IRQ0_OVERRIDE (FW_CFG_ARCH_LOCAL + 2) + +uint8_t fwcfg_get_u8(unsigned index); +uint16_t fwcfg_get_u16(unsigned index); +uint32_t fwcfg_get_u32(unsigned index); +uint64_t fwcfg_get_u64(unsigned index); + +unsigned fwcfg_get_nb_cpus(void); + +#endif + diff --git a/tests/kvm-unit-tests/lib/x86/intel-iommu.c b/tests/kvm-unit-tests/lib/x86/intel-iommu.c new file mode 100644 index 00000000..7cc5a702 --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/intel-iommu.c @@ -0,0 +1,372 @@ +/* + * Intel IOMMU APIs + * + * Copyright (C) 2016 Red Hat, Inc. + * + * Authors: + * Peter Xu , + * + * This work is licensed under the terms of the GNU LGPL, version 2 or + * later. + */ + +#include "intel-iommu.h" +#include "libcflat.h" +#include "pci.h" +#include "atomic.h" + +/* + * VT-d in QEMU currently only support 39 bits address width, which is + * 3-level translation. 
+ */ +#define VTD_PAGE_LEVEL 3 +#define VTD_CE_AW_39BIT 0x1 + +typedef uint64_t vtd_pte_t; + +struct vtd_root_entry { + /* Quad 1 */ + uint64_t present:1; + uint64_t __reserved:11; + uint64_t context_table_p:52; + /* Quad 2 */ + uint64_t __reserved_2; +} __attribute__ ((packed)); +typedef struct vtd_root_entry vtd_re_t; + +struct vtd_context_entry { + /* Quad 1 */ + uint64_t present:1; + uint64_t disable_fault_report:1; + uint64_t trans_type:2; + uint64_t __reserved:8; + uint64_t slptptr:52; + /* Quad 2 */ + uint64_t addr_width:3; + uint64_t __ignore:4; + uint64_t __reserved_2:1; + uint64_t domain_id:16; + uint64_t __reserved_3:40; +} __attribute__ ((packed)); +typedef struct vtd_context_entry vtd_ce_t; + +struct vtd_irte { + uint32_t present:1; + uint32_t fault_disable:1; /* Fault Processing Disable */ + uint32_t dest_mode:1; /* Destination Mode */ + uint32_t redir_hint:1; /* Redirection Hint */ + uint32_t trigger_mode:1; /* Trigger Mode */ + uint32_t delivery_mode:3; /* Delivery Mode */ + uint32_t __avail:4; /* Available spaces for software */ + uint32_t __reserved_0:3; /* Reserved 0 */ + uint32_t irte_mode:1; /* IRTE Mode */ + uint32_t vector:8; /* Interrupt Vector */ + uint32_t __reserved_1:8; /* Reserved 1 */ + uint32_t dest_id; /* Destination ID */ + uint16_t source_id:16; /* Source-ID */ + uint64_t sid_q:2; /* Source-ID Qualifier */ + uint64_t sid_vtype:2; /* Source-ID Validation Type */ + uint64_t __reserved_2:44; /* Reserved 2 */ +} __attribute__ ((packed)); +typedef struct vtd_irte vtd_irte_t; + +#define VTD_RTA_MASK (PAGE_MASK) +#define VTD_IRTA_MASK (PAGE_MASK) + +void *vtd_reg_base; + +static uint64_t vtd_root_table(void) +{ + /* No extend root table support yet */ + return vtd_readq(DMAR_RTADDR_REG) & VTD_RTA_MASK; +} + +static uint64_t vtd_ir_table(void) +{ + return vtd_readq(DMAR_IRTA_REG) & VTD_IRTA_MASK; +} + +static void vtd_gcmd_or(uint32_t cmd) +{ + uint32_t status; + + /* We only allow set one bit for each time */ + assert(is_power_of_2(cmd)); 
+ + status = vtd_readl(DMAR_GSTS_REG); + vtd_writel(DMAR_GCMD_REG, status | cmd); + + if (cmd & VTD_GCMD_ONE_SHOT_BITS) { + /* One-shot bits are taking effect immediately */ + return; + } + + /* Make sure IOMMU handled our command request */ + while (!(vtd_readl(DMAR_GSTS_REG) & cmd)) + cpu_relax(); +} + +static void vtd_dump_init_info(void) +{ + uint32_t version; + + version = vtd_readl(DMAR_VER_REG); + + /* Major version >= 1 */ + assert(((version >> 3) & 0xf) >= 1); + + printf("VT-d version: 0x%x\n", version); + printf(" cap: 0x%016lx\n", vtd_readq(DMAR_CAP_REG)); + printf(" ecap: 0x%016lx\n", vtd_readq(DMAR_ECAP_REG)); +} + +static void vtd_setup_root_table(void) +{ + void *root = alloc_page(); + + memset(root, 0, PAGE_SIZE); + vtd_writeq(DMAR_RTADDR_REG, virt_to_phys(root)); + vtd_gcmd_or(VTD_GCMD_ROOT); + printf("DMAR table address: 0x%016lx\n", vtd_root_table()); +} + +static void vtd_setup_ir_table(void) +{ + void *root = alloc_page(); + + memset(root, 0, PAGE_SIZE); + /* 0xf stands for table size (2^(0xf+1) == 65536) */ + vtd_writeq(DMAR_IRTA_REG, virt_to_phys(root) | 0xf); + vtd_gcmd_or(VTD_GCMD_IR_TABLE); + printf("IR table address: 0x%016lx\n", vtd_ir_table()); +} + +static void vtd_install_pte(vtd_pte_t *root, iova_t iova, + phys_addr_t pa, int level_target) +{ + int level; + unsigned int offset; + void *page; + + for (level = VTD_PAGE_LEVEL; level > level_target; level--) { + offset = PGDIR_OFFSET(iova, level); + if (!(root[offset] & VTD_PTE_RW)) { + page = alloc_page(); + memset(page, 0, PAGE_SIZE); + root[offset] = virt_to_phys(page) | VTD_PTE_RW; + } + root = (uint64_t *)(phys_to_virt(root[offset] & + VTD_PTE_ADDR)); + } + + offset = PGDIR_OFFSET(iova, level); + root[offset] = pa | VTD_PTE_RW; + if (level != 1) { + /* This is huge page */ + root[offset] |= VTD_PTE_HUGE; + } +} + +/** + * vtd_map_range: setup IO address mapping for specific memory range + * + * @sid: source ID of the device to setup + * @iova: start IO virtual address + * @pa: start 
physical address + * @size: size of the mapping area + */ +void vtd_map_range(uint16_t sid, iova_t iova, phys_addr_t pa, size_t size) +{ + uint8_t bus_n, devfn; + void *slptptr; + vtd_ce_t *ce; + vtd_re_t *re = phys_to_virt(vtd_root_table()); + + assert(IS_ALIGNED(iova, SZ_4K)); + assert(IS_ALIGNED(pa, SZ_4K)); + assert(IS_ALIGNED(size, SZ_4K)); + + bus_n = PCI_BDF_GET_BUS(sid); + devfn = PCI_BDF_GET_DEVFN(sid); + + /* Point to the correct root entry */ + re += bus_n; + + if (!re->present) { + ce = alloc_page(); + memset(ce, 0, PAGE_SIZE); + memset(re, 0, sizeof(*re)); + re->context_table_p = virt_to_phys(ce) >> VTD_PAGE_SHIFT; + re->present = 1; + printf("allocated vt-d root entry for PCI bus %d\n", + bus_n); + } else + ce = phys_to_virt(re->context_table_p << VTD_PAGE_SHIFT); + + /* Point to the correct context entry */ + ce += devfn; + + if (!ce->present) { + slptptr = alloc_page(); + memset(slptptr, 0, PAGE_SIZE); + memset(ce, 0, sizeof(*ce)); + /* To make it simple, domain ID is the same as SID */ + ce->domain_id = sid; + /* We only test 39 bits width case (3-level paging) */ + ce->addr_width = VTD_CE_AW_39BIT; + ce->slptptr = virt_to_phys(slptptr) >> VTD_PAGE_SHIFT; + ce->trans_type = VTD_CONTEXT_TT_MULTI_LEVEL; + ce->present = 1; + /* No error reporting yet */ + ce->disable_fault_report = 1; + printf("allocated vt-d context entry for devfn 0x%x\n", + devfn); + } else + slptptr = phys_to_virt(ce->slptptr << VTD_PAGE_SHIFT); + + while (size) { + /* TODO: currently we only map 4K pages (level = 1) */ + printf("map 4K page IOVA 0x%lx to 0x%lx (sid=0x%04x)\n", + iova, pa, sid); + vtd_install_pte(slptptr, iova, pa, 1); + size -= VTD_PAGE_SIZE; + iova += VTD_PAGE_SIZE; + pa += VTD_PAGE_SIZE; + } +} + +static uint16_t vtd_intr_index_alloc(void) +{ + static volatile int index_ctr = 0; + int ctr; + + assert(index_ctr < 65535); + ctr = atomic_inc_fetch(&index_ctr); + printf("INTR: alloc IRTE index %d\n", ctr); + return ctr; +} + +static void vtd_setup_irte(struct 
pci_dev *dev, vtd_irte_t *irte, + int vector, int dest_id, trigger_mode_t trigger) +{ + assert(sizeof(vtd_irte_t) == 16); + memset(irte, 0, sizeof(*irte)); + irte->fault_disable = 1; + irte->dest_mode = 0; /* physical */ + irte->trigger_mode = trigger; + irte->delivery_mode = 0; /* fixed */ + irte->irte_mode = 0; /* remapped */ + irte->vector = vector; + irte->dest_id = dest_id; + irte->source_id = dev->bdf; + irte->sid_q = 0; + irte->sid_vtype = 1; /* full-sid verify */ + irte->present = 1; +} + +struct vtd_msi_addr { + uint32_t __dont_care:2; + uint32_t handle_15:1; /* handle[15] */ + uint32_t shv:1; + uint32_t interrupt_format:1; + uint32_t handle_0_14:15; /* handle[0:14] */ + uint32_t head:12; /* 0xfee */ + uint32_t addr_hi; /* not used except with x2apic */ +} __attribute__ ((packed)); +typedef struct vtd_msi_addr vtd_msi_addr_t; + +struct vtd_msi_data { + uint16_t __reserved; + uint16_t subhandle; +} __attribute__ ((packed)); +typedef struct vtd_msi_data vtd_msi_data_t; + +struct vtd_ioapic_entry { + uint64_t vector:8; + uint64_t __zeros:3; + uint64_t index_15:1; + uint64_t delivery_status:1; + uint64_t polarity:1; + uint64_t remote_irr:1; + uint64_t trigger_mode:1; + uint64_t mask:1; + uint64_t __zeros_2:31; + uint64_t interrupt_format:1; + uint64_t index_0_14:15; +} __attribute__ ((packed)); +typedef struct vtd_ioapic_entry vtd_ioapic_entry_t; + +/** + * vtd_setup_msi - setup MSI message for a device + * + * @dev: PCI device to setup MSI + * @vector: interrupt vector + * @dest_id: destination processor + */ +bool vtd_setup_msi(struct pci_dev *dev, int vector, int dest_id) +{ + vtd_msi_data_t msi_data = {}; + vtd_msi_addr_t msi_addr = {}; + vtd_irte_t *irte = phys_to_virt(vtd_ir_table()); + uint16_t index = vtd_intr_index_alloc(); + + assert(sizeof(vtd_msi_addr_t) == 8); + assert(sizeof(vtd_msi_data_t) == 4); + + /* Use edge irq as default */ + vtd_setup_irte(dev, irte + index, vector, + dest_id, TRIGGER_EDGE); + + msi_addr.handle_15 = index >> 15 & 1; + 
msi_addr.shv = 0; + msi_addr.interrupt_format = 1; + msi_addr.handle_0_14 = index & 0x7fff; + msi_addr.head = 0xfee; + msi_data.subhandle = 0; + + printf("%s: msi_addr=0x%" PRIx64 ", msi_data=0x%x\n", __func__, + *(uint64_t *)&msi_addr, *(uint32_t *)&msi_data); + + return pci_setup_msi(dev, *(uint64_t *)&msi_addr, + *(uint32_t *)&msi_data); +} + +void vtd_setup_ioapic_irq(struct pci_dev *dev, int vector, + int dest_id, trigger_mode_t trigger) +{ + vtd_ioapic_entry_t entry = {}; + vtd_irte_t *irte = phys_to_virt(vtd_ir_table()); + ioapic_redir_entry_t *entry_2 = (ioapic_redir_entry_t *)&entry; + uint16_t index = vtd_intr_index_alloc(); + uint8_t line; + + assert(dev); + assert(sizeof(vtd_ioapic_entry_t) == 8); + + vtd_setup_irte(dev, irte + index, vector, + dest_id, trigger); + + entry.vector = vector; + entry.trigger_mode = trigger; + entry.index_15 = (index >> 15) & 1; + entry.interrupt_format = 1; + entry.index_0_14 = index & 0x7fff; + + line = pci_intx_line(dev); + ioapic_write_redir(line, *entry_2); +} + +void vtd_init(void) +{ + setup_vm(); + smp_init(); + + vtd_reg_base = ioremap(Q35_HOST_BRIDGE_IOMMU_ADDR, PAGE_SIZE); + + vtd_dump_init_info(); + vtd_gcmd_or(VTD_GCMD_QI); /* Enable QI */ + vtd_setup_root_table(); + vtd_setup_ir_table(); + vtd_gcmd_or(VTD_GCMD_DMAR); /* Enable DMAR */ + vtd_gcmd_or(VTD_GCMD_IR); /* Enable IR */ +} diff --git a/tests/kvm-unit-tests/lib/x86/intel-iommu.h b/tests/kvm-unit-tests/lib/x86/intel-iommu.h new file mode 100644 index 00000000..885be538 --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/intel-iommu.h @@ -0,0 +1,149 @@ +/* + * Intel IOMMU header + * + * Copyright (C) 2016 Red Hat, Inc. + * + * Authors: + * Peter Xu , + * + * This work is licensed under the terms of the GNU LGPL, version 2 or + * later. 
+ * + * (From include/linux/intel-iommu.h) + */ + +#ifndef __INTEL_IOMMU_H__ +#define __INTEL_IOMMU_H__ + +#include "libcflat.h" +#include "vm.h" +#include "isr.h" +#include "smp.h" +#include "desc.h" +#include "pci.h" +#include "asm/io.h" +#include "apic.h" + +#define Q35_HOST_BRIDGE_IOMMU_ADDR 0xfed90000ULL +#define VTD_PAGE_SHIFT PAGE_SHIFT +#define VTD_PAGE_SIZE PAGE_SIZE + +/* + * Intel IOMMU register specification + */ +#define DMAR_VER_REG 0x0 /* Arch version supported by this IOMMU */ +#define DMAR_CAP_REG 0x8 /* Hardware supported capabilities */ +#define DMAR_CAP_REG_HI 0xc /* High 32-bit of DMAR_CAP_REG */ +#define DMAR_ECAP_REG 0x10 /* Extended capabilities supported */ +#define DMAR_ECAP_REG_HI 0X14 +#define DMAR_GCMD_REG 0x18 /* Global command */ +#define DMAR_GSTS_REG 0x1c /* Global status */ +#define DMAR_RTADDR_REG 0x20 /* Root entry table */ +#define DMAR_RTADDR_REG_HI 0X24 +#define DMAR_CCMD_REG 0x28 /* Context command */ +#define DMAR_CCMD_REG_HI 0x2c +#define DMAR_FSTS_REG 0x34 /* Fault status */ +#define DMAR_FECTL_REG 0x38 /* Fault control */ +#define DMAR_FEDATA_REG 0x3c /* Fault event interrupt data */ +#define DMAR_FEADDR_REG 0x40 /* Fault event interrupt addr */ +#define DMAR_FEUADDR_REG 0x44 /* Upper address */ +#define DMAR_AFLOG_REG 0x58 /* Advanced fault control */ +#define DMAR_AFLOG_REG_HI 0X5c +#define DMAR_PMEN_REG 0x64 /* Enable protected memory region */ +#define DMAR_PLMBASE_REG 0x68 /* PMRR low addr */ +#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */ +#define DMAR_PHMBASE_REG 0x70 /* PMRR high base addr */ +#define DMAR_PHMBASE_REG_HI 0X74 +#define DMAR_PHMLIMIT_REG 0x78 /* PMRR high limit */ +#define DMAR_PHMLIMIT_REG_HI 0x7c +#define DMAR_IQH_REG 0x80 /* Invalidation queue head */ +#define DMAR_IQH_REG_HI 0X84 +#define DMAR_IQT_REG 0x88 /* Invalidation queue tail */ +#define DMAR_IQT_REG_HI 0X8c +#define DMAR_IQA_REG 0x90 /* Invalidation queue addr */ +#define DMAR_IQA_REG_HI 0x94 +#define DMAR_ICS_REG 0x9c /* 
Invalidation complete status */ +#define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr */ +#define DMAR_IRTA_REG_HI 0xbc +#define DMAR_IECTL_REG 0xa0 /* Invalidation event control */ +#define DMAR_IEDATA_REG 0xa4 /* Invalidation event data */ +#define DMAR_IEADDR_REG 0xa8 /* Invalidation event address */ +#define DMAR_IEUADDR_REG 0xac /* Invalidation event address */ +#define DMAR_PQH_REG 0xc0 /* Page request queue head */ +#define DMAR_PQH_REG_HI 0xc4 +#define DMAR_PQT_REG 0xc8 /* Page request queue tail*/ +#define DMAR_PQT_REG_HI 0xcc +#define DMAR_PQA_REG 0xd0 /* Page request queue address */ +#define DMAR_PQA_REG_HI 0xd4 +#define DMAR_PRS_REG 0xdc /* Page request status */ +#define DMAR_PECTL_REG 0xe0 /* Page request event control */ +#define DMAR_PEDATA_REG 0xe4 /* Page request event data */ +#define DMAR_PEADDR_REG 0xe8 /* Page request event address */ +#define DMAR_PEUADDR_REG 0xec /* Page event upper address */ +#define DMAR_MTRRCAP_REG 0x100 /* MTRR capability */ +#define DMAR_MTRRCAP_REG_HI 0x104 +#define DMAR_MTRRDEF_REG 0x108 /* MTRR default type */ +#define DMAR_MTRRDEF_REG_HI 0x10c + +#define VTD_GCMD_IR_TABLE 0x1000000 +#define VTD_GCMD_IR 0x2000000 +#define VTD_GCMD_QI 0x4000000 +#define VTD_GCMD_WBF 0x8000000 /* Write Buffer Flush */ +#define VTD_GCMD_SFL 0x20000000 /* Set Fault Log */ +#define VTD_GCMD_ROOT 0x40000000 +#define VTD_GCMD_DMAR 0x80000000 +#define VTD_GCMD_ONE_SHOT_BITS (VTD_GCMD_IR_TABLE | VTD_GCMD_WBF | \ + VTD_GCMD_SFL | VTD_GCMD_ROOT) + +/* Supported Adjusted Guest Address Widths */ +#define VTD_CAP_SAGAW_SHIFT 8 +/* 39-bit AGAW, 3-level page-table */ +#define VTD_CAP_SAGAW_39bit (0x2ULL << VTD_CAP_SAGAW_SHIFT) +/* 48-bit AGAW, 4-level page-table */ +#define VTD_CAP_SAGAW_48bit (0x4ULL << VTD_CAP_SAGAW_SHIFT) +#define VTD_CAP_SAGAW VTD_CAP_SAGAW_39bit + +/* Both 1G/2M huge pages */ +#define VTD_CAP_SLLPS ((1ULL << 34) | (1ULL << 35)) + +#define VTD_CONTEXT_TT_MULTI_LEVEL 0 +#define VTD_CONTEXT_TT_DEV_IOTLB 1 +#define 
VTD_CONTEXT_TT_PASS_THROUGH 2 + +#define VTD_PTE_R (1 << 0) +#define VTD_PTE_W (1 << 1) +#define VTD_PTE_RW (VTD_PTE_R | VTD_PTE_W) +#define VTD_PTE_ADDR GENMASK_ULL(63, 12) +#define VTD_PTE_HUGE (1 << 7) + +extern void *vtd_reg_base; +#define vtd_reg(reg) ({ assert(vtd_reg_base); \ + (volatile void *)(vtd_reg_base + reg); }) + +static inline void vtd_writel(unsigned int reg, uint32_t value) +{ + __raw_writel(value, vtd_reg(reg)); +} + +static inline void vtd_writeq(unsigned int reg, uint64_t value) +{ + __raw_writeq(value, vtd_reg(reg)); +} + +static inline uint32_t vtd_readl(unsigned int reg) +{ + return __raw_readl(vtd_reg(reg)); +} + +static inline uint64_t vtd_readq(unsigned int reg) +{ + return __raw_readq(vtd_reg(reg)); +} + +void vtd_init(void); +void vtd_map_range(uint16_t sid, phys_addr_t iova, phys_addr_t pa, size_t size); +bool vtd_setup_msi(struct pci_dev *dev, int vector, int dest_id); +void vtd_setup_ioapic_irq(struct pci_dev *dev, int vector, + int dest_id, trigger_mode_t trigger); + +#endif diff --git a/tests/kvm-unit-tests/lib/x86/io.c b/tests/kvm-unit-tests/lib/x86/io.c new file mode 100644 index 00000000..cc5ac585 --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/io.c @@ -0,0 +1,99 @@ +#include "libcflat.h" +#include "vm.h" +#include "smp.h" +#include "asm/io.h" +#include "asm/page.h" +#ifndef USE_SERIAL +#define USE_SERIAL +#endif + +static struct spinlock lock; +static int serial_iobase = 0x3f8; +static int serial_inited = 0; + +static void serial_outb(char ch) +{ + u8 lsr; + + do { + lsr = inb(serial_iobase + 0x05); + } while (!(lsr & 0x20)); + + outb(ch, serial_iobase + 0x00); +} + +static void serial_init(void) +{ + u8 lcr; + + /* set DLAB */ + lcr = inb(serial_iobase + 0x03); + lcr |= 0x80; + outb(lcr, serial_iobase + 0x03); + + /* set baud rate to 115200 */ + outb(0x01, serial_iobase + 0x00); + outb(0x00, serial_iobase + 0x01); + + /* clear DLAB */ + lcr = inb(serial_iobase + 0x03); + lcr &= ~0x80; + outb(lcr, serial_iobase + 0x03); +} + 
+static void print_serial(const char *buf) +{ + unsigned long len = strlen(buf); +#ifdef USE_SERIAL + unsigned long i; + if (!serial_inited) { + serial_init(); + serial_inited = 1; + } + + for (i = 0; i < len; i++) { + serial_outb(buf[i]); + } +#else + asm volatile ("rep/outsb" : "+S"(buf), "+c"(len) : "d"(0xf1)); +#endif +} + +void puts(const char *s) +{ + spin_lock(&lock); + print_serial(s); + spin_unlock(&lock); +} + +void exit(int code) +{ +#ifdef USE_SERIAL + static const char shutdown_str[8] = "Shutdown"; + int i; + + /* test device exit (with status) */ + outl(code, 0xf4); + + /* if that failed, try the Bochs poweroff port */ + for (i = 0; i < 8; i++) { + outb(shutdown_str[i], 0x8900); + } +#else + asm volatile("out %0, %1" : : "a"(code), "d"((short)0xf4)); +#endif +} + +void __iomem *ioremap(phys_addr_t phys_addr, size_t size) +{ + phys_addr_t base = phys_addr & PAGE_MASK; + phys_addr_t offset = phys_addr - base; + + /* + * The kernel sets PTEs for an ioremap() with page cache disabled, + * but we do not do that right now. It would make sense that I/O + * mappings would be uncached - and may help us find bugs when we + * properly map that way. 
+ */ + return vmap(phys_addr, size) + offset; +} diff --git a/tests/kvm-unit-tests/lib/x86/isr.c b/tests/kvm-unit-tests/lib/x86/isr.c new file mode 100644 index 00000000..9b1d5054 --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/isr.c @@ -0,0 +1,124 @@ +#include "libcflat.h" +#include "isr.h" +#include "vm.h" +#include "desc.h" + +extern char isr_entry_point[]; + +asm ( + "isr_entry_point: \n" +#ifdef __x86_64__ + "push %r15 \n\t" + "push %r14 \n\t" + "push %r13 \n\t" + "push %r12 \n\t" + "push %r11 \n\t" + "push %r10 \n\t" + "push %r9 \n\t" + "push %r8 \n\t" +#endif + "push %"R "di \n\t" + "push %"R "si \n\t" + "push %"R "bp \n\t" + "push %"R "sp \n\t" + "push %"R "bx \n\t" + "push %"R "dx \n\t" + "push %"R "cx \n\t" + "push %"R "ax \n\t" +#ifdef __x86_64__ + "mov %rsp, %rdi \n\t" + "callq *8*16(%rsp) \n\t" +#else + "push %esp \n\t" + "calll *4+4*8(%esp) \n\t" + "add $4, %esp \n\t" +#endif + "pop %"R "ax \n\t" + "pop %"R "cx \n\t" + "pop %"R "dx \n\t" + "pop %"R "bx \n\t" + "pop %"R "bp \n\t" + "pop %"R "bp \n\t" + "pop %"R "si \n\t" + "pop %"R "di \n\t" +#ifdef __x86_64__ + "pop %r8 \n\t" + "pop %r9 \n\t" + "pop %r10 \n\t" + "pop %r11 \n\t" + "pop %r12 \n\t" + "pop %r13 \n\t" + "pop %r14 \n\t" + "pop %r15 \n\t" +#endif + ".globl isr_iret_ip\n\t" +#ifdef __x86_64__ + "add $8, %rsp \n\t" + "isr_iret_ip: \n\t" + "iretq \n\t" +#else + "add $4, %esp \n\t" + "isr_iret_ip: \n\t" + "iretl \n\t" +#endif + ); + +void handle_irq(unsigned vec, void (*func)(isr_regs_t *regs)) +{ + u8 *thunk = vmalloc(50); + + set_idt_entry(vec, thunk, 0); + +#ifdef __x86_64__ + /* sub $8, %rsp */ + *thunk++ = 0x48; *thunk++ = 0x83; *thunk++ = 0xec; *thunk++ = 0x08; + /* mov $func_low, %(rsp) */ + *thunk++ = 0xc7; *thunk++ = 0x04; *thunk++ = 0x24; + *(u32 *)thunk = (ulong)func; thunk += 4; + /* mov $func_high, %(rsp+4) */ + *thunk++ = 0xc7; *thunk++ = 0x44; *thunk++ = 0x24; *thunk++ = 0x04; + *(u32 *)thunk = (ulong)func >> 32; thunk += 4; + /* jmp isr_entry_point */ + *thunk ++ = 0xe9; + *(u32 
*)thunk = (ulong)isr_entry_point - (ulong)(thunk + 4); +#else + /* push $func */ + *thunk++ = 0x68; + *(u32 *)thunk = (ulong)func; thunk += 4; + /* jmp isr_entry_point */ + *thunk++ = 0xe9; + *(u32 *)thunk = (ulong)isr_entry_point - (ulong)(thunk + 4); +#endif +} + +void handle_external_interrupt(int vector) +{ + idt_entry_t *idt = &boot_idt[vector]; + unsigned long entry = + idt->offset0 | ((unsigned long)idt->offset1 << 16); +#ifdef __x86_64__ + unsigned long tmp; + entry |= ((unsigned long)idt->offset2 << 32); +#endif + + asm volatile( +#ifdef __x86_64__ + "mov %%rsp, %[sp]\n\t" + "and $0xfffffffffffffff0, %%rsp\n\t" + "push $%c[ss]\n\t" + "push %[sp]\n\t" +#endif + "pushf\n\t" + "orl $0x200, (%%"R "sp)\n\t" + "push $%c[cs]\n\t" + "call *%[entry]\n\t" + : +#ifdef __x86_64__ + [sp]"=&r"(tmp) +#endif + : + [entry]"r"(entry), + [ss]"i"(KERNEL_DS), + [cs]"i"(KERNEL_CS) + ); +} diff --git a/tests/kvm-unit-tests/lib/x86/isr.h b/tests/kvm-unit-tests/lib/x86/isr.h new file mode 100644 index 00000000..a5092919 --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/isr.h @@ -0,0 +1,14 @@ +#ifndef __ISR_TEST__ +#define __ISR_TEST__ + +typedef struct { + ulong regs[sizeof(ulong)*2]; + ulong func; + ulong rip; + ulong cs; + ulong rflags; +} isr_regs_t; + +void handle_irq(unsigned vec, void (*func)(isr_regs_t *regs)); +void handle_external_interrupt(int vector); +#endif diff --git a/tests/kvm-unit-tests/lib/x86/msr.h b/tests/kvm-unit-tests/lib/x86/msr.h new file mode 100644 index 00000000..2c0598c6 --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/msr.h @@ -0,0 +1,412 @@ +#ifndef _ASM_X86_MSR_INDEX_H +#define _ASM_X86_MSR_INDEX_H + +/* CPU model specific register (MSR) numbers */ + +/* x86-64 specific MSRs */ +#define MSR_EFER 0xc0000080 /* extended feature register */ +#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */ +#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */ +#define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target */ +#define MSR_SYSCALL_MASK 
0xc0000084 /* EFLAGS mask for syscall */ +#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */ +#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */ +#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */ +#define MSR_TSC_AUX 0xc0000103 /* Auxiliary TSC */ + +/* EFER bits: */ +#define _EFER_SCE 0 /* SYSCALL/SYSRET */ +#define _EFER_LME 8 /* Long mode enable */ +#define _EFER_LMA 10 /* Long mode active (read-only) */ +#define _EFER_NX 11 /* No execute enable */ +#define _EFER_SVME 12 /* Enable virtualization */ +#define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ +#define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ + +#define EFER_SCE (1<<_EFER_SCE) +#define EFER_LME (1<<_EFER_LME) +#define EFER_LMA (1<<_EFER_LMA) +#define EFER_NX (1<<_EFER_NX) +#define EFER_SVME (1<<_EFER_SVME) +#define EFER_LMSLE (1<<_EFER_LMSLE) +#define EFER_FFXSR (1<<_EFER_FFXSR) + +/* Intel MSRs. Some also available on other CPUs */ +#define MSR_IA32_PERFCTR0 0x000000c1 +#define MSR_IA32_PERFCTR1 0x000000c2 +#define MSR_FSB_FREQ 0x000000cd + +#define MSR_MTRRcap 0x000000fe +#define MSR_IA32_BBL_CR_CTL 0x00000119 + +#define MSR_IA32_SYSENTER_CS 0x00000174 +#define MSR_IA32_SYSENTER_ESP 0x00000175 +#define MSR_IA32_SYSENTER_EIP 0x00000176 + +#define MSR_IA32_MCG_CAP 0x00000179 +#define MSR_IA32_MCG_STATUS 0x0000017a +#define MSR_IA32_MCG_CTL 0x0000017b + +#define MSR_IA32_PEBS_ENABLE 0x000003f1 +#define MSR_IA32_DS_AREA 0x00000600 +#define MSR_IA32_PERF_CAPABILITIES 0x00000345 + +#define MSR_MTRRfix64K_00000 0x00000250 +#define MSR_MTRRfix16K_80000 0x00000258 +#define MSR_MTRRfix16K_A0000 0x00000259 +#define MSR_MTRRfix4K_C0000 0x00000268 +#define MSR_MTRRfix4K_C8000 0x00000269 +#define MSR_MTRRfix4K_D0000 0x0000026a +#define MSR_MTRRfix4K_D8000 0x0000026b +#define MSR_MTRRfix4K_E0000 0x0000026c +#define MSR_MTRRfix4K_E8000 0x0000026d +#define MSR_MTRRfix4K_F0000 0x0000026e +#define MSR_MTRRfix4K_F8000 0x0000026f +#define MSR_MTRRdefType 0x000002ff + +#define MSR_IA32_CR_PAT 
0x00000277 + +#define MSR_IA32_DEBUGCTLMSR 0x000001d9 +#define MSR_IA32_LASTBRANCHFROMIP 0x000001db +#define MSR_IA32_LASTBRANCHTOIP 0x000001dc +#define MSR_IA32_LASTINTFROMIP 0x000001dd +#define MSR_IA32_LASTINTTOIP 0x000001de + +/* DEBUGCTLMSR bits (others vary by model): */ +#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ +#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ +#define DEBUGCTLMSR_TR (1UL << 6) +#define DEBUGCTLMSR_BTS (1UL << 7) +#define DEBUGCTLMSR_BTINT (1UL << 8) +#define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9) +#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) +#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) + +#define MSR_IA32_MC0_CTL 0x00000400 +#define MSR_IA32_MC0_STATUS 0x00000401 +#define MSR_IA32_MC0_ADDR 0x00000402 +#define MSR_IA32_MC0_MISC 0x00000403 + +#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x)) +#define MSR_IA32_MCx_STATUS(x) (MSR_IA32_MC0_STATUS + 4*(x)) +#define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x)) +#define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x)) + +/* These are consecutive and not in the normal 4er MCE bank block */ +#define MSR_IA32_MC0_CTL2 0x00000280 +#define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x)) + +#define CMCI_EN (1ULL << 30) +#define CMCI_THRESHOLD_MASK 0xffffULL + +#define MSR_P6_PERFCTR0 0x000000c1 +#define MSR_P6_PERFCTR1 0x000000c2 +#define MSR_P6_EVNTSEL0 0x00000186 +#define MSR_P6_EVNTSEL1 0x00000187 + +/* AMD64 MSRs. Not complete. See the architecture manual for a more + complete list. 
*/ + +#define MSR_AMD64_PATCH_LEVEL 0x0000008b +#define MSR_AMD64_NB_CFG 0xc001001f +#define MSR_AMD64_PATCH_LOADER 0xc0010020 +#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 +#define MSR_AMD64_OSVW_STATUS 0xc0010141 +#define MSR_AMD64_DC_CFG 0xc0011022 +#define MSR_AMD64_IBSFETCHCTL 0xc0011030 +#define MSR_AMD64_IBSFETCHLINAD 0xc0011031 +#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 +#define MSR_AMD64_IBSOPCTL 0xc0011033 +#define MSR_AMD64_IBSOPRIP 0xc0011034 +#define MSR_AMD64_IBSOPDATA 0xc0011035 +#define MSR_AMD64_IBSOPDATA2 0xc0011036 +#define MSR_AMD64_IBSOPDATA3 0xc0011037 +#define MSR_AMD64_IBSDCLINAD 0xc0011038 +#define MSR_AMD64_IBSDCPHYSAD 0xc0011039 +#define MSR_AMD64_IBSCTL 0xc001103a + +/* Fam 10h MSRs */ +#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 +#define FAM10H_MMIO_CONF_ENABLE (1<<0) +#define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf +#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2 +#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffff +#define FAM10H_MMIO_CONF_BASE_SHIFT 20 +#define MSR_FAM10H_NODE_ID 0xc001100c + +/* K8 MSRs */ +#define MSR_K8_TOP_MEM1 0xc001001a +#define MSR_K8_TOP_MEM2 0xc001001d +#define MSR_K8_SYSCFG 0xc0010010 +#define MSR_K8_INT_PENDING_MSG 0xc0010055 +/* C1E active bits in int pending message */ +#define K8_INTP_C1E_ACTIVE_MASK 0x18000000 +#define MSR_K8_TSEG_ADDR 0xc0010112 +#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */ +#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */ +#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */ + +/* K7 MSRs */ +#define MSR_K7_EVNTSEL0 0xc0010000 +#define MSR_K7_PERFCTR0 0xc0010004 +#define MSR_K7_EVNTSEL1 0xc0010001 +#define MSR_K7_PERFCTR1 0xc0010005 +#define MSR_K7_EVNTSEL2 0xc0010002 +#define MSR_K7_PERFCTR2 0xc0010006 +#define MSR_K7_EVNTSEL3 0xc0010003 +#define MSR_K7_PERFCTR3 0xc0010007 +#define MSR_K7_CLK_CTL 0xc001001b +#define MSR_K7_HWCR 0xc0010015 +#define MSR_K7_FID_VID_CTL 0xc0010041 +#define MSR_K7_FID_VID_STATUS 0xc0010042 + +/* K6 MSRs 
*/ +#define MSR_K6_EFER 0xc0000080 +#define MSR_K6_STAR 0xc0000081 +#define MSR_K6_WHCR 0xc0000082 +#define MSR_K6_UWCCR 0xc0000085 +#define MSR_K6_EPMR 0xc0000086 +#define MSR_K6_PSOR 0xc0000087 +#define MSR_K6_PFIR 0xc0000088 + +/* Centaur-Hauls/IDT defined MSRs. */ +#define MSR_IDT_FCR1 0x00000107 +#define MSR_IDT_FCR2 0x00000108 +#define MSR_IDT_FCR3 0x00000109 +#define MSR_IDT_FCR4 0x0000010a + +#define MSR_IDT_MCR0 0x00000110 +#define MSR_IDT_MCR1 0x00000111 +#define MSR_IDT_MCR2 0x00000112 +#define MSR_IDT_MCR3 0x00000113 +#define MSR_IDT_MCR4 0x00000114 +#define MSR_IDT_MCR5 0x00000115 +#define MSR_IDT_MCR6 0x00000116 +#define MSR_IDT_MCR7 0x00000117 +#define MSR_IDT_MCR_CTRL 0x00000120 + +/* VIA Cyrix defined MSRs*/ +#define MSR_VIA_FCR 0x00001107 +#define MSR_VIA_LONGHAUL 0x0000110a +#define MSR_VIA_RNG 0x0000110b +#define MSR_VIA_BCR2 0x00001147 + +/* Transmeta defined MSRs */ +#define MSR_TMTA_LONGRUN_CTRL 0x80868010 +#define MSR_TMTA_LONGRUN_FLAGS 0x80868011 +#define MSR_TMTA_LRTI_READOUT 0x80868018 +#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a + +/* Intel defined MSRs. 
*/ +#define MSR_IA32_P5_MC_ADDR 0x00000000 +#define MSR_IA32_P5_MC_TYPE 0x00000001 +#define MSR_IA32_TSC 0x00000010 +#define MSR_IA32_PLATFORM_ID 0x00000017 +#define MSR_IA32_EBL_CR_POWERON 0x0000002a +#define MSR_IA32_FEATURE_CONTROL 0x0000003a + +#define FEATURE_CONTROL_LOCKED (1<<0) +#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1) +#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2) + +#define MSR_IA32_APICBASE 0x0000001b +#define MSR_IA32_APICBASE_BSP (1<<8) +#define MSR_IA32_APICBASE_ENABLE (1<<11) +#define MSR_IA32_APICBASE_BASE (0xfffff<<12) + +#define MSR_IA32_UCODE_WRITE 0x00000079 +#define MSR_IA32_UCODE_REV 0x0000008b + +#define MSR_IA32_PERF_STATUS 0x00000198 +#define MSR_IA32_PERF_CTL 0x00000199 + +#define MSR_IA32_MPERF 0x000000e7 +#define MSR_IA32_APERF 0x000000e8 + +#define MSR_IA32_THERM_CONTROL 0x0000019a +#define MSR_IA32_THERM_INTERRUPT 0x0000019b + +#define THERM_INT_LOW_ENABLE (1 << 0) +#define THERM_INT_HIGH_ENABLE (1 << 1) + +#define MSR_IA32_THERM_STATUS 0x0000019c + +#define THERM_STATUS_PROCHOT (1 << 0) + +#define MSR_THERM2_CTL 0x0000019d + +#define MSR_THERM2_CTL_TM_SELECT (1ULL << 16) + +#define MSR_IA32_MISC_ENABLE 0x000001a0 + +#define MSR_IA32_TEMPERATURE_TARGET 0x000001a2 + +/* MISC_ENABLE bits: architectural */ +#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << 0) +#define MSR_IA32_MISC_ENABLE_TCC (1ULL << 1) +#define MSR_IA32_MISC_ENABLE_EMON (1ULL << 7) +#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL (1ULL << 11) +#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << 12) +#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP (1ULL << 16) +#define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << 18) +#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << 22) +#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << 23) +#define MSR_IA32_MISC_ENABLE_XD_DISABLE (1ULL << 34) + +/* MISC_ENABLE bits: model-specific, meaning may vary from core to core */ +#define MSR_IA32_MISC_ENABLE_X87_COMPAT (1ULL << 2) +#define MSR_IA32_MISC_ENABLE_TM1 (1ULL << 3) +#define 
MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE (1ULL << 4) +#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE (1ULL << 6) +#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK (1ULL << 8) +#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << 9) +#define MSR_IA32_MISC_ENABLE_FERR (1ULL << 10) +#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX (1ULL << 10) +#define MSR_IA32_MISC_ENABLE_TM2 (1ULL << 13) +#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE (1ULL << 19) +#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK (1ULL << 20) +#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT (1ULL << 24) +#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE (1ULL << 37) +#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << 38) +#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << 39) + +/* P4/Xeon+ specific */ +#define MSR_IA32_MCG_EAX 0x00000180 +#define MSR_IA32_MCG_EBX 0x00000181 +#define MSR_IA32_MCG_ECX 0x00000182 +#define MSR_IA32_MCG_EDX 0x00000183 +#define MSR_IA32_MCG_ESI 0x00000184 +#define MSR_IA32_MCG_EDI 0x00000185 +#define MSR_IA32_MCG_EBP 0x00000186 +#define MSR_IA32_MCG_ESP 0x00000187 +#define MSR_IA32_MCG_EFLAGS 0x00000188 +#define MSR_IA32_MCG_EIP 0x00000189 +#define MSR_IA32_MCG_RESERVED 0x0000018a + +/* Pentium IV performance counter MSRs */ +#define MSR_P4_BPU_PERFCTR0 0x00000300 +#define MSR_P4_BPU_PERFCTR1 0x00000301 +#define MSR_P4_BPU_PERFCTR2 0x00000302 +#define MSR_P4_BPU_PERFCTR3 0x00000303 +#define MSR_P4_MS_PERFCTR0 0x00000304 +#define MSR_P4_MS_PERFCTR1 0x00000305 +#define MSR_P4_MS_PERFCTR2 0x00000306 +#define MSR_P4_MS_PERFCTR3 0x00000307 +#define MSR_P4_FLAME_PERFCTR0 0x00000308 +#define MSR_P4_FLAME_PERFCTR1 0x00000309 +#define MSR_P4_FLAME_PERFCTR2 0x0000030a +#define MSR_P4_FLAME_PERFCTR3 0x0000030b +#define MSR_P4_IQ_PERFCTR0 0x0000030c +#define MSR_P4_IQ_PERFCTR1 0x0000030d +#define MSR_P4_IQ_PERFCTR2 0x0000030e +#define MSR_P4_IQ_PERFCTR3 0x0000030f +#define MSR_P4_IQ_PERFCTR4 0x00000310 +#define MSR_P4_IQ_PERFCTR5 0x00000311 +#define MSR_P4_BPU_CCCR0 0x00000360 +#define MSR_P4_BPU_CCCR1 
0x00000361 +#define MSR_P4_BPU_CCCR2 0x00000362 +#define MSR_P4_BPU_CCCR3 0x00000363 +#define MSR_P4_MS_CCCR0 0x00000364 +#define MSR_P4_MS_CCCR1 0x00000365 +#define MSR_P4_MS_CCCR2 0x00000366 +#define MSR_P4_MS_CCCR3 0x00000367 +#define MSR_P4_FLAME_CCCR0 0x00000368 +#define MSR_P4_FLAME_CCCR1 0x00000369 +#define MSR_P4_FLAME_CCCR2 0x0000036a +#define MSR_P4_FLAME_CCCR3 0x0000036b +#define MSR_P4_IQ_CCCR0 0x0000036c +#define MSR_P4_IQ_CCCR1 0x0000036d +#define MSR_P4_IQ_CCCR2 0x0000036e +#define MSR_P4_IQ_CCCR3 0x0000036f +#define MSR_P4_IQ_CCCR4 0x00000370 +#define MSR_P4_IQ_CCCR5 0x00000371 +#define MSR_P4_ALF_ESCR0 0x000003ca +#define MSR_P4_ALF_ESCR1 0x000003cb +#define MSR_P4_BPU_ESCR0 0x000003b2 +#define MSR_P4_BPU_ESCR1 0x000003b3 +#define MSR_P4_BSU_ESCR0 0x000003a0 +#define MSR_P4_BSU_ESCR1 0x000003a1 +#define MSR_P4_CRU_ESCR0 0x000003b8 +#define MSR_P4_CRU_ESCR1 0x000003b9 +#define MSR_P4_CRU_ESCR2 0x000003cc +#define MSR_P4_CRU_ESCR3 0x000003cd +#define MSR_P4_CRU_ESCR4 0x000003e0 +#define MSR_P4_CRU_ESCR5 0x000003e1 +#define MSR_P4_DAC_ESCR0 0x000003a8 +#define MSR_P4_DAC_ESCR1 0x000003a9 +#define MSR_P4_FIRM_ESCR0 0x000003a4 +#define MSR_P4_FIRM_ESCR1 0x000003a5 +#define MSR_P4_FLAME_ESCR0 0x000003a6 +#define MSR_P4_FLAME_ESCR1 0x000003a7 +#define MSR_P4_FSB_ESCR0 0x000003a2 +#define MSR_P4_FSB_ESCR1 0x000003a3 +#define MSR_P4_IQ_ESCR0 0x000003ba +#define MSR_P4_IQ_ESCR1 0x000003bb +#define MSR_P4_IS_ESCR0 0x000003b4 +#define MSR_P4_IS_ESCR1 0x000003b5 +#define MSR_P4_ITLB_ESCR0 0x000003b6 +#define MSR_P4_ITLB_ESCR1 0x000003b7 +#define MSR_P4_IX_ESCR0 0x000003c8 +#define MSR_P4_IX_ESCR1 0x000003c9 +#define MSR_P4_MOB_ESCR0 0x000003aa +#define MSR_P4_MOB_ESCR1 0x000003ab +#define MSR_P4_MS_ESCR0 0x000003c0 +#define MSR_P4_MS_ESCR1 0x000003c1 +#define MSR_P4_PMH_ESCR0 0x000003ac +#define MSR_P4_PMH_ESCR1 0x000003ad +#define MSR_P4_RAT_ESCR0 0x000003bc +#define MSR_P4_RAT_ESCR1 0x000003bd +#define MSR_P4_SAAT_ESCR0 0x000003ae +#define MSR_P4_SAAT_ESCR1 
0x000003af +#define MSR_P4_SSU_ESCR0 0x000003be +#define MSR_P4_SSU_ESCR1 0x000003bf /* guess: not in manual */ + +#define MSR_P4_TBPU_ESCR0 0x000003c2 +#define MSR_P4_TBPU_ESCR1 0x000003c3 +#define MSR_P4_TC_ESCR0 0x000003c4 +#define MSR_P4_TC_ESCR1 0x000003c5 +#define MSR_P4_U2L_ESCR0 0x000003b0 +#define MSR_P4_U2L_ESCR1 0x000003b1 + +#define MSR_P4_PEBS_MATRIX_VERT 0x000003f2 + +/* Intel Core-based CPU performance counters */ +#define MSR_CORE_PERF_FIXED_CTR0 0x00000309 +#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a +#define MSR_CORE_PERF_FIXED_CTR2 0x0000030b +#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d +#define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e +#define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390 + +/* Geode defined MSRs */ +#define MSR_GEODE_BUSCONT_CONF0 0x00001900 + +/* Intel VT MSRs */ +#define MSR_IA32_VMX_BASIC 0x00000480 +#define MSR_IA32_VMX_PINBASED_CTLS 0x00000481 +#define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482 +#define MSR_IA32_VMX_EXIT_CTLS 0x00000483 +#define MSR_IA32_VMX_ENTRY_CTLS 0x00000484 +#define MSR_IA32_VMX_MISC 0x00000485 +#define MSR_IA32_VMX_CR0_FIXED0 0x00000486 +#define MSR_IA32_VMX_CR0_FIXED1 0x00000487 +#define MSR_IA32_VMX_CR4_FIXED0 0x00000488 +#define MSR_IA32_VMX_CR4_FIXED1 0x00000489 +#define MSR_IA32_VMX_VMCS_ENUM 0x0000048a +#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b +#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c +#define MSR_IA32_VMX_TRUE_PIN 0x0000048d +#define MSR_IA32_VMX_TRUE_PROC 0x0000048e +#define MSR_IA32_VMX_TRUE_EXIT 0x0000048f +#define MSR_IA32_VMX_TRUE_ENTRY 0x00000490 + +#define MSR_IA32_TSCDEADLINE 0x000006e0 + +/* AMD-V MSRs */ + +#define MSR_VM_CR 0xc0010114 +#define MSR_VM_IGNNE 0xc0010115 +#define MSR_VM_HSAVE_PA 0xc0010117 + +#endif /* _ASM_X86_MSR_INDEX_H */ diff --git a/tests/kvm-unit-tests/lib/x86/processor.h b/tests/kvm-unit-tests/lib/x86/processor.h new file mode 100644 index 00000000..895d992a --- /dev/null +++ 
b/tests/kvm-unit-tests/lib/x86/processor.h @@ -0,0 +1,433 @@ +#ifndef LIBCFLAT_PROCESSOR_H +#define LIBCFLAT_PROCESSOR_H + +#include "libcflat.h" +#include "msr.h" +#include + +#ifdef __x86_64__ +# define R "r" +# define W "q" +# define S "8" +#else +# define R "e" +# define W "l" +# define S "4" +#endif + +#define X86_CR0_PE 0x00000001 +#define X86_CR0_MP 0x00000002 +#define X86_CR0_TS 0x00000008 +#define X86_CR0_WP 0x00010000 +#define X86_CR0_AM 0x00040000 +#define X86_CR0_PG 0x80000000 +#define X86_CR4_TSD 0x00000004 +#define X86_CR4_DE 0x00000008 +#define X86_CR4_PSE 0x00000010 +#define X86_CR4_PAE 0x00000020 +#define X86_CR4_VMXE 0x00002000 +#define X86_CR4_PCIDE 0x00020000 +#define X86_CR4_SMAP 0x00200000 +#define X86_CR4_PKE 0x00400000 + +#define X86_EFLAGS_CF 0x00000001 +#define X86_EFLAGS_PF 0x00000004 +#define X86_EFLAGS_AF 0x00000010 +#define X86_EFLAGS_ZF 0x00000040 +#define X86_EFLAGS_SF 0x00000080 +#define X86_EFLAGS_OF 0x00000800 +#define X86_EFLAGS_AC 0x00040000 + +#define X86_IA32_EFER 0xc0000080 +#define X86_EFER_LMA (1UL << 8) + +struct far_pointer32 { + u32 offset; + u16 selector; +} __attribute__((packed)); + +struct descriptor_table_ptr { + u16 limit; + ulong base; +} __attribute__((packed)); + +static inline void barrier(void) +{ + asm volatile ("" : : : "memory"); +} + +static inline void clac(void) +{ + asm volatile (".byte 0x0f, 0x01, 0xca" : : : "memory"); +} + +static inline void stac(void) +{ + asm volatile (".byte 0x0f, 0x01, 0xcb" : : : "memory"); +} + +static inline u16 read_cs(void) +{ + unsigned val; + + asm volatile ("mov %%cs, %0" : "=mr"(val)); + return val; +} + +static inline u16 read_ds(void) +{ + unsigned val; + + asm volatile ("mov %%ds, %0" : "=mr"(val)); + return val; +} + +static inline u16 read_es(void) +{ + unsigned val; + + asm volatile ("mov %%es, %0" : "=mr"(val)); + return val; +} + +static inline u16 read_ss(void) +{ + unsigned val; + + asm volatile ("mov %%ss, %0" : "=mr"(val)); + return val; +} + +static inline 
u16 read_fs(void) +{ + unsigned val; + + asm volatile ("mov %%fs, %0" : "=mr"(val)); + return val; +} + +static inline u16 read_gs(void) +{ + unsigned val; + + asm volatile ("mov %%gs, %0" : "=mr"(val)); + return val; +} + +static inline unsigned long read_rflags(void) +{ + unsigned long f; + asm volatile ("pushf; pop %0\n\t" : "=rm"(f)); + return f; +} + +static inline void write_ds(unsigned val) +{ + asm volatile ("mov %0, %%ds" : : "rm"(val) : "memory"); +} + +static inline void write_es(unsigned val) +{ + asm volatile ("mov %0, %%es" : : "rm"(val) : "memory"); +} + +static inline void write_ss(unsigned val) +{ + asm volatile ("mov %0, %%ss" : : "rm"(val) : "memory"); +} + +static inline void write_fs(unsigned val) +{ + asm volatile ("mov %0, %%fs" : : "rm"(val) : "memory"); +} + +static inline void write_gs(unsigned val) +{ + asm volatile ("mov %0, %%gs" : : "rm"(val) : "memory"); +} + +static inline void write_rflags(unsigned long f) +{ + asm volatile ("push %0; popf\n\t" : : "rm"(f)); +} + +static inline u64 rdmsr(u32 index) +{ + u32 a, d; + asm volatile ("rdmsr" : "=a"(a), "=d"(d) : "c"(index) : "memory"); + return a | ((u64)d << 32); +} + +static inline void wrmsr(u32 index, u64 val) +{ + u32 a = val, d = val >> 32; + asm volatile ("wrmsr" : : "a"(a), "d"(d), "c"(index) : "memory"); +} + +static inline uint64_t rdpmc(uint32_t index) +{ + uint32_t a, d; + asm volatile ("rdpmc" : "=a"(a), "=d"(d) : "c"(index)); + return a | ((uint64_t)d << 32); +} + +static inline void write_cr0(ulong val) +{ + asm volatile ("mov %0, %%cr0" : : "r"(val) : "memory"); +} + +static inline ulong read_cr0(void) +{ + ulong val; + asm volatile ("mov %%cr0, %0" : "=r"(val) : : "memory"); + return val; +} + +static inline void write_cr2(ulong val) +{ + asm volatile ("mov %0, %%cr2" : : "r"(val) : "memory"); +} + +static inline ulong read_cr2(void) +{ + ulong val; + asm volatile ("mov %%cr2, %0" : "=r"(val) : : "memory"); + return val; +} + +static inline void write_cr3(ulong val) +{ + 
asm volatile ("mov %0, %%cr3" : : "r"(val) : "memory"); +} + +static inline ulong read_cr3(void) +{ + ulong val; + asm volatile ("mov %%cr3, %0" : "=r"(val) : : "memory"); + return val; +} + +static inline void write_cr4(ulong val) +{ + asm volatile ("mov %0, %%cr4" : : "r"(val) : "memory"); +} + +static inline ulong read_cr4(void) +{ + ulong val; + asm volatile ("mov %%cr4, %0" : "=r"(val) : : "memory"); + return val; +} + +static inline void write_cr8(ulong val) +{ + asm volatile ("mov %0, %%cr8" : : "r"(val) : "memory"); +} + +static inline ulong read_cr8(void) +{ + ulong val; + asm volatile ("mov %%cr8, %0" : "=r"(val) : : "memory"); + return val; +} + +static inline void lgdt(const struct descriptor_table_ptr *ptr) +{ + asm volatile ("lgdt %0" : : "m"(*ptr)); +} + +static inline void sgdt(struct descriptor_table_ptr *ptr) +{ + asm volatile ("sgdt %0" : "=m"(*ptr)); +} + +static inline void lidt(const struct descriptor_table_ptr *ptr) +{ + asm volatile ("lidt %0" : : "m"(*ptr)); +} + +static inline void sidt(struct descriptor_table_ptr *ptr) +{ + asm volatile ("sidt %0" : "=m"(*ptr)); +} + +static inline void lldt(unsigned val) +{ + asm volatile ("lldt %0" : : "rm"(val)); +} + +static inline u16 sldt(void) +{ + u16 val; + asm volatile ("sldt %0" : "=rm"(val)); + return val; +} + +static inline void ltr(u16 val) +{ + asm volatile ("ltr %0" : : "rm"(val)); +} + +static inline u16 str(void) +{ + u16 val; + asm volatile ("str %0" : "=rm"(val)); + return val; +} + +static inline void write_dr6(ulong val) +{ + asm volatile ("mov %0, %%dr6" : : "r"(val) : "memory"); +} + +static inline ulong read_dr6(void) +{ + ulong val; + asm volatile ("mov %%dr6, %0" : "=r"(val)); + return val; +} + +static inline void write_dr7(ulong val) +{ + asm volatile ("mov %0, %%dr7" : : "r"(val) : "memory"); +} + +static inline ulong read_dr7(void) +{ + ulong val; + asm volatile ("mov %%dr7, %0" : "=r"(val)); + return val; +} + +struct cpuid { u32 a, b, c, d; }; + +static inline struct 
cpuid raw_cpuid(u32 function, u32 index) +{ + struct cpuid r; + asm volatile ("cpuid" + : "=a"(r.a), "=b"(r.b), "=c"(r.c), "=d"(r.d) + : "0"(function), "2"(index)); + return r; +} + +static inline struct cpuid cpuid_indexed(u32 function, u32 index) +{ + u32 level = raw_cpuid(function & 0xf0000000, 0).a; + if (level < function) + return (struct cpuid) { 0, 0, 0, 0 }; + return raw_cpuid(function, index); +} + +static inline struct cpuid cpuid(u32 function) +{ + return cpuid_indexed(function, 0); +} + +static inline u8 cpuid_maxphyaddr(void) +{ + if (raw_cpuid(0x80000000, 0).a < 0x80000008) + return 36; + return raw_cpuid(0x80000008, 0).a & 0xff; +} + + +static inline void pause(void) +{ + asm volatile ("pause"); +} + +static inline void cli(void) +{ + asm volatile ("cli"); +} + +static inline void sti(void) +{ + asm volatile ("sti"); +} + +static inline unsigned long long rdtsc() +{ + long long r; + +#ifdef __x86_64__ + unsigned a, d; + + asm volatile ("rdtsc" : "=a"(a), "=d"(d)); + r = a | ((long long)d << 32); +#else + asm volatile ("rdtsc" : "=A"(r)); +#endif + return r; +} + +static inline unsigned long long rdtscp(u32 *aux) +{ + long long r; + +#ifdef __x86_64__ + unsigned a, d; + + asm volatile ("rdtscp" : "=a"(a), "=d"(d), "=c"(*aux)); + r = a | ((long long)d << 32); +#else + asm volatile ("rdtscp" : "=A"(r), "=c"(*aux)); +#endif + return r; +} + +static inline void wrtsc(u64 tsc) +{ + unsigned a = tsc, d = tsc >> 32; + + asm volatile("wrmsr" : : "a"(a), "d"(d), "c"(0x10)); +} + +static inline void irq_disable(void) +{ + asm volatile("cli"); +} + +/* Note that irq_enable() does not ensure an interrupt shadow due + * to the vagaries of compiler optimizations. If you need the + * shadow, use a single asm with "sti" and the instruction after it. 
+ */ +static inline void irq_enable(void) +{ + asm volatile("sti"); +} + +static inline void invlpg(volatile void *va) +{ + asm volatile("invlpg (%0)" ::"r" (va) : "memory"); +} + +static inline void safe_halt(void) +{ + asm volatile("sti; hlt"); +} + +static inline u32 read_pkru(void) +{ + unsigned int eax, edx; + unsigned int ecx = 0; + unsigned int pkru; + + asm volatile(".byte 0x0f,0x01,0xee\n\t" + : "=a" (eax), "=d" (edx) + : "c" (ecx)); + pkru = eax; + return pkru; +} + +static inline void write_pkru(u32 pkru) +{ + unsigned int eax = pkru; + unsigned int ecx = 0; + unsigned int edx = 0; + + asm volatile(".byte 0x0f,0x01,0xef\n\t" + : : "a" (eax), "c" (ecx), "d" (edx)); +} + +#endif diff --git a/tests/kvm-unit-tests/lib/x86/setjmp32.S b/tests/kvm-unit-tests/lib/x86/setjmp32.S new file mode 100644 index 00000000..b0be7c21 --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/setjmp32.S @@ -0,0 +1,25 @@ +.globl setjmp +setjmp: + mov (%esp), %ecx // get return EIP + mov 4(%esp), %eax // get jmp_buf + mov %ecx, (%eax) + mov %esp, 4(%eax) + mov %ebp, 8(%eax) + mov %ebx, 12(%eax) + mov %esi, 16(%eax) + mov %edi, 20(%eax) + xor %eax, %eax + ret + +.globl longjmp +longjmp: + mov 8(%esp), %eax // get return value + mov 4(%esp), %ecx // get jmp_buf + mov 20(%ecx), %edi + mov 16(%ecx), %esi + mov 12(%ecx), %ebx + mov 8(%ecx), %ebp + mov 4(%ecx), %esp + mov (%ecx), %ecx // get saved EIP + mov %ecx, (%esp) // and store it on the stack + ret diff --git a/tests/kvm-unit-tests/lib/x86/setjmp64.S b/tests/kvm-unit-tests/lib/x86/setjmp64.S new file mode 100644 index 00000000..c8ae7900 --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/setjmp64.S @@ -0,0 +1,27 @@ +.globl setjmp +setjmp: + mov (%rsp), %rsi + mov %rsi, (%rdi) + mov %rsp, 0x8(%rdi) + mov %rbp, 0x10(%rdi) + mov %rbx, 0x18(%rdi) + mov %r12, 0x20(%rdi) + mov %r13, 0x28(%rdi) + mov %r14, 0x30(%rdi) + mov %r15, 0x38(%rdi) + xor %eax, %eax + ret + +.globl longjmp +longjmp: + mov %esi, %eax + mov 0x38(%rdi), %r15 + mov 
0x30(%rdi), %r14 + mov 0x28(%rdi), %r13 + mov 0x20(%rdi), %r12 + mov 0x18(%rdi), %rbx + mov 0x10(%rdi), %rbp + mov 0x8(%rdi), %rsp + mov (%rdi), %rsi + mov %rsi, (%rsp) + ret diff --git a/tests/kvm-unit-tests/lib/x86/setup.c b/tests/kvm-unit-tests/lib/x86/setup.c new file mode 100644 index 00000000..804ba3a1 --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/setup.c @@ -0,0 +1,47 @@ +/* + * Initialize machine setup information + * + * Copyright (C) 2017, Red Hat Inc, Andrew Jones + * + * This work is licensed under the terms of the GNU LGPL, version 2. + */ +#include "libcflat.h" + +#define MBI_MODS_COUNT 20 +#define MBI_MODS_ADDR 24 +#define MB_MOD_START 0 +#define MB_MOD_END 4 + +#define ENV_SIZE 16384 + +extern void setup_env(char *env, int size); + +char *initrd; +u32 initrd_size; + +static char env[ENV_SIZE]; + +void setup_get_initrd(u8 *bootinfo) +{ + u32 *mods_addr, *mod_start, *mod_end; + + if (*((u32 *)&bootinfo[MBI_MODS_COUNT]) != 1) + return; + + mods_addr = (u32 *)&bootinfo[MBI_MODS_ADDR]; + mod_start = (u32 *)(ulong)(*mods_addr + MB_MOD_START); + mod_end = (u32 *)(ulong)(*mods_addr + MB_MOD_END); + + initrd = (char *)(ulong)*mod_start; + initrd_size = *mod_end - *mod_start; +} + +void setup_environ(void) +{ + if (initrd) { + /* environ is currently the only file in the initrd */ + u32 size = MIN(initrd_size, ENV_SIZE); + memcpy(env, initrd, size); + setup_env(env, size); + } +} diff --git a/tests/kvm-unit-tests/lib/x86/smp.c b/tests/kvm-unit-tests/lib/x86/smp.c new file mode 100644 index 00000000..1eb49f24 --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/smp.c @@ -0,0 +1,125 @@ + +#include +#include "smp.h" +#include "apic.h" +#include "fwcfg.h" +#include "desc.h" + +#define IPI_VECTOR 0x20 + +typedef void (*ipi_function_type)(void *data); + +static struct spinlock ipi_lock; +static volatile ipi_function_type ipi_function; +static void *volatile ipi_data; +static volatile int ipi_done; +static volatile bool ipi_wait; +static int _cpu_count; + +static 
__attribute__((used)) void ipi() +{ + void (*function)(void *data) = ipi_function; + void *data = ipi_data; + bool wait = ipi_wait; + + if (!wait) { + ipi_done = 1; + apic_write(APIC_EOI, 0); + } + function(data); + if (wait) { + ipi_done = 1; + apic_write(APIC_EOI, 0); + } +} + +asm ( + "ipi_entry: \n" + " call ipi \n" +#ifndef __x86_64__ + " iret" +#else + " iretq" +#endif + ); + +void spin_lock(struct spinlock *lock) +{ + int v = 1; + + do { + asm volatile ("xchg %1, %0" : "+m"(lock->v), "+r"(v)); + } while (v); + asm volatile ("" : : : "memory"); +} + +void spin_unlock(struct spinlock *lock) +{ + asm volatile ("" : : : "memory"); + lock->v = 0; +} + +int cpu_count(void) +{ + return _cpu_count; +} + +int smp_id(void) +{ + unsigned id; + + asm ("mov %%gs:0, %0" : "=r"(id)); + return id; +} + +static void setup_smp_id(void *data) +{ + asm ("mov %0, %%gs:0" : : "r"(apic_id()) : "memory"); +} + +static void __on_cpu(int cpu, void (*function)(void *data), void *data, + int wait) +{ + spin_lock(&ipi_lock); + if (cpu == smp_id()) + function(data); + else { + ipi_done = 0; + ipi_function = function; + ipi_data = data; + ipi_wait = wait; + apic_icr_write(APIC_INT_ASSERT | APIC_DEST_PHYSICAL | APIC_DM_FIXED + | IPI_VECTOR, + cpu); + while (!ipi_done) + ; + } + spin_unlock(&ipi_lock); +} + +void on_cpu(int cpu, void (*function)(void *data), void *data) +{ + __on_cpu(cpu, function, data, 1); +} + +void on_cpu_async(int cpu, void (*function)(void *data), void *data) +{ + __on_cpu(cpu, function, data, 0); +} + + +void smp_init(void) +{ + int i; + void ipi_entry(void); + + _cpu_count = fwcfg_get_nb_cpus(); + + setup_idt(); + set_idt_entry(IPI_VECTOR, ipi_entry, 0); + + setup_smp_id(0); + for (i = 1; i < cpu_count(); ++i) + on_cpu(i, setup_smp_id, 0); + +} diff --git a/tests/kvm-unit-tests/lib/x86/smp.h b/tests/kvm-unit-tests/lib/x86/smp.h new file mode 100644 index 00000000..afabac84 --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/smp.h @@ -0,0 +1,12 @@ +#ifndef __SMP_H 
+#define __SMP_H +#include + +void smp_init(void); + +int cpu_count(void); +int smp_id(void); +void on_cpu(int cpu, void (*function)(void *data), void *data); +void on_cpu_async(int cpu, void (*function)(void *data), void *data); + +#endif diff --git a/tests/kvm-unit-tests/lib/x86/stack.c b/tests/kvm-unit-tests/lib/x86/stack.c new file mode 100644 index 00000000..5ecd97ce --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/stack.c @@ -0,0 +1,31 @@ +#include +#include + +int backtrace_frame(const void *frame, const void **return_addrs, int max_depth) +{ + static int walking; + int depth = 0; + const unsigned long *bp = (unsigned long *) frame; + + if (walking) { + printf("RECURSIVE STACK WALK!!!\n"); + return 0; + } + walking = 1; + + for (depth = 0; bp && depth < max_depth; depth++) { + return_addrs[depth] = (void *) bp[1]; + if (return_addrs[depth] == 0) + break; + bp = (unsigned long *) bp[0]; + } + + walking = 0; + return depth; +} + +int backtrace(const void **return_addrs, int max_depth) +{ + return backtrace_frame(__builtin_frame_address(0), return_addrs, + max_depth); +} diff --git a/tests/kvm-unit-tests/lib/x86/vm.c b/tests/kvm-unit-tests/lib/x86/vm.c new file mode 100644 index 00000000..cda4c5f4 --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/vm.c @@ -0,0 +1,224 @@ +#include "fwcfg.h" +#include "vm.h" +#include "libcflat.h" + +static void *free = 0; +static void *vfree_top = 0; + +static void free_memory(void *mem, unsigned long size) +{ + while (size >= PAGE_SIZE) { + *(void **)mem = free; + free = mem; + mem += PAGE_SIZE; + size -= PAGE_SIZE; + } +} + +void *alloc_page() +{ + void *p; + + if (!free) + return 0; + + p = free; + free = *(void **)free; + + return p; +} + +void free_page(void *page) +{ + *(void **)page = free; + free = page; +} + +extern char edata; +static unsigned long end_of_memory; + +unsigned long *install_pte(unsigned long *cr3, + int pte_level, + void *virt, + unsigned long pte, + unsigned long *pt_page) +{ + int level; + unsigned long 
*pt = cr3; + unsigned offset; + + for (level = PAGE_LEVEL; level > pte_level; --level) { + offset = PGDIR_OFFSET((unsigned long)virt, level); + if (!(pt[offset] & PT_PRESENT_MASK)) { + unsigned long *new_pt = pt_page; + if (!new_pt) + new_pt = alloc_page(); + else + pt_page = 0; + memset(new_pt, 0, PAGE_SIZE); + pt[offset] = virt_to_phys(new_pt) | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK; + } + pt = phys_to_virt(pt[offset] & PT_ADDR_MASK); + } + offset = PGDIR_OFFSET((unsigned long)virt, level); + pt[offset] = pte; + return &pt[offset]; +} + +unsigned long *get_pte(unsigned long *cr3, void *virt) +{ + int level; + unsigned long *pt = cr3, pte; + unsigned offset; + + for (level = PAGE_LEVEL; level > 1; --level) { + offset = ((unsigned long)virt >> (((level-1) * PGDIR_WIDTH) + 12)) & PGDIR_MASK; + pte = pt[offset]; + if (!(pte & PT_PRESENT_MASK)) + return NULL; + if (level == 2 && (pte & PT_PAGE_SIZE_MASK)) + return &pt[offset]; + pt = phys_to_virt(pte & PT_ADDR_MASK); + } + offset = ((unsigned long)virt >> (((level-1) * PGDIR_WIDTH) + 12)) & PGDIR_MASK; + return &pt[offset]; +} + +unsigned long *install_large_page(unsigned long *cr3, + unsigned long phys, + void *virt) +{ + return install_pte(cr3, 2, virt, + phys | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK | PT_PAGE_SIZE_MASK, 0); +} + +unsigned long *install_page(unsigned long *cr3, + unsigned long phys, + void *virt) +{ + return install_pte(cr3, 1, virt, phys | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK, 0); +} + + +static void setup_mmu_range(unsigned long *cr3, unsigned long start, + unsigned long len) +{ + u64 max = (u64)len + (u64)start; + u64 phys = start; + + while (phys + LARGE_PAGE_SIZE <= max) { + install_large_page(cr3, phys, (void *)(ulong)phys); + phys += LARGE_PAGE_SIZE; + } + while (phys + PAGE_SIZE <= max) { + install_page(cr3, phys, (void *)(ulong)phys); + phys += PAGE_SIZE; + } +} + +static void setup_mmu(unsigned long len) +{ + unsigned long *cr3 = alloc_page(); + + 
memset(cr3, 0, PAGE_SIZE); + +#ifdef __x86_64__ + if (len < (1ul << 32)) + len = (1ul << 32); /* map mmio 1:1 */ + + setup_mmu_range(cr3, 0, len); +#else + if (len > (1ul << 31)) + len = (1ul << 31); + + /* 0 - 2G memory, 2G-3G valloc area, 3G-4G mmio */ + setup_mmu_range(cr3, 0, len); + setup_mmu_range(cr3, 3ul << 30, (1ul << 30)); + vfree_top = (void*)(3ul << 30); +#endif + + write_cr3(virt_to_phys(cr3)); +#ifndef __x86_64__ + write_cr4(X86_CR4_PSE); +#endif + write_cr0(X86_CR0_PG |X86_CR0_PE | X86_CR0_WP); + + printf("paging enabled\n"); + printf("cr0 = %lx\n", read_cr0()); + printf("cr3 = %lx\n", read_cr3()); + printf("cr4 = %lx\n", read_cr4()); +} + +void setup_vm() +{ + assert(!end_of_memory); + end_of_memory = fwcfg_get_u64(FW_CFG_RAM_SIZE); + free_memory(&edata, end_of_memory - (unsigned long)&edata); + setup_mmu(end_of_memory); +} + +void *vmalloc(unsigned long size) +{ + void *mem, *p; + unsigned pages; + + size += sizeof(unsigned long); + + size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); + vfree_top -= size; + mem = p = vfree_top; + pages = size / PAGE_SIZE; + while (pages--) { + install_page(phys_to_virt(read_cr3()), virt_to_phys(alloc_page()), p); + p += PAGE_SIZE; + } + *(unsigned long *)mem = size; + mem += sizeof(unsigned long); + return mem; +} + +uint64_t virt_to_phys_cr3(void *mem) +{ + return (*get_pte(phys_to_virt(read_cr3()), mem) & PT_ADDR_MASK) + ((ulong)mem & (PAGE_SIZE - 1)); +} + +void vfree(void *mem) +{ + unsigned long size = ((unsigned long *)mem)[-1]; + + while (size) { + free_page(phys_to_virt(*get_pte(phys_to_virt(read_cr3()), mem) & PT_ADDR_MASK)); + mem += PAGE_SIZE; + size -= PAGE_SIZE; + } +} + +void *vmap(unsigned long long phys, unsigned long size) +{ + void *mem, *p; + unsigned pages; + + size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); + vfree_top -= size; + phys &= ~(unsigned long long)(PAGE_SIZE - 1); + + mem = p = vfree_top; + pages = size / PAGE_SIZE; + while (pages--) { + install_page(phys_to_virt(read_cr3()), phys, 
p); + phys += PAGE_SIZE; + p += PAGE_SIZE; + } + return mem; +} + +void *alloc_vpages(ulong nr) +{ + vfree_top -= PAGE_SIZE * nr; + return vfree_top; +} + +void *alloc_vpage(void) +{ + return alloc_vpages(1); +} diff --git a/tests/kvm-unit-tests/lib/x86/vm.h b/tests/kvm-unit-tests/lib/x86/vm.h new file mode 100644 index 00000000..6a4384f5 --- /dev/null +++ b/tests/kvm-unit-tests/lib/x86/vm.h @@ -0,0 +1,31 @@ +#ifndef VM_H +#define VM_H + +#include "processor.h" +#include "asm/page.h" +#include "asm/io.h" + +void setup_vm(); + +void *vmalloc(unsigned long size); +void vfree(void *mem); +void *vmap(unsigned long long phys, unsigned long size); +void *alloc_vpage(void); +void *alloc_vpages(ulong nr); +uint64_t virt_to_phys_cr3(void *mem); + +unsigned long *get_pte(unsigned long *cr3, void *virt); +unsigned long *install_pte(unsigned long *cr3, + int pte_level, + void *virt, + unsigned long pte, + unsigned long *pt_page); + +void *alloc_page(); +void free_page(void *page); + +unsigned long *install_large_page(unsigned long *cr3,unsigned long phys, + void *virt); +unsigned long *install_page(unsigned long *cr3, unsigned long phys, void *virt); + +#endif diff --git a/tests/kvm-unit-tests/run.js b/tests/kvm-unit-tests/run.js new file mode 100755 index 00000000..926fbb5b --- /dev/null +++ b/tests/kvm-unit-tests/run.js @@ -0,0 +1,38 @@ +#!/usr/bin/env node +"use strict"; + +var V86 = require("../../build/libv86.js").V86; +var fs = require("fs"); + +function readfile(path) +{ + return new Uint8Array(fs.readFileSync(path)).buffer; +} + +function Loader(path) +{ + this.buffer = readfile(path); + this.byteLength = this.buffer.byteLength; +} + +Loader.prototype.load = function() +{ + this.onload && this.onload({}); +}; + +var bios = readfile(__dirname + "/../../bios/seabios.bin"); +var vga_bios = readfile(__dirname + "/../../bios/vgabios.bin"); + +var emulator = new V86({ + bios: { buffer: bios }, + vga_bios: { buffer: vga_bios }, + multiboot: new Loader(process.argv[2]), + 
autostart: true, + memory_size: 256 * 1024 * 1024, +}); + +emulator.add_listener("serial0-output-char", function(chr) +{ + process.stdout.write(chr); +}); + diff --git a/tests/kvm-unit-tests/x86/Makefile b/tests/kvm-unit-tests/x86/Makefile new file mode 100644 index 00000000..369a38b2 --- /dev/null +++ b/tests/kvm-unit-tests/x86/Makefile @@ -0,0 +1 @@ +include $(TEST_DIR)/Makefile.$(ARCH) diff --git a/tests/kvm-unit-tests/x86/Makefile.common b/tests/kvm-unit-tests/x86/Makefile.common new file mode 100644 index 00000000..fbab82c8 --- /dev/null +++ b/tests/kvm-unit-tests/x86/Makefile.common @@ -0,0 +1,88 @@ +#This is a make file with common rules for both x86 & x86-64 + +all: test_cases + +cflatobjs += lib/pci.o +cflatobjs += lib/pci-edu.o +cflatobjs += lib/x86/setup.o +cflatobjs += lib/x86/io.o +cflatobjs += lib/x86/smp.o +cflatobjs += lib/x86/vm.o +cflatobjs += lib/x86/fwcfg.o +cflatobjs += lib/x86/apic.o +cflatobjs += lib/x86/atomic.o +cflatobjs += lib/x86/desc.o +cflatobjs += lib/x86/isr.o +cflatobjs += lib/x86/acpi.o +cflatobjs += lib/x86/stack.o + +$(libcflat): LDFLAGS += -nostdlib +$(libcflat): CFLAGS += -ffreestanding -I lib + +CFLAGS += -m$(bits) +CFLAGS += -O1 + +# stack.o relies on frame pointers. 
+KEEP_FRAME_POINTER := y + +libgcc := $(shell $(CC) -m$(bits) --print-libgcc-file-name) + +# We want to keep intermediate file: %.elf and %.o +.PRECIOUS: %.elf %.o + +FLATLIBS = lib/libcflat.a $(libgcc) +%.elf: %.o $(FLATLIBS) x86/flat.lds $(cstart.o) + $(CC) $(CFLAGS) -nostdlib -o $@ -Wl,-T,x86/flat.lds \ + $(filter %.o, $^) $(FLATLIBS) + +%.flat: %.elf + $(OBJCOPY) -O elf32-i386 $^ $@ + +tests-common = $(TEST_DIR)/vmexit.flat $(TEST_DIR)/tsc.flat \ + $(TEST_DIR)/smptest.flat $(TEST_DIR)/port80.flat \ + $(TEST_DIR)/realmode.flat $(TEST_DIR)/msr.flat \ + $(TEST_DIR)/hypercall.flat $(TEST_DIR)/sieve.flat \ + $(TEST_DIR)/kvmclock_test.flat $(TEST_DIR)/eventinj.flat \ + $(TEST_DIR)/s3.flat $(TEST_DIR)/pmu.flat $(TEST_DIR)/setjmp.flat \ + $(TEST_DIR)/tsc_adjust.flat $(TEST_DIR)/asyncpf.flat \ + $(TEST_DIR)/init.flat $(TEST_DIR)/smap.flat \ + $(TEST_DIR)/hyperv_synic.flat $(TEST_DIR)/hyperv_stimer.flat \ + +ifdef API +tests-common += api/api-sample +tests-common += api/dirty-log +tests-common += api/dirty-log-perf +endif + +test_cases: $(tests-common) $(tests) + +$(TEST_DIR)/%.o: CFLAGS += -std=gnu99 -ffreestanding -I lib -I lib/x86 + +$(TEST_DIR)/realmode.elf: $(TEST_DIR)/realmode.o + $(CC) -m32 -nostdlib -o $@ -Wl,-T,$(TEST_DIR)/realmode.lds $^ + +$(TEST_DIR)/realmode.o: bits = 32 + +$(TEST_DIR)/kvmclock_test.elf: $(TEST_DIR)/kvmclock.o + +$(TEST_DIR)/hyperv_synic.elf: $(TEST_DIR)/hyperv.o + +$(TEST_DIR)/hyperv_stimer.elf: $(TEST_DIR)/hyperv.o + +arch_clean: + $(RM) $(TEST_DIR)/*.o $(TEST_DIR)/*.flat $(TEST_DIR)/*.elf \ + $(TEST_DIR)/.*.d lib/x86/.*.d + +api/%.o: CFLAGS += -m32 + +api/%: LDLIBS += -lstdc++ -lboost_thread -lpthread -lrt +api/%: LDFLAGS += -m32 + +api/libapi.a: api/kvmxx.o api/identity.o api/exception.o api/memmap.o + $(AR) rcs $@ $^ + +api/api-sample: api/api-sample.o api/libapi.a + +api/dirty-log: api/dirty-log.o api/libapi.a + +api/dirty-log-perf: api/dirty-log-perf.o api/libapi.a diff --git a/tests/kvm-unit-tests/x86/Makefile.i386 
b/tests/kvm-unit-tests/x86/Makefile.i386 new file mode 100644 index 00000000..5f89e3e5 --- /dev/null +++ b/tests/kvm-unit-tests/x86/Makefile.i386 @@ -0,0 +1,33 @@ +cstart.o = $(TEST_DIR)/cstart.o +bits = 32 +ldarch = elf32-i386 + +cflatobjs += lib/x86/setjmp32.o + +tests = $(TEST_DIR)/taskswitch.flat $(TEST_DIR)/taskswitch2.flat \ + $(TEST_DIR)/cmpxchg8b.flat + + +# added some tests from Makefile.x86_64 +tests += $(TEST_DIR)/apic.flat \ + $(TEST_DIR)/rmap_chain.flat \ + $(TEST_DIR)/ioapic.flat $(TEST_DIR)/memory.flat + + +# These tests from Makefile.x86_64 don't compile. emulator.c would be nice to have +# $(TEST_DIR)/emulator.flat +# $(TEST_DIR)/access.flat +# $(TEST_DIR)/idt_test.flat +# $(TEST_DIR)/xsave.flat +# $(TEST_DIR)/pcid.flat +# $(TEST_DIR)/debug.flat +# $(TEST_DIR)/pku.flat +# $(TEST_DIR)/hyperv_clock.flat + +#tests += $(TEST_DIR)/svm.flat +#tests += $(TEST_DIR)/vmx.flat +tests += $(TEST_DIR)/tscdeadline_latency.flat +#tests += $(TEST_DIR)/intel-iommu.flat + + +include $(TEST_DIR)/Makefile.common diff --git a/tests/kvm-unit-tests/x86/Makefile.x86_64 b/tests/kvm-unit-tests/x86/Makefile.x86_64 new file mode 100644 index 00000000..3e2821ea --- /dev/null +++ b/tests/kvm-unit-tests/x86/Makefile.x86_64 @@ -0,0 +1,24 @@ +cstart.o = $(TEST_DIR)/cstart64.o +bits = 64 +ldarch = elf64-x86-64 +CFLAGS += -mno-red-zone + +cflatobjs += lib/x86/setjmp64.o +cflatobjs += lib/x86/intel-iommu.o + +tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \ + $(TEST_DIR)/emulator.flat $(TEST_DIR)/idt_test.flat \ + $(TEST_DIR)/xsave.flat $(TEST_DIR)/rmap_chain.flat \ + $(TEST_DIR)/pcid.flat $(TEST_DIR)/debug.flat \ + $(TEST_DIR)/ioapic.flat $(TEST_DIR)/memory.flat \ + $(TEST_DIR)/pku.flat $(TEST_DIR)/hyperv_clock.flat +tests += $(TEST_DIR)/svm.flat +tests += $(TEST_DIR)/vmx.flat +tests += $(TEST_DIR)/tscdeadline_latency.flat +tests += $(TEST_DIR)/intel-iommu.flat + +include $(TEST_DIR)/Makefile.common + +$(TEST_DIR)/hyperv_clock.elf: $(TEST_DIR)/hyperv_clock.o + 
+$(TEST_DIR)/vmx.elf: $(TEST_DIR)/vmx_tests.o diff --git a/tests/kvm-unit-tests/x86/README b/tests/kvm-unit-tests/x86/README new file mode 100644 index 00000000..218fe1a1 --- /dev/null +++ b/tests/kvm-unit-tests/x86/README @@ -0,0 +1,49 @@ + +Tests for the x86 architecture are run as kernel images for qemu that support +multiboot format. The tests use an infrastructure called from the bios code. +The infrastructure initialize the system/cpus, switches to long-mode, and +calls the 'main' function of the individual test. Tests use a qemu virtual +test device, named testdev, for services like printing, exiting, querying +memory size, etc. See file docs/testdev.txt for more details. + +Examples of a test invocation: + These invocations run the msr test case and outputs to stdio. + + Using qemu-kvm: + + qemu-kvm -device testdev,chardev=testlog \ + -chardev file,id=testlog,path=msr.out \ + -serial stdio -kernel ./x86/msr.flat + + Using qemu (supported since qemu 1.3): + + qemu-system-x86_64 -enable-kvm -device pc-testdev -serial stdio \ + -device isa-debug-exit,iobase=0xf4,iosize=0x4 \ + -kernel ./x86/msr.flat + +Tests in this directory and what they do: + access: lots of page table related access (pte/pde) (read/write) + apic: enable x2apic, self ipi, ioapic intr, ioapic simultaneous + emulator: move to/from regs, cmps, push, pop, to/from cr8, smsw and lmsw + hypercall: intel and amd hypercall insn + msr: write to msr (only KERNEL_GS_BASE for now) + port80: lots of out to port 80 + realmode: goes back to realmode, shld, push/pop, mov immediate, cmp + immediate, add immediate, io, eflags instructions + (clc, cli, etc.), jcc short, jcc near, call, long jmp, xchg + sieve: heavy memory access with no paging and with paging static and + with paging vmalloc'ed + smptest: run smp_id() on every cpu and compares return value to number + tsc: write to tsc(0) and write to tsc(100000000000) and read it back + vmexit: long loops for each: cpuid, vmcall, mov_from_cr8, mov_to_cr8, + 
inl_pmtimer, ipi, ipi+halt + kvmclock_test: test of wallclock, monotonic cycle and performance of kvmclock + pcid: basic functionality test of PCID/INVPCID feature + +Legacy notes: + The exit status of the binary is inconsistent; with qemu-system, after + the unit-test is done, the exit status of qemu is 1, different from the + 'old style' qemu-kvm, whose exit status in successful completion is 0. + The run script converts the qemu-system exit status to 0 (SUCCESS), and + treats the legacy exit status of 0 as an error, converting it to an exit + status of 1. diff --git a/tests/kvm-unit-tests/x86/access.c b/tests/kvm-unit-tests/x86/access.c new file mode 100644 index 00000000..0546dbb9 --- /dev/null +++ b/tests/kvm-unit-tests/x86/access.c @@ -0,0 +1,991 @@ + +#include "libcflat.h" +#include "desc.h" +#include "processor.h" +#include "asm/page.h" + +#define smp_id() 0 + +#define true 1 +#define false 0 + +static _Bool verbose = false; + +typedef unsigned long pt_element_t; +static int cpuid_7_ebx; +static int cpuid_7_ecx; +static int invalid_mask; + +#define PT_BASE_ADDR_MASK ((pt_element_t)((((pt_element_t)1 << 40) - 1) & PAGE_MASK)) +#define PT_PSE_BASE_ADDR_MASK (PT_BASE_ADDR_MASK & ~(1ull << 21)) + +#define CR0_WP_MASK (1UL << 16) +#define CR4_SMEP_MASK (1UL << 20) + +#define PFERR_PRESENT_MASK (1U << 0) +#define PFERR_WRITE_MASK (1U << 1) +#define PFERR_USER_MASK (1U << 2) +#define PFERR_RESERVED_MASK (1U << 3) +#define PFERR_FETCH_MASK (1U << 4) +#define PFERR_PK_MASK (1U << 5) + +#define MSR_EFER 0xc0000080 +#define EFER_NX_MASK (1ull << 11) + +#define PT_INDEX(address, level) \ + ((address) >> (12 + ((level)-1) * 9)) & 511 + +/* + * page table access check tests + */ + +enum { + AC_PTE_PRESENT_BIT, + AC_PTE_WRITABLE_BIT, + AC_PTE_USER_BIT, + AC_PTE_ACCESSED_BIT, + AC_PTE_DIRTY_BIT, + AC_PTE_NX_BIT, + AC_PTE_BIT51_BIT, + + AC_PDE_PRESENT_BIT, + AC_PDE_WRITABLE_BIT, + AC_PDE_USER_BIT, + AC_PDE_ACCESSED_BIT, + AC_PDE_DIRTY_BIT, + AC_PDE_PSE_BIT, + AC_PDE_NX_BIT, 
+ AC_PDE_BIT51_BIT, + AC_PDE_BIT13_BIT, + + AC_PKU_AD_BIT, + AC_PKU_WD_BIT, + AC_PKU_PKEY_BIT, + + AC_ACCESS_USER_BIT, + AC_ACCESS_WRITE_BIT, + AC_ACCESS_FETCH_BIT, + AC_ACCESS_TWICE_BIT, + + AC_CPU_EFER_NX_BIT, + AC_CPU_CR0_WP_BIT, + AC_CPU_CR4_SMEP_BIT, + AC_CPU_CR4_PKE_BIT, + + NR_AC_FLAGS +}; + +#define AC_PTE_PRESENT_MASK (1 << AC_PTE_PRESENT_BIT) +#define AC_PTE_WRITABLE_MASK (1 << AC_PTE_WRITABLE_BIT) +#define AC_PTE_USER_MASK (1 << AC_PTE_USER_BIT) +#define AC_PTE_ACCESSED_MASK (1 << AC_PTE_ACCESSED_BIT) +#define AC_PTE_DIRTY_MASK (1 << AC_PTE_DIRTY_BIT) +#define AC_PTE_NX_MASK (1 << AC_PTE_NX_BIT) +#define AC_PTE_BIT51_MASK (1 << AC_PTE_BIT51_BIT) + +#define AC_PDE_PRESENT_MASK (1 << AC_PDE_PRESENT_BIT) +#define AC_PDE_WRITABLE_MASK (1 << AC_PDE_WRITABLE_BIT) +#define AC_PDE_USER_MASK (1 << AC_PDE_USER_BIT) +#define AC_PDE_ACCESSED_MASK (1 << AC_PDE_ACCESSED_BIT) +#define AC_PDE_DIRTY_MASK (1 << AC_PDE_DIRTY_BIT) +#define AC_PDE_PSE_MASK (1 << AC_PDE_PSE_BIT) +#define AC_PDE_NX_MASK (1 << AC_PDE_NX_BIT) +#define AC_PDE_BIT51_MASK (1 << AC_PDE_BIT51_BIT) +#define AC_PDE_BIT13_MASK (1 << AC_PDE_BIT13_BIT) + +#define AC_PKU_AD_MASK (1 << AC_PKU_AD_BIT) +#define AC_PKU_WD_MASK (1 << AC_PKU_WD_BIT) +#define AC_PKU_PKEY_MASK (1 << AC_PKU_PKEY_BIT) + +#define AC_ACCESS_USER_MASK (1 << AC_ACCESS_USER_BIT) +#define AC_ACCESS_WRITE_MASK (1 << AC_ACCESS_WRITE_BIT) +#define AC_ACCESS_FETCH_MASK (1 << AC_ACCESS_FETCH_BIT) +#define AC_ACCESS_TWICE_MASK (1 << AC_ACCESS_TWICE_BIT) + +#define AC_CPU_EFER_NX_MASK (1 << AC_CPU_EFER_NX_BIT) +#define AC_CPU_CR0_WP_MASK (1 << AC_CPU_CR0_WP_BIT) +#define AC_CPU_CR4_SMEP_MASK (1 << AC_CPU_CR4_SMEP_BIT) +#define AC_CPU_CR4_PKE_MASK (1 << AC_CPU_CR4_PKE_BIT) + +const char *ac_names[] = { + [AC_PTE_PRESENT_BIT] = "pte.p", + [AC_PTE_ACCESSED_BIT] = "pte.a", + [AC_PTE_WRITABLE_BIT] = "pte.rw", + [AC_PTE_USER_BIT] = "pte.user", + [AC_PTE_DIRTY_BIT] = "pte.d", + [AC_PTE_NX_BIT] = "pte.nx", + [AC_PTE_BIT51_BIT] = "pte.51", + 
[AC_PDE_PRESENT_BIT] = "pde.p", + [AC_PDE_ACCESSED_BIT] = "pde.a", + [AC_PDE_WRITABLE_BIT] = "pde.rw", + [AC_PDE_USER_BIT] = "pde.user", + [AC_PDE_DIRTY_BIT] = "pde.d", + [AC_PDE_PSE_BIT] = "pde.pse", + [AC_PDE_NX_BIT] = "pde.nx", + [AC_PDE_BIT51_BIT] = "pde.51", + [AC_PDE_BIT13_BIT] = "pde.13", + [AC_PKU_AD_BIT] = "pkru.ad", + [AC_PKU_WD_BIT] = "pkru.wd", + [AC_PKU_PKEY_BIT] = "pkey=1", + [AC_ACCESS_WRITE_BIT] = "write", + [AC_ACCESS_USER_BIT] = "user", + [AC_ACCESS_FETCH_BIT] = "fetch", + [AC_ACCESS_TWICE_BIT] = "twice", + [AC_CPU_EFER_NX_BIT] = "efer.nx", + [AC_CPU_CR0_WP_BIT] = "cr0.wp", + [AC_CPU_CR4_SMEP_BIT] = "cr4.smep", + [AC_CPU_CR4_PKE_BIT] = "cr4.pke", +}; + +static inline void *va(pt_element_t phys) +{ + return (void *)phys; +} + +typedef struct { + pt_element_t pt_pool; + unsigned pt_pool_size; + unsigned pt_pool_current; +} ac_pool_t; + +typedef struct { + unsigned flags; + void *virt; + pt_element_t phys; + pt_element_t *ptep; + pt_element_t expected_pte; + pt_element_t *pdep; + pt_element_t expected_pde; + pt_element_t ignore_pde; + int expected_fault; + unsigned expected_error; +} ac_test_t; + +typedef struct { + unsigned short limit; + unsigned long linear_addr; +} __attribute__((packed)) descriptor_table_t; + + +static void ac_test_show(ac_test_t *at); + +int write_cr4_checking(unsigned long val) +{ + asm volatile(ASM_TRY("1f") + "mov %0,%%cr4\n\t" + "1:": : "r" (val)); + return exception_vector(); +} + +void set_cr0_wp(int wp) +{ + unsigned long cr0 = read_cr0(); + unsigned long old_cr0 = cr0; + + cr0 &= ~CR0_WP_MASK; + if (wp) + cr0 |= CR0_WP_MASK; + if (old_cr0 != cr0) + write_cr0(cr0); +} + +void set_cr4_smep(int smep) +{ + unsigned long cr4 = read_cr4(); + unsigned long old_cr4 = cr4; + extern u64 ptl2[]; + + cr4 &= ~CR4_SMEP_MASK; + if (smep) + cr4 |= CR4_SMEP_MASK; + if (old_cr4 == cr4) + return; + + if (smep) + ptl2[2] &= ~PT_USER_MASK; + write_cr4(cr4); + if (!smep) + ptl2[2] |= PT_USER_MASK; +} + +void set_cr4_pke(int pke) +{ + 
unsigned long cr4 = read_cr4(); + unsigned long old_cr4 = cr4; + + cr4 &= ~X86_CR4_PKE; + if (pke) + cr4 |= X86_CR4_PKE; + if (old_cr4 == cr4) + return; + + /* Check that protection keys do not affect accesses when CR4.PKE=0. */ + if ((read_cr4() & X86_CR4_PKE) && !pke) { + write_pkru(0xfffffffc); + } + write_cr4(cr4); +} + +void set_efer_nx(int nx) +{ + unsigned long long efer = rdmsr(MSR_EFER); + unsigned long long old_efer = efer; + + efer &= ~EFER_NX_MASK; + if (nx) + efer |= EFER_NX_MASK; + if (old_efer != efer) + wrmsr(MSR_EFER, efer); +} + +static void ac_env_int(ac_pool_t *pool) +{ + extern char page_fault, kernel_entry; + set_idt_entry(14, &page_fault, 0); + set_idt_entry(0x20, &kernel_entry, 3); + + pool->pt_pool = 33 * 1024 * 1024; + pool->pt_pool_size = 120 * 1024 * 1024 - pool->pt_pool; + pool->pt_pool_current = 0; +} + +void ac_test_init(ac_test_t *at, void *virt) +{ + wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NX_MASK); + set_cr0_wp(1); + at->flags = 0; + at->virt = virt; + at->phys = 32 * 1024 * 1024; +} + +int ac_test_bump_one(ac_test_t *at) +{ + at->flags = ((at->flags | invalid_mask) + 1) & ~invalid_mask; + return at->flags < (1 << NR_AC_FLAGS); +} + +#define F(x) ((flags & x##_MASK) != 0) + +_Bool ac_test_legal(ac_test_t *at) +{ + int flags = at->flags; + + if (F(AC_ACCESS_FETCH) && F(AC_ACCESS_WRITE)) + return false; + + /* + * Since we convert current page to kernel page when cr4.smep=1, + * we can't switch to user mode. + */ + if (F(AC_ACCESS_USER) && F(AC_CPU_CR4_SMEP)) + return false; + + /* + * Only test protection key faults if CR4.PKE=1. + */ + if (!F(AC_CPU_CR4_PKE) && + (F(AC_PKU_AD) || F(AC_PKU_WD))) { + return false; + } + + /* + * pde.bit13 checks handling of reserved bits in largepage PDEs. It is + * meaningless if there is a PTE. 
+ */ + if (!F(AC_PDE_PSE) && F(AC_PDE_BIT13)) + return false; + + return true; +} + +int ac_test_bump(ac_test_t *at) +{ + int ret; + + ret = ac_test_bump_one(at); + while (ret && !ac_test_legal(at)) + ret = ac_test_bump_one(at); + return ret; +} + +pt_element_t ac_test_alloc_pt(ac_pool_t *pool) +{ + pt_element_t ret = pool->pt_pool + pool->pt_pool_current; + pool->pt_pool_current += PAGE_SIZE; + return ret; +} + +_Bool ac_test_enough_room(ac_pool_t *pool) +{ + return pool->pt_pool_current + 4 * PAGE_SIZE <= pool->pt_pool_size; +} + +void ac_test_reset_pt_pool(ac_pool_t *pool) +{ + pool->pt_pool_current = 0; +} + +pt_element_t ac_test_permissions(ac_test_t *at, unsigned flags, bool writable, + bool user, bool executable) +{ + bool kwritable = !F(AC_CPU_CR0_WP) && !F(AC_ACCESS_USER); + pt_element_t expected = 0; + + if (F(AC_ACCESS_USER) && !user) + at->expected_fault = 1; + + if (F(AC_ACCESS_WRITE) && !writable && !kwritable) + at->expected_fault = 1; + + if (F(AC_ACCESS_FETCH) && !executable) + at->expected_fault = 1; + + if (F(AC_ACCESS_FETCH) && user && F(AC_CPU_CR4_SMEP)) + at->expected_fault = 1; + + if (user && !F(AC_ACCESS_FETCH) && F(AC_PKU_PKEY) && F(AC_CPU_CR4_PKE)) { + if (F(AC_PKU_AD)) { + at->expected_fault = 1; + at->expected_error |= PFERR_PK_MASK; + } else if (F(AC_ACCESS_WRITE) && F(AC_PKU_WD) && !kwritable) { + at->expected_fault = 1; + at->expected_error |= PFERR_PK_MASK; + } + } + + if (!at->expected_fault) { + expected |= PT_ACCESSED_MASK; + if (F(AC_ACCESS_WRITE)) + expected |= PT_DIRTY_MASK; + } + + return expected; +} + +void ac_emulate_access(ac_test_t *at, unsigned flags) +{ + bool pde_valid, pte_valid; + bool user, writable, executable; + + if (F(AC_ACCESS_USER)) + at->expected_error |= PFERR_USER_MASK; + + if (F(AC_ACCESS_WRITE)) + at->expected_error |= PFERR_WRITE_MASK; + + if (F(AC_ACCESS_FETCH)) + at->expected_error |= PFERR_FETCH_MASK; + + if (!F(AC_PDE_ACCESSED)) + at->ignore_pde = PT_ACCESSED_MASK; + + pde_valid = F(AC_PDE_PRESENT) 
+ && !F(AC_PDE_BIT51) && !F(AC_PDE_BIT13) + && !(F(AC_PDE_NX) && !F(AC_CPU_EFER_NX)); + + if (!pde_valid) { + at->expected_fault = 1; + if (F(AC_PDE_PRESENT)) { + at->expected_error |= PFERR_RESERVED_MASK; + } else { + at->expected_error &= ~PFERR_PRESENT_MASK; + } + goto fault; + } + + writable = F(AC_PDE_WRITABLE); + user = F(AC_PDE_USER); + executable = !F(AC_PDE_NX); + + if (F(AC_PDE_PSE)) { + at->expected_pde |= ac_test_permissions(at, flags, writable, user, + executable); + goto no_pte; + } + + at->expected_pde |= PT_ACCESSED_MASK; + + pte_valid = F(AC_PTE_PRESENT) + && !F(AC_PTE_BIT51) + && !(F(AC_PTE_NX) && !F(AC_CPU_EFER_NX)); + + if (!pte_valid) { + at->expected_fault = 1; + if (F(AC_PTE_PRESENT)) { + at->expected_error |= PFERR_RESERVED_MASK; + } else { + at->expected_error &= ~PFERR_PRESENT_MASK; + } + goto fault; + } + + writable &= F(AC_PTE_WRITABLE); + user &= F(AC_PTE_USER); + executable &= !F(AC_PTE_NX); + + at->expected_pte |= ac_test_permissions(at, flags, writable, user, + executable); + +no_pte: +fault: + if (!at->expected_fault) + at->ignore_pde = 0; + if (!F(AC_CPU_EFER_NX) && !F(AC_CPU_CR4_SMEP)) + at->expected_error &= ~PFERR_FETCH_MASK; +} + +void ac_set_expected_status(ac_test_t *at) +{ + invlpg(at->virt); + + if (at->ptep) + at->expected_pte = *at->ptep; + at->expected_pde = *at->pdep; + at->ignore_pde = 0; + at->expected_fault = 0; + at->expected_error = PFERR_PRESENT_MASK; + + if (at->flags & AC_ACCESS_TWICE_MASK) { + ac_emulate_access(at, at->flags & ~AC_ACCESS_WRITE_MASK + & ~AC_ACCESS_FETCH_MASK & ~AC_ACCESS_USER_MASK); + at->expected_fault = 0; + at->expected_error = PFERR_PRESENT_MASK; + at->ignore_pde = 0; + } + + ac_emulate_access(at, at->flags); +} + +void __ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool, u64 pd_page, + u64 pt_page) + +{ + unsigned long root = read_cr3(); + int flags = at->flags; + + if (!ac_test_enough_room(pool)) + ac_test_reset_pt_pool(pool); + + at->ptep = 0; + for (int i = 4; i >= 1 && (i >= 2 || 
!F(AC_PDE_PSE)); --i) { + pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK); + unsigned index = PT_INDEX((unsigned long)at->virt, i); + pt_element_t pte = 0; + switch (i) { + case 4: + case 3: + pte = pd_page ? pd_page : ac_test_alloc_pt(pool); + pte |= PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK; + break; + case 2: + if (!F(AC_PDE_PSE)) { + pte = pt_page ? pt_page : ac_test_alloc_pt(pool); + /* The protection key is ignored on non-leaf entries. */ + if (F(AC_PKU_PKEY)) + pte |= 2ull << 59; + } else { + pte = at->phys & PT_PSE_BASE_ADDR_MASK; + pte |= PT_PAGE_SIZE_MASK; + if (F(AC_PKU_PKEY)) + pte |= 1ull << 59; + } + if (F(AC_PDE_PRESENT)) + pte |= PT_PRESENT_MASK; + if (F(AC_PDE_WRITABLE)) + pte |= PT_WRITABLE_MASK; + if (F(AC_PDE_USER)) + pte |= PT_USER_MASK; + if (F(AC_PDE_ACCESSED)) + pte |= PT_ACCESSED_MASK; + if (F(AC_PDE_DIRTY)) + pte |= PT_DIRTY_MASK; + if (F(AC_PDE_NX)) + pte |= PT64_NX_MASK; + if (F(AC_PDE_BIT51)) + pte |= 1ull << 51; + if (F(AC_PDE_BIT13)) + pte |= 1ull << 13; + at->pdep = &vroot[index]; + break; + case 1: + pte = at->phys & PT_BASE_ADDR_MASK; + if (F(AC_PKU_PKEY)) + pte |= 1ull << 59; + if (F(AC_PTE_PRESENT)) + pte |= PT_PRESENT_MASK; + if (F(AC_PTE_WRITABLE)) + pte |= PT_WRITABLE_MASK; + if (F(AC_PTE_USER)) + pte |= PT_USER_MASK; + if (F(AC_PTE_ACCESSED)) + pte |= PT_ACCESSED_MASK; + if (F(AC_PTE_DIRTY)) + pte |= PT_DIRTY_MASK; + if (F(AC_PTE_NX)) + pte |= PT64_NX_MASK; + if (F(AC_PTE_BIT51)) + pte |= 1ull << 51; + at->ptep = &vroot[index]; + break; + } + vroot[index] = pte; + root = vroot[index]; + } + ac_set_expected_status(at); +} + +static void ac_test_setup_pte(ac_test_t *at, ac_pool_t *pool) +{ + __ac_setup_specific_pages(at, pool, 0, 0); +} + +static void ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool, + u64 pd_page, u64 pt_page) +{ + return __ac_setup_specific_pages(at, pool, pd_page, pt_page); +} + +static void dump_mapping(ac_test_t *at) +{ + unsigned long root = read_cr3(); + int flags = at->flags; + int i; + 
+ printf("Dump mapping: address: %p\n", at->virt); + for (i = 4; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) { + pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK); + unsigned index = PT_INDEX((unsigned long)at->virt, i); + pt_element_t pte = vroot[index]; + + printf("------L%d: %lx\n", i, pte); + root = vroot[index]; + } +} + +static void ac_test_check(ac_test_t *at, _Bool *success_ret, _Bool cond, + const char *fmt, ...) +{ + va_list ap; + char buf[500]; + + if (!*success_ret) { + return; + } + + if (!cond) { + return; + } + + *success_ret = false; + + if (!verbose) { + puts("\n"); + ac_test_show(at); + } + + va_start(ap, fmt); + vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + printf("FAIL: %s\n", buf); + dump_mapping(at); +} + +static int pt_match(pt_element_t pte1, pt_element_t pte2, pt_element_t ignore) +{ + pte1 &= ~ignore; + pte2 &= ~ignore; + return pte1 == pte2; +} + +int ac_test_do_access(ac_test_t *at) +{ + static unsigned unique = 42; + int fault = 0; + unsigned e; + static unsigned char user_stack[4096]; + unsigned long rsp; + _Bool success = true; + int flags = at->flags; + + ++unique; + if (!(unique & 65535)) { + puts("."); + } + + *((unsigned char *)at->phys) = 0xc3; /* ret */ + + unsigned r = unique; + set_cr0_wp(F(AC_CPU_CR0_WP)); + set_efer_nx(F(AC_CPU_EFER_NX)); + set_cr4_pke(F(AC_CPU_CR4_PKE)); + if (F(AC_CPU_CR4_PKE)) { + /* WD2=AD2=1, WD1=F(AC_PKU_WD), AD1=F(AC_PKU_AD) */ + write_pkru(0x30 | (F(AC_PKU_WD) ? 8 : 0) | + (F(AC_PKU_AD) ? 
4 : 0)); + } + + set_cr4_smep(F(AC_CPU_CR4_SMEP)); + + if (F(AC_ACCESS_TWICE)) { + asm volatile ( + "mov $fixed2, %%rsi \n\t" + "mov (%[addr]), %[reg] \n\t" + "fixed2:" + : [reg]"=r"(r), [fault]"=a"(fault), "=b"(e) + : [addr]"r"(at->virt) + : "rsi" + ); + fault = 0; + } + + asm volatile ("mov $fixed1, %%rsi \n\t" + "mov %%rsp, %%rdx \n\t" + "cmp $0, %[user] \n\t" + "jz do_access \n\t" + "push %%rax; mov %[user_ds], %%ax; mov %%ax, %%ds; pop %%rax \n\t" + "pushq %[user_ds] \n\t" + "pushq %[user_stack_top] \n\t" + "pushfq \n\t" + "pushq %[user_cs] \n\t" + "pushq $do_access \n\t" + "iretq \n" + "do_access: \n\t" + "cmp $0, %[fetch] \n\t" + "jnz 2f \n\t" + "cmp $0, %[write] \n\t" + "jnz 1f \n\t" + "mov (%[addr]), %[reg] \n\t" + "jmp done \n\t" + "1: mov %[reg], (%[addr]) \n\t" + "jmp done \n\t" + "2: call *%[addr] \n\t" + "done: \n" + "fixed1: \n" + "int %[kernel_entry_vector] \n\t" + "back_to_kernel:" + : [reg]"+r"(r), "+a"(fault), "=b"(e), "=&d"(rsp) + : [addr]"r"(at->virt), + [write]"r"(F(AC_ACCESS_WRITE)), + [user]"r"(F(AC_ACCESS_USER)), + [fetch]"r"(F(AC_ACCESS_FETCH)), + [user_ds]"i"(USER_DS), + [user_cs]"i"(USER_CS), + [user_stack_top]"r"(user_stack + sizeof user_stack), + [kernel_entry_vector]"i"(0x20) + : "rsi"); + + asm volatile (".section .text.pf \n\t" + "page_fault: \n\t" + "pop %rbx \n\t" + "mov %rsi, (%rsp) \n\t" + "movl $1, %eax \n\t" + "iretq \n\t" + ".section .text"); + + asm volatile (".section .text.entry \n\t" + "kernel_entry: \n\t" + "mov %rdx, %rsp \n\t" + "jmp back_to_kernel \n\t" + ".section .text"); + + ac_test_check(at, &success, fault && !at->expected_fault, + "unexpected fault"); + ac_test_check(at, &success, !fault && at->expected_fault, + "unexpected access"); + ac_test_check(at, &success, fault && e != at->expected_error, + "error code %x expected %x", e, at->expected_error); + ac_test_check(at, &success, at->ptep && *at->ptep != at->expected_pte, + "pte %x expected %x", *at->ptep, at->expected_pte); + ac_test_check(at, &success, + 
!pt_match(*at->pdep, at->expected_pde, at->ignore_pde), + "pde %x expected %x", *at->pdep, at->expected_pde); + + if (success && verbose) { + if (at->expected_fault) { + printf("PASS (%x)\n", at->expected_error); + } else { + printf("PASS\n"); + } + } + return success; +} + +static void ac_test_show(ac_test_t *at) +{ + char line[5000]; + + *line = 0; + strcat(line, "test"); + for (int i = 0; i < NR_AC_FLAGS; ++i) + if (at->flags & (1 << i)) { + strcat(line, " "); + strcat(line, ac_names[i]); + } + strcat(line, ": "); + printf("%s", line); +} + +/* + * This test case is used to triger the bug which is fixed by + * commit e09e90a5 in the kvm tree + */ +static int corrupt_hugepage_triger(ac_pool_t *pool) +{ + ac_test_t at1, at2; + + ac_test_init(&at1, (void *)(0x123400000000)); + ac_test_init(&at2, (void *)(0x666600000000)); + + at2.flags = AC_CPU_CR0_WP_MASK | AC_PDE_PSE_MASK | AC_PDE_PRESENT_MASK; + ac_test_setup_pte(&at2, pool); + if (!ac_test_do_access(&at2)) + goto err; + + at1.flags = at2.flags | AC_PDE_WRITABLE_MASK; + ac_test_setup_pte(&at1, pool); + if (!ac_test_do_access(&at1)) + goto err; + + at1.flags |= AC_ACCESS_WRITE_MASK; + ac_set_expected_status(&at1); + if (!ac_test_do_access(&at1)) + goto err; + + at2.flags |= AC_ACCESS_WRITE_MASK; + ac_set_expected_status(&at2); + if (!ac_test_do_access(&at2)) + goto err; + + return 1; + +err: + printf("corrupt_hugepage_triger test fail\n"); + return 0; +} + +/* + * This test case is used to triger the bug which is fixed by + * commit 3ddf6c06e13e in the kvm tree + */ +static int check_pfec_on_prefetch_pte(ac_pool_t *pool) +{ + ac_test_t at1, at2; + + ac_test_init(&at1, (void *)(0x123406001000)); + ac_test_init(&at2, (void *)(0x123406003000)); + + at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK; + ac_setup_specific_pages(&at1, pool, 30 * 1024 * 1024, 30 * 1024 * 1024); + + at2.flags = at1.flags | AC_PTE_NX_MASK; + ac_setup_specific_pages(&at2, pool, 30 * 1024 * 1024, 30 * 1024 * 1024); + + if 
(!ac_test_do_access(&at1)) { + printf("%s: prepare fail\n", __FUNCTION__); + goto err; + } + + if (!ac_test_do_access(&at2)) { + printf("%s: check PFEC on prefetch pte path fail\n", + __FUNCTION__); + goto err; + } + + return 1; + +err: + return 0; +} + +/* + * If the write-fault access is from supervisor and CR0.WP is not set on the + * vcpu, kvm will fix it by adjusting pte access - it sets the W bit on pte + * and clears U bit. This is the chance that kvm can change pte access from + * readonly to writable. + * + * Unfortunately, the pte access is the access of 'direct' shadow page table, + * means direct sp.role.access = pte_access, then we will create a writable + * spte entry on the readonly shadow page table. It will cause Dirty bit is + * not tracked when two guest ptes point to the same large page. Note, it + * does not have other impact except Dirty bit since cr0.wp is encoded into + * sp.role. + * + * Note: to trigger this bug, hugepage should be disabled on host. + */ +static int check_large_pte_dirty_for_nowp(ac_pool_t *pool) +{ + ac_test_t at1, at2; + + ac_test_init(&at1, (void *)(0x123403000000)); + ac_test_init(&at2, (void *)(0x666606000000)); + + at2.flags = AC_PDE_PRESENT_MASK | AC_PDE_PSE_MASK; + ac_test_setup_pte(&at2, pool); + if (!ac_test_do_access(&at2)) { + printf("%s: read on the first mapping fail.\n", __FUNCTION__); + goto err; + } + + at1.flags = at2.flags | AC_ACCESS_WRITE_MASK; + ac_test_setup_pte(&at1, pool); + if (!ac_test_do_access(&at1)) { + printf("%s: write on the second mapping fail.\n", __FUNCTION__); + goto err; + } + + at2.flags |= AC_ACCESS_WRITE_MASK; + ac_set_expected_status(&at2); + if (!ac_test_do_access(&at2)) { + printf("%s: write on the first mapping fail.\n", __FUNCTION__); + goto err; + } + + return 1; + +err: + return 0; +} + +static int check_smep_andnot_wp(ac_pool_t *pool) +{ + ac_test_t at1; + int err_prepare_andnot_wp, err_smep_andnot_wp; + + if (!(cpuid_7_ebx & (1 << 7))) { + return 1; + } + + 
ac_test_init(&at1, (void *)(0x123406001000)); + + at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK | + AC_PDE_USER_MASK | AC_PTE_USER_MASK | + AC_PDE_ACCESSED_MASK | AC_PTE_ACCESSED_MASK | + AC_CPU_CR4_SMEP_MASK | + AC_CPU_CR0_WP_MASK | + AC_ACCESS_WRITE_MASK; + ac_test_setup_pte(&at1, pool); + + /* + * Here we write the ro user page when + * cr0.wp=0, then we execute it and SMEP + * fault should happen. + */ + err_prepare_andnot_wp = ac_test_do_access(&at1); + if (!err_prepare_andnot_wp) { + printf("%s: SMEP prepare fail\n", __FUNCTION__); + goto clean_up; + } + + at1.flags &= ~AC_ACCESS_WRITE_MASK; + at1.flags |= AC_ACCESS_FETCH_MASK; + ac_set_expected_status(&at1); + err_smep_andnot_wp = ac_test_do_access(&at1); + +clean_up: + set_cr4_smep(0); + + if (!err_prepare_andnot_wp) + goto err; + if (!err_smep_andnot_wp) { + printf("%s: check SMEP without wp fail\n", __FUNCTION__); + goto err; + } + return 1; + +err: + return 0; +} + +int ac_test_exec(ac_test_t *at, ac_pool_t *pool) +{ + int r; + + if (verbose) { + ac_test_show(at); + } + ac_test_setup_pte(at, pool); + r = ac_test_do_access(at); + return r; +} + +typedef int (*ac_test_fn)(ac_pool_t *pool); +const ac_test_fn ac_test_cases[] = +{ + corrupt_hugepage_triger, + check_pfec_on_prefetch_pte, + check_large_pte_dirty_for_nowp, + check_smep_andnot_wp +}; + +int ac_test_run(void) +{ + ac_test_t at; + ac_pool_t pool; + int i, tests, successes; + + printf("run\n"); + tests = successes = 0; + + if (cpuid_7_ecx & (1 << 3)) { + set_cr4_pke(1); + set_cr4_pke(0); + /* Now PKRU = 0xFFFFFFFF. 
*/ + } else { + unsigned long cr4 = read_cr4(); + tests++; + if (write_cr4_checking(cr4 | X86_CR4_PKE) == GP_VECTOR) { + successes++; + invalid_mask |= AC_PKU_AD_MASK; + invalid_mask |= AC_PKU_WD_MASK; + invalid_mask |= AC_PKU_PKEY_MASK; + invalid_mask |= AC_CPU_CR4_PKE_MASK; + printf("CR4.PKE not available, disabling PKE tests\n"); + } else { + printf("Set PKE in CR4 - expect #GP: FAIL!\n"); + set_cr4_pke(0); + } + } + + if (!(cpuid_7_ebx & (1 << 7))) { + unsigned long cr4 = read_cr4(); + tests++; + if (write_cr4_checking(cr4 | CR4_SMEP_MASK) == GP_VECTOR) { + successes++; + invalid_mask |= AC_CPU_CR4_SMEP_MASK; + printf("CR4.SMEP not available, disabling SMEP tests\n"); + } else { + printf("Set SMEP in CR4 - expect #GP: FAIL!\n"); + set_cr4_smep(0); + } + } + + ac_env_int(&pool); + ac_test_init(&at, (void *)(0x123400000000 + 16 * smp_id())); + do { + ++tests; + successes += ac_test_exec(&at, &pool); + } while (ac_test_bump(&at)); + + for (i = 0; i < ARRAY_SIZE(ac_test_cases); i++) { + ++tests; + successes += ac_test_cases[i](&pool); + } + + printf("\n%d tests, %d failures\n", tests, tests - successes); + + return successes == tests; +} + +int main() +{ + int r; + + setup_idt(); + + cpuid_7_ebx = cpuid(7).b; + cpuid_7_ecx = cpuid(7).c; + + printf("starting test\n\n"); + r = ac_test_run(); + return r ? 
0 : 1; +} diff --git a/tests/kvm-unit-tests/x86/apic.c b/tests/kvm-unit-tests/x86/apic.c new file mode 100644 index 00000000..e077a442 --- /dev/null +++ b/tests/kvm-unit-tests/x86/apic.c @@ -0,0 +1,486 @@ +#include "libcflat.h" +#include "apic.h" +#include "vm.h" +#include "smp.h" +#include "desc.h" +#include "isr.h" +#include "msr.h" +#include "atomic.h" + +static void test_lapic_existence(void) +{ + u32 lvr; + + lvr = apic_read(APIC_LVR); + printf("apic version: %x\n", lvr); + report("apic existence", (u16)lvr == 0x14); +} + +#define TSC_DEADLINE_TIMER_VECTOR 0xef +#define BROADCAST_VECTOR 0xcf + +static int tdt_count; + +static void tsc_deadline_timer_isr(isr_regs_t *regs) +{ + ++tdt_count; + eoi(); +} + +static void __test_tsc_deadline_timer(void) +{ + handle_irq(TSC_DEADLINE_TIMER_VECTOR, tsc_deadline_timer_isr); + irq_enable(); + + wrmsr(MSR_IA32_TSCDEADLINE, rdmsr(MSR_IA32_TSC)); + asm volatile ("nop"); + report("tsc deadline timer", tdt_count == 1); + report("tsc deadline timer clearing", rdmsr(MSR_IA32_TSCDEADLINE) == 0); +} + +static int enable_tsc_deadline_timer(void) +{ + uint32_t lvtt; + + if (cpuid(1).c & (1 << 24)) { + lvtt = APIC_LVT_TIMER_TSCDEADLINE | TSC_DEADLINE_TIMER_VECTOR; + apic_write(APIC_LVTT, lvtt); + return 1; + } else { + return 0; + } +} + +static void test_tsc_deadline_timer(void) +{ + if(enable_tsc_deadline_timer()) { + __test_tsc_deadline_timer(); + } else { + report_skip("tsc deadline timer not detected"); + } +} + +static void do_write_apicbase(void *data) +{ + wrmsr(MSR_IA32_APICBASE, *(u64 *)data); +} + +void test_enable_x2apic(void) +{ + u64 invalid_state = APIC_DEFAULT_PHYS_BASE | APIC_BSP | APIC_EXTD; + u64 apic_enabled = APIC_DEFAULT_PHYS_BASE | APIC_BSP | APIC_EN; + u64 x2apic_enabled = + APIC_DEFAULT_PHYS_BASE | APIC_BSP | APIC_EN | APIC_EXTD; + + if (enable_x2apic()) { + printf("x2apic enabled\n"); + + report("x2apic enabled to invalid state", + test_for_exception(GP_VECTOR, do_write_apicbase, + &invalid_state)); + 
report("x2apic enabled to apic enabled", + test_for_exception(GP_VECTOR, do_write_apicbase, + &apic_enabled)); + + wrmsr(MSR_IA32_APICBASE, APIC_DEFAULT_PHYS_BASE | APIC_BSP); + report("disabled to invalid state", + test_for_exception(GP_VECTOR, do_write_apicbase, + &invalid_state)); + report("disabled to x2apic enabled", + test_for_exception(GP_VECTOR, do_write_apicbase, + &x2apic_enabled)); + + wrmsr(MSR_IA32_APICBASE, apic_enabled); + report("apic enabled to invalid state", + test_for_exception(GP_VECTOR, do_write_apicbase, + &invalid_state)); + + wrmsr(MSR_IA32_APICBASE, x2apic_enabled); + apic_write(APIC_SPIV, 0x1ff); + } else { + printf("x2apic not detected\n"); + + report("enable unsupported x2apic", + test_for_exception(GP_VECTOR, do_write_apicbase, + &x2apic_enabled)); + } +} + +static void test_apic_disable(void) +{ + u64 orig_apicbase = rdmsr(MSR_IA32_APICBASE); + + report_prefix_push("apic_disable"); + + report("Local apic enabled", orig_apicbase & APIC_EN); + report("CPUID.1H:EDX.APIC[bit 9] is set", cpuid(1).d & (1 << 9)); + + wrmsr(MSR_IA32_APICBASE, orig_apicbase & ~(APIC_EN | APIC_EXTD)); + report("Local apic disabled", !(rdmsr(MSR_IA32_APICBASE) & APIC_EN)); + report("CPUID.1H:EDX.APIC[bit 9] is clear", !(cpuid(1).d & (1 << 9))); + + wrmsr(MSR_IA32_APICBASE, orig_apicbase & ~APIC_EXTD); + wrmsr(MSR_IA32_APICBASE, orig_apicbase); + apic_write(APIC_SPIV, 0x1ff); + report("Local apic enabled", rdmsr(MSR_IA32_APICBASE) & APIC_EN); + report("CPUID.1H:EDX.APIC[bit 9] is set", cpuid(1).d & (1 << 9)); + + report_prefix_pop(); +} + +#define ALTERNATE_APIC_BASE 0x42000000 + +static void test_apicbase(void) +{ + u64 orig_apicbase = rdmsr(MSR_IA32_APICBASE); + u32 lvr = apic_read(APIC_LVR); + u64 value; + + wrmsr(MSR_IA32_APICBASE, orig_apicbase & ~(APIC_EN | APIC_EXTD)); + wrmsr(MSR_IA32_APICBASE, ALTERNATE_APIC_BASE | APIC_BSP | APIC_EN); + + report_prefix_push("apicbase"); + + report("relocate apic", + *(volatile u32 *)(ALTERNATE_APIC_BASE + APIC_LVR) == 
lvr); + + value = orig_apicbase | (1UL << cpuid_maxphyaddr()); + report("reserved physaddr bits", + test_for_exception(GP_VECTOR, do_write_apicbase, &value)); + + value = orig_apicbase | 1; + report("reserved low bits", + test_for_exception(GP_VECTOR, do_write_apicbase, &value)); + + wrmsr(MSR_IA32_APICBASE, orig_apicbase); + apic_write(APIC_SPIV, 0x1ff); + + report_prefix_pop(); +} + +static void do_write_apic_id(void *id) +{ + apic_write(APIC_ID, *(u32 *)id); +} + +static void __test_apic_id(void * unused) +{ + u32 id, newid; + u8 initial_xapic_id = cpuid(1).b >> 24; + u32 initial_x2apic_id = cpuid(0xb).d; + bool x2apic_mode = rdmsr(MSR_IA32_APICBASE) & APIC_EXTD; + + if (x2apic_mode) + reset_apic(); + + id = apic_id(); + report("xapic id matches cpuid", initial_xapic_id == id); + + newid = (id + 1) << 24; + report("writeable xapic id", + !test_for_exception(GP_VECTOR, do_write_apic_id, &newid) && + id + 1 == apic_id()); + + if (!enable_x2apic()) + goto out; + + report("non-writeable x2apic id", + test_for_exception(GP_VECTOR, do_write_apic_id, &newid)); + report("sane x2apic id", initial_xapic_id == (apic_id() & 0xff)); + + /* old QEMUs do not set initial x2APIC ID */ + report("x2apic id matches cpuid", + initial_xapic_id == (initial_x2apic_id & 0xff) && + initial_x2apic_id == apic_id()); + +out: + reset_apic(); + + report("correct xapic id after reset", initial_xapic_id == apic_id()); + + /* old KVMs do not reset xAPIC ID */ + if (id != apic_id()) + apic_write(APIC_ID, id << 24); + + if (x2apic_mode) + enable_x2apic(); +} + +static void test_apic_id(void) +{ + if (cpu_count() < 2) + return; + + on_cpu(1, __test_apic_id, NULL); +} + +static int ipi_count; + +static void self_ipi_isr(isr_regs_t *regs) +{ + ++ipi_count; + eoi(); +} + +static void test_self_ipi(void) +{ + int vec = 0xf1; + + handle_irq(vec, self_ipi_isr); + irq_enable(); + apic_icr_write(APIC_DEST_SELF | APIC_DEST_PHYSICAL | APIC_DM_FIXED | vec, + 0); + asm volatile ("nop"); + report("self ipi", 
ipi_count == 1); +} + +volatile int nmi_counter_private, nmi_counter, nmi_hlt_counter, sti_loop_active; + +void sti_nop(char *p) +{ + asm volatile ( + ".globl post_sti \n\t" + "sti \n" + /* + * vmx won't exit on external interrupt if blocked-by-sti, + * so give it a reason to exit by accessing an unmapped page. + */ + "post_sti: testb $0, %0 \n\t" + "nop \n\t" + "cli" + : : "m"(*p) + ); + nmi_counter = nmi_counter_private; +} + +static void sti_loop(void *ignore) +{ + unsigned k = 0; + + while (sti_loop_active) { + sti_nop((char *)(ulong)((k++ * 4096) % (128 * 1024 * 1024))); + } +} + +static void nmi_handler(isr_regs_t *regs) +{ + extern void post_sti(void); + ++nmi_counter_private; + nmi_hlt_counter += regs->rip == (ulong)post_sti; +} + +static void update_cr3(void *cr3) +{ + write_cr3((ulong)cr3); +} + +static void test_sti_nmi(void) +{ + unsigned old_counter; + + if (cpu_count() < 2) { + return; + } + + handle_irq(2, nmi_handler); + on_cpu(1, update_cr3, (void *)read_cr3()); + + sti_loop_active = 1; + on_cpu_async(1, sti_loop, 0); + while (nmi_counter < 30000) { + old_counter = nmi_counter; + apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_NMI | APIC_INT_ASSERT, 1); + while (nmi_counter == old_counter) { + ; + } + } + sti_loop_active = 0; + report("nmi-after-sti", nmi_hlt_counter == 0); +} + +static volatile bool nmi_done, nmi_flushed; +static volatile int nmi_received; +static volatile int cpu0_nmi_ctr1, cpu1_nmi_ctr1; +static volatile int cpu0_nmi_ctr2, cpu1_nmi_ctr2; + +static void multiple_nmi_handler(isr_regs_t *regs) +{ + ++nmi_received; +} + +static void kick_me_nmi(void *blah) +{ + while (!nmi_done) { + ++cpu1_nmi_ctr1; + while (cpu1_nmi_ctr1 != cpu0_nmi_ctr1 && !nmi_done) { + pause(); + } + if (nmi_done) { + return; + } + apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_NMI | APIC_INT_ASSERT, 0); + /* make sure the NMI has arrived by sending an IPI after it */ + apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_FIXED | APIC_INT_ASSERT + | 0x44, 0); + ++cpu1_nmi_ctr2; 
+ while (cpu1_nmi_ctr2 != cpu0_nmi_ctr2 && !nmi_done) { + pause(); + } + } +} + +static void flush_nmi(isr_regs_t *regs) +{ + nmi_flushed = true; + apic_write(APIC_EOI, 0); +} + +static void test_multiple_nmi(void) +{ + int i; + bool ok = true; + + if (cpu_count() < 2) { + return; + } + + sti(); + handle_irq(2, multiple_nmi_handler); + handle_irq(0x44, flush_nmi); + on_cpu_async(1, kick_me_nmi, 0); + for (i = 0; i < 1000000; ++i) { + nmi_flushed = false; + nmi_received = 0; + ++cpu0_nmi_ctr1; + while (cpu1_nmi_ctr1 != cpu0_nmi_ctr1) { + pause(); + } + apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_NMI | APIC_INT_ASSERT, 0); + while (!nmi_flushed) { + pause(); + } + if (nmi_received != 2) { + ok = false; + break; + } + ++cpu0_nmi_ctr2; + while (cpu1_nmi_ctr2 != cpu0_nmi_ctr2) { + pause(); + } + } + nmi_done = true; + report("multiple nmi", ok); +} + +static volatile int lvtt_counter = 0; + +static void lvtt_handler(isr_regs_t *regs) +{ + lvtt_counter++; + eoi(); +} + +static void test_apic_timer_one_shot(void) +{ + uint64_t tsc1, tsc2; + static const uint32_t interval = 0x10000; + +#define APIC_LVT_TIMER_VECTOR (0xee) + + handle_irq(APIC_LVT_TIMER_VECTOR, lvtt_handler); + irq_enable(); + + /* One shot mode */ + apic_write(APIC_LVTT, APIC_LVT_TIMER_ONESHOT | + APIC_LVT_TIMER_VECTOR); + /* Divider == 1 */ + apic_write(APIC_TDCR, 0x0000000b); + + tsc1 = rdtsc(); + /* Set "Initial Counter Register", which starts the timer */ + apic_write(APIC_TMICT, interval); + while (!lvtt_counter); + tsc2 = rdtsc(); + + /* + * For LVT Timer clock, SDM vol 3 10.5.4 says it should be + * derived from processor's bus clock (IIUC which is the same + * as TSC), however QEMU seems to be using nanosecond. In all + * cases, the following should satisfy on all modern + * processors. 
+ */ + report("APIC LVT timer one shot", (lvtt_counter == 1) && + (tsc2 - tsc1 >= interval)); +} + +static atomic_t broadcast_counter; + +static void broadcast_handler(isr_regs_t *regs) +{ + atomic_inc(&broadcast_counter); + eoi(); +} + +static bool broadcast_received(unsigned ncpus) +{ + unsigned counter; + u64 start = rdtsc(); + + do { + counter = atomic_read(&broadcast_counter); + if (counter >= ncpus) + break; + pause(); + } while (rdtsc() - start < 1000000000); + + atomic_set(&broadcast_counter, 0); + + return counter == ncpus; +} + +static void test_physical_broadcast(void) +{ + unsigned ncpus = cpu_count(); + unsigned long cr3 = read_cr3(); + u32 broadcast_address = enable_x2apic() ? 0xffffffff : 0xff; + + handle_irq(BROADCAST_VECTOR, broadcast_handler); + for (int c = 1; c < ncpus; c++) + on_cpu(c, update_cr3, (void *)cr3); + + printf("starting broadcast (%s)\n", enable_x2apic() ? "x2apic" : "xapic"); + apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_FIXED | APIC_INT_ASSERT | + BROADCAST_VECTOR, broadcast_address); + report("APIC physical broadcast address", broadcast_received(ncpus)); + + apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_FIXED | APIC_INT_ASSERT | + BROADCAST_VECTOR | APIC_DEST_ALLINC, 0); + report("APIC physical broadcast shorthand", broadcast_received(ncpus)); +} + +int main() +{ + setup_vm(); + smp_init(); + + test_lapic_existence(); + + mask_pic_interrupts(); + test_apic_id(); + test_apic_disable(); + + // Disabled in v86: Not supported + //test_enable_x2apic(); + if(false) test_apicbase(); + + test_self_ipi(); + test_physical_broadcast(); + + test_sti_nmi(); + test_multiple_nmi(); + + test_apic_timer_one_shot(); + test_tsc_deadline_timer(); + + return report_summary(); +} diff --git a/tests/kvm-unit-tests/x86/asyncpf.c b/tests/kvm-unit-tests/x86/asyncpf.c new file mode 100644 index 00000000..e29e07c5 --- /dev/null +++ b/tests/kvm-unit-tests/x86/asyncpf.c @@ -0,0 +1,109 @@ +/* + * Async PF test. 
For the test to actually do anything it needs to be started + * in memory cgroup with 512M of memory and with more then 1G memory provided + * to the guest. + * + * To create cgroup do as root: + * mkdir /dev/cgroup + * mount -t cgroup none -omemory /dev/cgroup + * chmod a+rxw /dev/cgroup/ + * + * From a shell you will start qemu from: + * mkdir /dev/cgroup/1 + * echo $$ > /dev/cgroup/1/tasks + * echo 512M > /dev/cgroup/1/memory.limit_in_bytes + * + */ +#include "x86/msr.h" +#include "x86/processor.h" +#include "x86/apic-defs.h" +#include "x86/apic.h" +#include "x86/desc.h" +#include "x86/isr.h" +#include "x86/vm.h" + +#include "libcflat.h" +#include + +#define KVM_PV_REASON_PAGE_NOT_PRESENT 1 +#define KVM_PV_REASON_PAGE_READY 2 + +#define MSR_KVM_ASYNC_PF_EN 0x4b564d02 + +#define KVM_ASYNC_PF_ENABLED (1 << 0) +#define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1) + +volatile uint32_t apf_reason __attribute__((aligned(64))); +char *buf; +volatile uint64_t i; +volatile uint64_t phys; + +static inline uint32_t get_apf_reason(void) +{ + uint32_t r = apf_reason; + apf_reason = 0; + return r; +} + +static void pf_isr(struct ex_regs *r) +{ + void* virt = (void*)((ulong)(buf+i) & ~(PAGE_SIZE-1)); + uint32_t reason = get_apf_reason(); + + switch (reason) { + case 0: + report("unexpected #PF at %p", false, read_cr2()); + break; + case KVM_PV_REASON_PAGE_NOT_PRESENT: + phys = virt_to_phys_cr3(virt); + install_pte(phys_to_virt(read_cr3()), 1, virt, phys, 0); + write_cr3(read_cr3()); + report("Got not present #PF token %x virt addr %p phys addr %p", + true, read_cr2(), virt, phys); + while(phys) { + safe_halt(); /* enables irq */ + irq_disable(); + } + break; + case KVM_PV_REASON_PAGE_READY: + report("Got present #PF token %x", true, read_cr2()); + if ((uint32_t)read_cr2() == ~0) + break; + install_pte(phys_to_virt(read_cr3()), 1, virt, phys | PT_PRESENT_MASK | PT_WRITABLE_MASK, 0); + write_cr3(read_cr3()); + phys = 0; + break; + default: + report("unexpected async pf reason %d", false, 
reason); + break; + } +} + +#define MEM 1ull*1024*1024*1024 + +int main(int ac, char **av) +{ + int loop = 2; + + setup_vm(); + setup_idt(); + printf("install handler\n"); + handle_exception(14, pf_isr); + apf_reason = 0; + printf("enable async pf\n"); + wrmsr(MSR_KVM_ASYNC_PF_EN, virt_to_phys((void*)&apf_reason) | + KVM_ASYNC_PF_SEND_ALWAYS | KVM_ASYNC_PF_ENABLED); + printf("alloc memory\n"); + buf = vmalloc(MEM); + irq_enable(); + while(loop--) { + printf("start loop\n"); + /* access a lot of memory to make host swap it out */ + for (i=0; i < MEM; i+=4096) + buf[i] = 1; + printf("end loop\n"); + } + irq_disable(); + + return report_summary(); +} diff --git a/tests/kvm-unit-tests/x86/cmpxchg8b.c b/tests/kvm-unit-tests/x86/cmpxchg8b.c new file mode 100644 index 00000000..2e5a8e6d --- /dev/null +++ b/tests/kvm-unit-tests/x86/cmpxchg8b.c @@ -0,0 +1,27 @@ +#include "ioram.h" +#include "vm.h" +#include "libcflat.h" +#include "desc.h" +#include "types.h" +#include "processor.h" + +static void test_cmpxchg8b(u32 *mem) +{ + mem[1] = 2; + mem[0] = 1; + asm("push %%ebx\n" + "mov %[ebx_val], %%ebx\n" + "lock cmpxchg8b (%0)\n" + "pop %%ebx" : : "D" (mem), + "d" (2), "a" (1), "c" (4), [ebx_val] "i" (3) : "memory"); + report("cmpxchg8b", mem[0] == 3 && mem[1] == 4); +} + +int main() +{ + setup_vm(); + setup_idt(); + + test_cmpxchg8b(phys_to_virt(read_cr3()) + 4088); + return report_summary(); +} diff --git a/tests/kvm-unit-tests/x86/cstart.S b/tests/kvm-unit-tests/x86/cstart.S new file mode 100644 index 00000000..69b5c332 --- /dev/null +++ b/tests/kvm-unit-tests/x86/cstart.S @@ -0,0 +1,212 @@ + +#include "apic-defs.h" + +.globl boot_idt +boot_idt = 0 + +ipi_vector = 0x20 + +max_cpus = 64 + +.bss + + . = . + 4096 * max_cpus + .align 16 +stacktop: + + . = . 
+ 4096 + .align 16 +ring0stacktop: + +.data + +.align 4096 +pt: +i = 0 + .rept 1024 + .long 0x1e7 | (i << 22) + i = i + 1 + .endr + +.globl gdt32 +gdt32: + .quad 0 + .quad 0x00cf9b000000ffff // flat 32-bit code segment + .quad 0x00cf93000000ffff // flat 32-bit data segment + .quad 0x00cf1b000000ffff // flat 32-bit code segment, not present + .quad 0x00cffb000000ffff // 64-bit code segment (user) + .quad 0x00cff3000000ffff // 64-bit data segment (user) + + .quad 0 // 10 spare selectors + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .quad 0 + +tss_descr: + .rept max_cpus + .quad 0x000089000000ffff // 32-bit avail tss + .endr +gdt32_end: + +i = 0 +.globl tss +tss: + .rept max_cpus + .long 0 + .long ring0stacktop - i * 4096 + .long 16 + .quad 0, 0 + .quad 0, 0, 0, 0, 0, 0, 0, 0 + .long 0, 0, 0 + i = i + 1 + .endr +tss_end: + +idt_descr: + .word 16 * 256 - 1 + .long boot_idt + +.section .init + +.code32 + +mb_magic = 0x1BADB002 +mb_flags = 0x0 + + # multiboot header + .long mb_magic, mb_flags, 0 - (mb_magic + mb_flags) +mb_cmdline = 16 + +MSR_GS_BASE = 0xc0000101 + +.macro setup_percpu_area + lea -4096(%esp), %eax + mov $0, %edx + mov $MSR_GS_BASE, %ecx + wrmsr +.endm + +.globl start +start: + push %ebx + call setup_get_initrd + call setup_environ + mov mb_cmdline(%ebx), %eax + mov %eax, __args + call __setup_args + mov $stacktop, %esp + setup_percpu_area + call prepare_32 + jmpl $8, $start32 + +prepare_32: + lgdtl gdt32_descr + + mov %cr4, %eax + bts $4, %eax // pse + mov %eax, %cr4 + + mov $pt, %eax + mov %eax, %cr3 + + mov %cr0, %eax + bts $0, %eax + bts $31, %eax + mov %eax, %cr0 + ret + +smp_stacktop: .long 0xa0000 + +ap_start32: + mov $0x10, %ax + mov %ax, %ds + mov %ax, %es + mov %ax, %fs + mov %ax, %gs + mov %ax, %ss + mov $-4096, %esp + lock/xaddl %esp, smp_stacktop + setup_percpu_area + call prepare_32 + call load_tss + call enable_apic + call enable_x2apic + sti + nop + lock incw cpu_online_count + +1: hlt + jmp 1b + 
+start32: + call load_tss + call mask_pic_interrupts + call enable_apic + call smp_init + call enable_x2apic + push $__environ + push $__argv + push __argc + call main + push %eax + call exit + +load_tss: + lidt idt_descr + mov $16, %eax + mov %ax, %ss + mov $(APIC_DEFAULT_PHYS_BASE + APIC_ID), %eax + mov (%eax), %eax + shr $24, %eax + mov %eax, %ebx + shl $3, %ebx + mov $((tss_end - tss) / max_cpus), %edx + imul %edx + add $tss, %eax + mov %ax, tss_descr+2(%ebx) + shr $16, %eax + mov %al, tss_descr+4(%ebx) + shr $8, %eax + mov %al, tss_descr+7(%ebx) + lea tss_descr-gdt32(%ebx), %eax + ltr %ax + ret + +smp_init: + cld + lea sipi_entry, %esi + xor %edi, %edi + mov $(sipi_end - sipi_entry), %ecx + rep/movsb + mov $APIC_DEFAULT_PHYS_BASE, %eax + movl $(APIC_DEST_ALLBUT | APIC_DEST_PHYSICAL | APIC_DM_INIT | APIC_INT_ASSERT), APIC_ICR(%eax) + movl $(APIC_DEST_ALLBUT | APIC_DEST_PHYSICAL | APIC_DM_INIT), APIC_ICR(%eax) + movl $(APIC_DEST_ALLBUT | APIC_DEST_PHYSICAL | APIC_DM_STARTUP), APIC_ICR(%eax) + call fwcfg_get_nb_cpus +1: pause + cmpw %ax, cpu_online_count + jne 1b +smp_init_done: + ret + +cpu_online_count: .word 1 + +.code16 +sipi_entry: + mov %cr0, %eax + or $1, %eax + mov %eax, %cr0 + lgdtl gdt32_descr - sipi_entry + ljmpl $8, $ap_start32 + +gdt32_descr: + .word gdt32_end - gdt32 - 1 + .long gdt32 + +sipi_end: diff --git a/tests/kvm-unit-tests/x86/cstart64.S b/tests/kvm-unit-tests/x86/cstart64.S new file mode 100644 index 00000000..004c014b --- /dev/null +++ b/tests/kvm-unit-tests/x86/cstart64.S @@ -0,0 +1,256 @@ + +#include "apic-defs.h" + +.globl boot_idt +boot_idt = 0 + +.globl idt_descr +.globl tss_descr +.globl gdt64_desc + +ipi_vector = 0x20 + +max_cpus = 64 + +.bss + + . = . + 4096 * max_cpus + .align 16 +stacktop: + + . = . 
+ 4096 + .align 16 +ring0stacktop: + +.data + +.align 4096 +.globl ptl2 +ptl2: +i = 0 + .rept 512 * 4 + .quad 0x1e7 | (i << 21) + i = i + 1 + .endr + +.align 4096 +ptl3: + .quad ptl2 + 7 + 0 * 4096 + .quad ptl2 + 7 + 1 * 4096 + .quad ptl2 + 7 + 2 * 4096 + .quad ptl2 + 7 + 3 * 4096 + +.align 4096 +ptl4: + .quad ptl3 + 7 + +.align 4096 + +gdt64_desc: + .word gdt64_end - gdt64 - 1 + .quad gdt64 + +gdt64: + .quad 0 + .quad 0x00af9b000000ffff // 64-bit code segment + .quad 0x00cf93000000ffff // 64-bit data segment + .quad 0x00af1b000000ffff // 64-bit code segment, not present + .quad 0x00affb000000ffff // 64-bit code segment (user) + .quad 0x00cff3000000ffff // 64-bit data segment (user) + .quad 0x00cf9b000000ffff // 32-bit code segment + .quad 0x00cf92000000ffff // 32-bit data segment + .quad 0x008F9A000000FFFF // 16-bit code segment + .quad 0x008F92000000FFFF // 16-bit data segment + + .quad 0 // 6 spare selectors + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .quad 0 + +tss_descr: + .rept max_cpus + .quad 0x000089000000ffff // 64-bit avail tss + .quad 0 // tss high addr + .endr +gdt64_end: + +i = 0 +.globl tss +tss: + .rept max_cpus + .long 0 + .quad ring0stacktop - i * 4096 + .quad 0, 0 + .quad 0, 0, 0, 0, 0, 0, 0, 0 + .long 0, 0, 0 +i = i + 1 + .endr +tss_end: + +mb_boot_info: .quad 0 + +.section .init + +.code32 + +mb_magic = 0x1BADB002 +mb_flags = 0x0 + + # multiboot header + .long mb_magic, mb_flags, 0 - (mb_magic + mb_flags) +mb_cmdline = 16 + +MSR_GS_BASE = 0xc0000101 + +.macro setup_percpu_area + lea -4096(%esp), %eax + mov $0, %edx + mov $MSR_GS_BASE, %ecx + wrmsr +.endm + +.globl start +start: + mov %ebx, mb_boot_info + mov $stacktop, %esp + setup_percpu_area + call prepare_64 + jmpl $8, $start64 + +prepare_64: + lgdt gdt64_desc + + mov %cr4, %eax + bts $5, %eax // pae + mov %eax, %cr4 + + mov $ptl4, %eax + mov %eax, %cr3 + +efer = 0xc0000080 + mov $efer, %ecx + rdmsr + bts $8, %eax + wrmsr + + mov %cr0, %eax + bts $0, %eax + bts $31, %eax + mov %eax, %cr0 + ret 
+ +smp_stacktop: .long 0xa0000 + +.align 16 + +gdt32: + .quad 0 + .quad 0x00cf9b000000ffff // flat 32-bit code segment + .quad 0x00cf93000000ffff // flat 32-bit data segment +gdt32_end: + +.code16 +sipi_entry: + mov %cr0, %eax + or $1, %eax + mov %eax, %cr0 + lgdtl gdt32_descr - sipi_entry + ljmpl $8, $ap_start32 + +gdt32_descr: + .word gdt32_end - gdt32 - 1 + .long gdt32 + +sipi_end: + +.code32 +ap_start32: + mov $0x10, %ax + mov %ax, %ds + mov %ax, %es + mov %ax, %fs + mov %ax, %gs + mov %ax, %ss + mov $-4096, %esp + lock/xaddl %esp, smp_stacktop + setup_percpu_area + call prepare_64 + ljmpl $8, $ap_start64 + +.code64 +ap_start64: + call load_tss + call enable_apic + call enable_x2apic + sti + nop + lock incw cpu_online_count + +1: hlt + jmp 1b + +start64: + call load_tss + call mask_pic_interrupts + call enable_apic + call smp_init + call enable_x2apic + mov mb_boot_info(%rip), %rbx + mov %rbx, %rdi + call setup_get_initrd + call setup_environ + mov mb_cmdline(%rbx), %eax + mov %rax, __args(%rip) + call __setup_args + mov __argc(%rip), %edi + lea __argv(%rip), %rsi + lea __environ(%rip), %rdx + call main + mov %eax, %edi + call exit + +idt_descr: + .word 16 * 256 - 1 + .quad boot_idt + +load_tss: + lidtq idt_descr + mov $(APIC_DEFAULT_PHYS_BASE + APIC_ID), %eax + mov (%rax), %eax + shr $24, %eax + mov %eax, %ebx + shl $4, %ebx + mov $((tss_end - tss) / max_cpus), %edx + imul %edx + add $tss, %rax + mov %ax, tss_descr+2(%rbx) + shr $16, %rax + mov %al, tss_descr+4(%rbx) + shr $8, %rax + mov %al, tss_descr+7(%rbx) + shr $8, %rax + mov %eax, tss_descr+8(%rbx) + lea tss_descr-gdt64(%rbx), %rax + ltr %ax + ret + +smp_init: + cld + lea sipi_entry, %rsi + xor %rdi, %rdi + mov $(sipi_end - sipi_entry), %rcx + rep/movsb + mov $APIC_DEFAULT_PHYS_BASE, %eax + movl $(APIC_DEST_ALLBUT | APIC_DEST_PHYSICAL | APIC_DM_INIT | APIC_INT_ASSERT), APIC_ICR(%rax) + movl $(APIC_DEST_ALLBUT | APIC_DEST_PHYSICAL | APIC_DM_INIT), APIC_ICR(%rax) + movl $(APIC_DEST_ALLBUT | 
APIC_DEST_PHYSICAL | APIC_DM_STARTUP), APIC_ICR(%rax) + call fwcfg_get_nb_cpus +1: pause + cmpw %ax, cpu_online_count + jne 1b +smp_init_done: + ret + +cpu_online_count: .word 1 diff --git a/tests/kvm-unit-tests/x86/debug.c b/tests/kvm-unit-tests/x86/debug.c new file mode 100644 index 00000000..ad188656 --- /dev/null +++ b/tests/kvm-unit-tests/x86/debug.c @@ -0,0 +1,181 @@ +/* + * Test for x86 debugging facilities + * + * Copyright (c) Siemens AG, 2014 + * + * Authors: + * Jan Kiszka + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ + +#include "libcflat.h" +#include "desc.h" + +static volatile unsigned long bp_addr[10], dr6[10]; +static volatile unsigned int n; +static volatile unsigned long value; + +static unsigned long get_dr6(void) +{ + unsigned long value; + + asm volatile("mov %%dr6,%0" : "=r" (value)); + return value; +} + +static void set_dr0(void *value) +{ + asm volatile("mov %0,%%dr0" : : "r" (value)); +} + +static void set_dr1(void *value) +{ + asm volatile("mov %0,%%dr1" : : "r" (value)); +} + +static void set_dr6(unsigned long value) +{ + asm volatile("mov %0,%%dr6" : : "r" (value)); +} + +static void set_dr7(unsigned long value) +{ + asm volatile("mov %0,%%dr7" : : "r" (value)); +} + +static void handle_db(struct ex_regs *regs) +{ + bp_addr[n] = regs->rip; + dr6[n] = get_dr6(); + + if (dr6[n] & 0x1) + regs->rflags |= (1 << 16); + + if (++n >= 10) { + regs->rflags &= ~(1 << 8); + set_dr7(0x00000400); + } +} + +static void handle_bp(struct ex_regs *regs) +{ + bp_addr[0] = regs->rip; +} + +int main(int ac, char **av) +{ + unsigned long start; + + setup_idt(); + handle_exception(DB_VECTOR, handle_db); + handle_exception(BP_VECTOR, handle_bp); + +sw_bp: + asm volatile("int3"); + report("#BP", bp_addr[0] == (unsigned long)&&sw_bp + 1); + + n = 0; + set_dr0(&&hw_bp1); + set_dr7(0x00000402); +hw_bp1: + asm volatile("nop"); + report("hw breakpoint (test that dr6.BS is not set)", + n == 1 && + bp_addr[0] == ((unsigned 
long)&&hw_bp1) && dr6[0] == 0xffff0ff1); + + n = 0; + set_dr0(&&hw_bp2); + set_dr6(0x00004002); +hw_bp2: + asm volatile("nop"); + report("hw breakpoint (test that dr6.BS is not cleared)", + n == 1 && + bp_addr[0] == ((unsigned long)&&hw_bp2) && dr6[0] == 0xffff4ff1); + + n = 0; + set_dr6(0); + asm volatile( + "pushf\n\t" + "pop %%rax\n\t" + "or $(1<<8),%%rax\n\t" + "push %%rax\n\t" + "lea (%%rip),%0\n\t" + "popf\n\t" + "and $~(1<<8),%%rax\n\t" + "push %%rax\n\t" + "popf\n\t" + : "=g" (start) : : "rax"); + report("single step", + n == 3 && + bp_addr[0] == start+1+6 && dr6[0] == 0xffff4ff0 && + bp_addr[1] == start+1+6+1 && dr6[1] == 0xffff4ff0 && + bp_addr[2] == start+1+6+1+1 && dr6[2] == 0xffff4ff0); + + /* + * cpuid and rdmsr (among others) trigger VM exits and are then + * emulated. Test that single stepping works on emulated instructions. + */ + n = 0; + set_dr6(0); + asm volatile( + "pushf\n\t" + "pop %%rax\n\t" + "or $(1<<8),%%rax\n\t" + "push %%rax\n\t" + "lea (%%rip),%0\n\t" + "popf\n\t" + "and $~(1<<8),%%rax\n\t" + "push %%rax\n\t" + "xor %%rax,%%rax\n\t" + "cpuid\n\t" + "movl $0x1a0,%%ecx\n\t" + "rdmsr\n\t" + "popf\n\t" + : "=g" (start) : : "rax", "ebx", "ecx", "edx"); + report("single step emulated instructions", + n == 7 && + bp_addr[0] == start+1+6 && dr6[0] == 0xffff4ff0 && + bp_addr[1] == start+1+6+1 && dr6[1] == 0xffff4ff0 && + bp_addr[2] == start+1+6+1+3 && dr6[2] == 0xffff4ff0 && + bp_addr[3] == start+1+6+1+3+2 && dr6[3] == 0xffff4ff0 && + bp_addr[4] == start+1+6+1+3+2+5 && dr6[4] == 0xffff4ff0 && + bp_addr[5] == start+1+6+1+3+2+5+2 && dr6[5] == 0xffff4ff0 && + bp_addr[6] == start+1+6+1+3+2+5+2+1 && dr6[6] == 0xffff4ff0); + + n = 0; + set_dr1((void *)&value); + set_dr7(0x00d0040a); + + asm volatile( + "mov $42,%%rax\n\t" + "mov %%rax,%0\n\t" + : "=m" (value) : : "rax"); +hw_wp1: + report("hw watchpoint (test that dr6.BS is not cleared)", + n == 1 && + bp_addr[0] == ((unsigned long)&&hw_wp1) && dr6[0] == 0xffff4ff2); + + n = 0; + set_dr6(0); + + asm 
volatile( + "mov $42,%%rax\n\t" + "mov %%rax,%0\n\t" + : "=m" (value) : : "rax"); +hw_wp2: + report("hw watchpoint (test that dr6.BS is not set)", + n == 1 && + bp_addr[0] == ((unsigned long)&&hw_wp2) && dr6[0] == 0xffff0ff2); + + n = 0; + set_dr6(0); +sw_icebp: + asm volatile(".byte 0xf1"); + report("icebp", + n == 1 && + bp_addr[0] == (unsigned long)&&sw_icebp + 1 && + dr6[0] == 0xffff0ff0); + + return report_summary(); +} diff --git a/tests/kvm-unit-tests/x86/emulator.c b/tests/kvm-unit-tests/x86/emulator.c new file mode 100644 index 00000000..8d262d83 --- /dev/null +++ b/tests/kvm-unit-tests/x86/emulator.c @@ -0,0 +1,1168 @@ +#include "ioram.h" +#include "vm.h" +#include "libcflat.h" +#include "desc.h" +#include "types.h" +#include "processor.h" + +#define memset __builtin_memset +#define TESTDEV_IO_PORT 0xe0 + +static int exceptions; + +struct regs { + u64 rax, rbx, rcx, rdx; + u64 rsi, rdi, rsp, rbp; + u64 r8, r9, r10, r11; + u64 r12, r13, r14, r15; + u64 rip, rflags; +}; +struct regs inregs, outregs, save; + +struct insn_desc { + u64 ptr; + size_t len; +}; + +static char st1[] = "abcdefghijklmnop"; + +void test_stringio() +{ + unsigned char r = 0; + asm volatile("cld \n\t" + "movw %0, %%dx \n\t" + "rep outsb \n\t" + : : "i"((short)TESTDEV_IO_PORT), + "S"(st1), "c"(sizeof(st1) - 1)); + asm volatile("inb %1, %0\n\t" : "=a"(r) : "i"((short)TESTDEV_IO_PORT)); + report("outsb up", r == st1[sizeof(st1) - 2]); /* last char */ + + asm volatile("std \n\t" + "movw %0, %%dx \n\t" + "rep outsb \n\t" + : : "i"((short)TESTDEV_IO_PORT), + "S"(st1 + sizeof(st1) - 2), "c"(sizeof(st1) - 1)); + asm volatile("cld \n\t" : : ); + asm volatile("in %1, %0\n\t" : "=a"(r) : "i"((short)TESTDEV_IO_PORT)); + report("outsb down", r == st1[0]); +} + +void test_cmps_one(unsigned char *m1, unsigned char *m3) +{ + void *rsi, *rdi; + long rcx, tmp; + + rsi = m1; rdi = m3; rcx = 30; + asm volatile("xor %[tmp], %[tmp] \n\t" + "repe/cmpsb" + : "+S"(rsi), "+D"(rdi), "+c"(rcx), [tmp]"=&r"(tmp) + : 
: "cc"); + report("repe/cmpsb (1)", rcx == 0 && rsi == m1 + 30 && rdi == m3 + 30); + + rsi = m1; rdi = m3; rcx = 30; + asm volatile("or $1, %[tmp]\n\t" // clear ZF + "repe/cmpsb" + : "+S"(rsi), "+D"(rdi), "+c"(rcx), [tmp]"=&r"(tmp) + : : "cc"); + report("repe/cmpsb (1.zf)", rcx == 0 && rsi == m1 + 30 && rdi == m3 + 30); + + rsi = m1; rdi = m3; rcx = 15; + asm volatile("xor %[tmp], %[tmp] \n\t" + "repe/cmpsw" + : "+S"(rsi), "+D"(rdi), "+c"(rcx), [tmp]"=&r"(tmp) + : : "cc"); + report("repe/cmpsw (1)", rcx == 0 && rsi == m1 + 30 && rdi == m3 + 30); + + rsi = m1; rdi = m3; rcx = 7; + asm volatile("xor %[tmp], %[tmp] \n\t" + "repe/cmpsl" + : "+S"(rsi), "+D"(rdi), "+c"(rcx), [tmp]"=&r"(tmp) + : : "cc"); + report("repe/cmpll (1)", rcx == 0 && rsi == m1 + 28 && rdi == m3 + 28); + + rsi = m1; rdi = m3; rcx = 4; + asm volatile("xor %[tmp], %[tmp] \n\t" + "repe/cmpsq" + : "+S"(rsi), "+D"(rdi), "+c"(rcx), [tmp]"=&r"(tmp) + : : "cc"); + report("repe/cmpsq (1)", rcx == 0 && rsi == m1 + 32 && rdi == m3 + 32); + + rsi = m1; rdi = m3; rcx = 130; + asm volatile("xor %[tmp], %[tmp] \n\t" + "repe/cmpsb" + : "+S"(rsi), "+D"(rdi), "+c"(rcx), [tmp]"=&r"(tmp) + : : "cc"); + report("repe/cmpsb (2)", + rcx == 29 && rsi == m1 + 101 && rdi == m3 + 101); + + rsi = m1; rdi = m3; rcx = 65; + asm volatile("xor %[tmp], %[tmp] \n\t" + "repe/cmpsw" + : "+S"(rsi), "+D"(rdi), "+c"(rcx), [tmp]"=&r"(tmp) + : : "cc"); + report("repe/cmpsw (2)", + rcx == 14 && rsi == m1 + 102 && rdi == m3 + 102); + + rsi = m1; rdi = m3; rcx = 32; + asm volatile("xor %[tmp], %[tmp] \n\t" + "repe/cmpsl" + : "+S"(rsi), "+D"(rdi), "+c"(rcx), [tmp]"=&r"(tmp) + : : "cc"); + report("repe/cmpll (2)", + rcx == 6 && rsi == m1 + 104 && rdi == m3 + 104); + + rsi = m1; rdi = m3; rcx = 16; + asm volatile("xor %[tmp], %[tmp] \n\t" + "repe/cmpsq" + : "+S"(rsi), "+D"(rdi), "+c"(rcx), [tmp]"=&r"(tmp) + : : "cc"); + report("repe/cmpsq (2)", + rcx == 3 && rsi == m1 + 104 && rdi == m3 + 104); + +} + +void test_cmps(void *mem) +{ + unsigned 
char *m1 = mem, *m2 = mem + 1024; + unsigned char m3[1024]; + + for (int i = 0; i < 100; ++i) + m1[i] = m2[i] = m3[i] = i; + for (int i = 100; i < 200; ++i) + m1[i] = (m3[i] = m2[i] = i) + 1; + test_cmps_one(m1, m3); + test_cmps_one(m1, m2); +} + +void test_scas(void *mem) +{ + bool z; + void *di; + + *(ulong *)mem = 0x77665544332211; + + di = mem; + asm ("scasb; setz %0" : "=rm"(z), "+D"(di) : "a"(0xff11)); + report("scasb match", di == mem + 1 && z); + + di = mem; + asm ("scasb; setz %0" : "=rm"(z), "+D"(di) : "a"(0xff54)); + report("scasb mismatch", di == mem + 1 && !z); + + di = mem; + asm ("scasw; setz %0" : "=rm"(z), "+D"(di) : "a"(0xff2211)); + report("scasw match", di == mem + 2 && z); + + di = mem; + asm ("scasw; setz %0" : "=rm"(z), "+D"(di) : "a"(0xffdd11)); + report("scasw mismatch", di == mem + 2 && !z); + + di = mem; + asm ("scasl; setz %0" : "=rm"(z), "+D"(di) : "a"(0xff44332211ul)); + report("scasd match", di == mem + 4 && z); + + di = mem; + asm ("scasl; setz %0" : "=rm"(z), "+D"(di) : "a"(0x45332211)); + report("scasd mismatch", di == mem + 4 && !z); + + di = mem; + asm ("scasq; setz %0" : "=rm"(z), "+D"(di) : "a"(0x77665544332211ul)); + report("scasq match", di == mem + 8 && z); + + di = mem; + asm ("scasq; setz %0" : "=rm"(z), "+D"(di) : "a"(3)); + report("scasq mismatch", di == mem + 8 && !z); +} + +void test_cr8(void) +{ + unsigned long src, dst; + + dst = 777; + src = 3; + asm volatile("mov %[src], %%cr8; mov %%cr8, %[dst]" + : [dst]"+r"(dst), [src]"+r"(src)); + report("mov %%cr8", dst == 3 && src == 3); +} + +void test_push(void *mem) +{ + unsigned long tmp; + unsigned long *stack_top = mem + 4096; + unsigned long *new_stack_top; + unsigned long memw = 0x123456789abcdeful; + + memset(mem, 0x55, (void *)stack_top - mem); + + asm volatile("mov %%rsp, %[tmp] \n\t" + "mov %[stack_top], %%rsp \n\t" + "pushq $-7 \n\t" + "pushq %[reg] \n\t" + "pushq (%[mem]) \n\t" + "pushq $-7070707 \n\t" + "mov %%rsp, %[new_stack_top] \n\t" + "mov %[tmp], %%rsp" + 
: [tmp]"=&r"(tmp), [new_stack_top]"=r"(new_stack_top) + : [stack_top]"r"(stack_top), + [reg]"r"(-17l), [mem]"r"(&memw) + : "memory"); + + report("push $imm8", stack_top[-1] == -7ul); + report("push %%reg", stack_top[-2] == -17ul); + report("push mem", stack_top[-3] == 0x123456789abcdeful); + report("push $imm", stack_top[-4] == -7070707); +} + +void test_pop(void *mem) +{ + unsigned long tmp, tmp3, rsp, rbp; + unsigned long *stack_top = mem + 4096; + unsigned long memw = 0x123456789abcdeful; + static unsigned long tmp2; + + memset(mem, 0x55, (void *)stack_top - mem); + + asm volatile("pushq %[val] \n\t" + "popq (%[mem])" + : : [val]"m"(memw), [mem]"r"(mem) : "memory"); + report("pop mem", *(unsigned long *)mem == memw); + + memw = 7 - memw; + asm volatile("mov %%rsp, %[tmp] \n\t" + "mov %[stack_top], %%rsp \n\t" + "pushq %[val] \n\t" + "popq %[tmp2] \n\t" + "mov %[tmp], %%rsp" + : [tmp]"=&r"(tmp), [tmp2]"=m"(tmp2) + : [val]"r"(memw), [stack_top]"r"(stack_top) + : "memory"); + report("pop mem (2)", tmp2 == memw); + + memw = 129443 - memw; + asm volatile("mov %%rsp, %[tmp] \n\t" + "mov %[stack_top], %%rsp \n\t" + "pushq %[val] \n\t" + "popq %[tmp2] \n\t" + "mov %[tmp], %%rsp" + : [tmp]"=&r"(tmp), [tmp2]"=r"(tmp2) + : [val]"r"(memw), [stack_top]"r"(stack_top) + : "memory"); + report("pop reg", tmp2 == memw); + + asm volatile("mov %%rsp, %[tmp] \n\t" + "mov %[stack_top], %%rsp \n\t" + "push $1f \n\t" + "ret \n\t" + "2: jmp 2b \n\t" + "1: mov %[tmp], %%rsp" + : [tmp]"=&r"(tmp) : [stack_top]"r"(stack_top) + : "memory"); + report("ret", 1); + + stack_top[-1] = 0x778899; + asm volatile("mov %[stack_top], %%r8 \n\t" + "mov %%rsp, %%r9 \n\t" + "xchg %%rbp, %%r8 \n\t" + "leave \n\t" + "xchg %%rsp, %%r9 \n\t" + "xchg %%rbp, %%r8 \n\t" + "mov %%r9, %[tmp] \n\t" + "mov %%r8, %[tmp3]" + : [tmp]"=&r"(tmp), [tmp3]"=&r"(tmp3) : [stack_top]"r"(stack_top-1) + : "memory", "r8", "r9"); + report("leave", tmp == (ulong)stack_top && tmp3 == 0x778899); + + rbp = 0xaa55aa55bb66bb66ULL; + rsp 
= (unsigned long)stack_top; + asm volatile("mov %[rsp], %%r8 \n\t" + "mov %[rbp], %%r9 \n\t" + "xchg %%rsp, %%r8 \n\t" + "xchg %%rbp, %%r9 \n\t" + "enter $0x1238, $0 \n\t" + "xchg %%rsp, %%r8 \n\t" + "xchg %%rbp, %%r9 \n\t" + "xchg %%r8, %[rsp] \n\t" + "xchg %%r9, %[rbp]" + : [rsp]"+a"(rsp), [rbp]"+b"(rbp) : : "memory", "r8", "r9"); + report("enter", + rsp == (unsigned long)stack_top - 8 - 0x1238 + && rbp == (unsigned long)stack_top - 8 + && stack_top[-1] == 0xaa55aa55bb66bb66ULL); +} + +void test_ljmp(void *mem) +{ + unsigned char *m = mem; + volatile int res = 1; + + *(unsigned long**)m = &&jmpf; + asm volatile ("data16/mov %%cs, %0":"=m"(*(m + sizeof(unsigned long)))); + asm volatile ("rex64/ljmp *%0"::"m"(*m)); + res = 0; +jmpf: + report("ljmp", res); +} + +void test_incdecnotneg(void *mem) +{ + unsigned long *m = mem, v = 1234; + unsigned char *mb = mem, vb = 66; + + *m = 0; + + asm volatile ("incl %0":"+m"(*m)); + report("incl", *m == 1); + asm volatile ("decl %0":"+m"(*m)); + report("decl", *m == 0); + asm volatile ("incb %0":"+m"(*m)); + report("incb", *m == 1); + asm volatile ("decb %0":"+m"(*m)); + report("decb", *m == 0); + + asm volatile ("lock incl %0":"+m"(*m)); + report("lock incl", *m == 1); + asm volatile ("lock decl %0":"+m"(*m)); + report("lock decl", *m == 0); + asm volatile ("lock incb %0":"+m"(*m)); + report("lock incb", *m == 1); + asm volatile ("lock decb %0":"+m"(*m)); + report("lock decb", *m == 0); + + *m = v; + + asm ("lock negq %0" : "+m"(*m)); v = -v; + report("lock negl", *m == v); + asm ("lock notq %0" : "+m"(*m)); v = ~v; + report("lock notl", *m == v); + + *mb = vb; + + asm ("lock negb %0" : "+m"(*mb)); vb = -vb; + report("lock negb", *mb == vb); + asm ("lock notb %0" : "+m"(*mb)); vb = ~vb; + report("lock notb", *mb == vb); +} + +void test_smsw(uint64_t *h_mem) +{ + char mem[16]; + unsigned short msw, msw_orig, *pmsw; + int i, zero; + + msw_orig = read_cr0(); + + asm("smsw %0" : "=r"(msw)); + report("smsw (1)", msw == msw_orig); + 
+ memset(mem, 0, 16); + pmsw = (void *)mem; + asm("smsw %0" : "=m"(pmsw[4])); + zero = 1; + for (i = 0; i < 8; ++i) + if (i != 4 && pmsw[i]) + zero = 0; + report("smsw (2)", msw == pmsw[4] && zero); + + /* Trigger exit on smsw */ + *h_mem = 0x12345678abcdeful; + asm volatile("smsw %0" : "+m"(*h_mem)); + report("smsw (3)", msw == (unsigned short)*h_mem && + (*h_mem & ~0xfffful) == 0x12345678ab0000ul); +} + +void test_lmsw(void) +{ + char mem[16]; + unsigned short msw, *pmsw; + unsigned long cr0; + + cr0 = read_cr0(); + + msw = cr0 ^ 8; + asm("lmsw %0" : : "r"(msw)); + printf("before %lx after %lx\n", cr0, read_cr0()); + report("lmsw (1)", (cr0 ^ read_cr0()) == 8); + + pmsw = (void *)mem; + *pmsw = cr0; + asm("lmsw %0" : : "m"(*pmsw)); + printf("before %lx after %lx\n", cr0, read_cr0()); + report("lmsw (2)", cr0 == read_cr0()); + + /* lmsw can't clear cr0.pe */ + msw = (cr0 & ~1ul) ^ 4; /* change EM to force trap */ + asm("lmsw %0" : : "r"(msw)); + report("lmsw (3)", (cr0 ^ read_cr0()) == 4 && (cr0 & 1)); + + /* back to normal */ + msw = cr0; + asm("lmsw %0" : : "r"(msw)); +} + +void test_xchg(void *mem) +{ + unsigned long *memq = mem; + unsigned long rax; + + asm volatile("mov $0x123456789abcdef, %%rax\n\t" + "mov %%rax, (%[memq])\n\t" + "mov $0xfedcba9876543210, %%rax\n\t" + "xchg %%al, (%[memq])\n\t" + "mov %%rax, %[rax]\n\t" + : [rax]"=r"(rax) + : [memq]"r"(memq) + : "memory", "rax"); + report("xchg reg, r/m (1)", + rax == 0xfedcba98765432ef && *memq == 0x123456789abcd10); + + asm volatile("mov $0x123456789abcdef, %%rax\n\t" + "mov %%rax, (%[memq])\n\t" + "mov $0xfedcba9876543210, %%rax\n\t" + "xchg %%ax, (%[memq])\n\t" + "mov %%rax, %[rax]\n\t" + : [rax]"=r"(rax) + : [memq]"r"(memq) + : "memory", "rax"); + report("xchg reg, r/m (2)", + rax == 0xfedcba987654cdef && *memq == 0x123456789ab3210); + + asm volatile("mov $0x123456789abcdef, %%rax\n\t" + "mov %%rax, (%[memq])\n\t" + "mov $0xfedcba9876543210, %%rax\n\t" + "xchg %%eax, (%[memq])\n\t" + "mov %%rax, 
%[rax]\n\t" + : [rax]"=r"(rax) + : [memq]"r"(memq) + : "memory", "rax"); + report("xchg reg, r/m (3)", + rax == 0x89abcdef && *memq == 0x123456776543210); + + asm volatile("mov $0x123456789abcdef, %%rax\n\t" + "mov %%rax, (%[memq])\n\t" + "mov $0xfedcba9876543210, %%rax\n\t" + "xchg %%rax, (%[memq])\n\t" + "mov %%rax, %[rax]\n\t" + : [rax]"=r"(rax) + : [memq]"r"(memq) + : "memory", "rax"); + report("xchg reg, r/m (4)", + rax == 0x123456789abcdef && *memq == 0xfedcba9876543210); +} + +void test_xadd(void *mem) +{ + unsigned long *memq = mem; + unsigned long rax; + + asm volatile("mov $0x123456789abcdef, %%rax\n\t" + "mov %%rax, (%[memq])\n\t" + "mov $0xfedcba9876543210, %%rax\n\t" + "xadd %%al, (%[memq])\n\t" + "mov %%rax, %[rax]\n\t" + : [rax]"=r"(rax) + : [memq]"r"(memq) + : "memory", "rax"); + report("xadd reg, r/m (1)", + rax == 0xfedcba98765432ef && *memq == 0x123456789abcdff); + + asm volatile("mov $0x123456789abcdef, %%rax\n\t" + "mov %%rax, (%[memq])\n\t" + "mov $0xfedcba9876543210, %%rax\n\t" + "xadd %%ax, (%[memq])\n\t" + "mov %%rax, %[rax]\n\t" + : [rax]"=r"(rax) + : [memq]"r"(memq) + : "memory", "rax"); + report("xadd reg, r/m (2)", + rax == 0xfedcba987654cdef && *memq == 0x123456789abffff); + + asm volatile("mov $0x123456789abcdef, %%rax\n\t" + "mov %%rax, (%[memq])\n\t" + "mov $0xfedcba9876543210, %%rax\n\t" + "xadd %%eax, (%[memq])\n\t" + "mov %%rax, %[rax]\n\t" + : [rax]"=r"(rax) + : [memq]"r"(memq) + : "memory", "rax"); + report("xadd reg, r/m (3)", + rax == 0x89abcdef && *memq == 0x1234567ffffffff); + + asm volatile("mov $0x123456789abcdef, %%rax\n\t" + "mov %%rax, (%[memq])\n\t" + "mov $0xfedcba9876543210, %%rax\n\t" + "xadd %%rax, (%[memq])\n\t" + "mov %%rax, %[rax]\n\t" + : [rax]"=r"(rax) + : [memq]"r"(memq) + : "memory", "rax"); + report("xadd reg, r/m (4)", + rax == 0x123456789abcdef && *memq == 0xffffffffffffffff); +} + +void test_btc(void *mem) +{ + unsigned int *a = mem; + + memset(mem, 0, 4 * sizeof(unsigned int)); + + asm ("btcl $32, %0" 
:: "m"(a[0]) : "memory"); + asm ("btcl $1, %0" :: "m"(a[1]) : "memory"); + asm ("btcl %1, %0" :: "m"(a[0]), "r"(66) : "memory"); + report("btcl imm8, r/m", a[0] == 1 && a[1] == 2 && a[2] == 4); + + asm ("btcl %1, %0" :: "m"(a[3]), "r"(-1) : "memory"); + report("btcl reg, r/m", a[0] == 1 && a[1] == 2 && a[2] == 0x80000004); + + asm ("btcq %1, %0" : : "m"(a[2]), "r"(-1l) : "memory"); + report("btcq reg, r/m", a[0] == 1 && a[1] == 0x80000002 && + a[2] == 0x80000004 && a[3] == 0); +} + +void test_bsfbsr(void *mem) +{ + unsigned long rax, *memq = mem; + unsigned eax, *meml = mem; + unsigned short ax, *memw = mem; + unsigned char z; + + *memw = 0xc000; + asm("bsfw %[mem], %[a]" : [a]"=a"(ax) : [mem]"m"(*memw)); + report("bsfw r/m, reg", ax == 14); + + *meml = 0xc0000000; + asm("bsfl %[mem], %[a]" : [a]"=a"(eax) : [mem]"m"(*meml)); + report("bsfl r/m, reg", eax == 30); + + *memq = 0xc00000000000; + asm("bsfq %[mem], %[a]" : [a]"=a"(rax) : [mem]"m"(*memq)); + report("bsfq r/m, reg", rax == 46); + + *memq = 0; + asm("bsfq %[mem], %[a]; setz %[z]" + : [a]"=a"(rax), [z]"=rm"(z) : [mem]"m"(*memq)); + report("bsfq r/m, reg", z == 1); + + *memw = 0xc000; + asm("bsrw %[mem], %[a]" : [a]"=a"(ax) : [mem]"m"(*memw)); + report("bsrw r/m, reg", ax == 15); + + *meml = 0xc0000000; + asm("bsrl %[mem], %[a]" : [a]"=a"(eax) : [mem]"m"(*meml)); + report("bsrl r/m, reg", eax == 31); + + *memq = 0xc00000000000; + asm("bsrq %[mem], %[a]" : [a]"=a"(rax) : [mem]"m"(*memq)); + report("bsrq r/m, reg", rax == 47); + + *memq = 0; + asm("bsrq %[mem], %[a]; setz %[z]" + : [a]"=a"(rax), [z]"=rm"(z) : [mem]"m"(*memq)); + report("bsrq r/m, reg", z == 1); +} + +static void test_imul(ulong *mem) +{ + ulong a; + + *mem = 51; a = 0x1234567812345678UL; + asm ("imulw %1, %%ax" : "+a"(a) : "m"(*mem)); + report("imul ax, mem", a == 0x12345678123439e8); + + *mem = 51; a = 0x1234567812345678UL; + asm ("imull %1, %%eax" : "+a"(a) : "m"(*mem)); + report("imul eax, mem", a == 0xa06d39e8); + + *mem = 51; a = 
0x1234567812345678UL; + asm ("imulq %1, %%rax" : "+a"(a) : "m"(*mem)); + report("imul rax, mem", a == 0xA06D39EBA06D39E8UL); + + *mem = 0x1234567812345678UL; a = 0x8765432187654321L; + asm ("imulw $51, %1, %%ax" : "+a"(a) : "m"(*mem)); + report("imul ax, mem, imm8", a == 0x87654321876539e8); + + *mem = 0x1234567812345678UL; + asm ("imull $51, %1, %%eax" : "+a"(a) : "m"(*mem)); + report("imul eax, mem, imm8", a == 0xa06d39e8); + + *mem = 0x1234567812345678UL; + asm ("imulq $51, %1, %%rax" : "+a"(a) : "m"(*mem)); + report("imul rax, mem, imm8", a == 0xA06D39EBA06D39E8UL); + + *mem = 0x1234567812345678UL; a = 0x8765432187654321L; + asm ("imulw $311, %1, %%ax" : "+a"(a) : "m"(*mem)); + report("imul ax, mem, imm", a == 0x8765432187650bc8); + + *mem = 0x1234567812345678UL; + asm ("imull $311, %1, %%eax" : "+a"(a) : "m"(*mem)); + report("imul eax, mem, imm", a == 0x1d950bc8); + + *mem = 0x1234567812345678UL; + asm ("imulq $311, %1, %%rax" : "+a"(a) : "m"(*mem)); + report("imul rax, mem, imm", a == 0x1D950BDE1D950BC8L); +} + +static void test_muldiv(long *mem) +{ + long a, d, aa, dd; + u8 ex = 1; + + *mem = 0; a = 1; d = 2; + asm (ASM_TRY("1f") "divq %3; movb $0, %2; 1:" + : "+a"(a), "+d"(d), "+q"(ex) : "m"(*mem)); + report("divq (fault)", a == 1 && d == 2 && ex); + + *mem = 987654321098765UL; a = 123456789012345UL; d = 123456789012345UL; + asm (ASM_TRY("1f") "divq %3; movb $0, %2; 1:" + : "+a"(a), "+d"(d), "+q"(ex) : "m"(*mem)); + report("divq (1)", + a == 0x1ffffffb1b963b33ul && d == 0x273ba4384ede2ul && !ex); + aa = 0x1111111111111111; dd = 0x2222222222222222; + *mem = 0x3333333333333333; a = aa; d = dd; + asm("mulb %2" : "+a"(a), "+d"(d) : "m"(*mem)); + report("mulb mem", a == 0x1111111111110363 && d == dd); + *mem = 0x3333333333333333; a = aa; d = dd; + asm("mulw %2" : "+a"(a), "+d"(d) : "m"(*mem)); + report("mulw mem", a == 0x111111111111c963 && d == 0x2222222222220369); + *mem = 0x3333333333333333; a = aa; d = dd; + asm("mull %2" : "+a"(a), "+d"(d) : "m"(*mem)); + 
report("mull mem", a == 0x962fc963 && d == 0x369d036); + *mem = 0x3333333333333333; a = aa; d = dd; + asm("mulq %2" : "+a"(a), "+d"(d) : "m"(*mem)); + report("mulq mem", a == 0x2fc962fc962fc963 && d == 0x369d0369d0369d0); +} + +typedef unsigned __attribute__((vector_size(16))) sse128; + +typedef union { + sse128 sse; + unsigned u[4]; +} sse_union; + +static bool sseeq(sse_union *v1, sse_union *v2) +{ + bool ok = true; + int i; + + for (i = 0; i < 4; ++i) { + ok &= v1->u[i] == v2->u[i]; + } + + return ok; +} + +static void test_sse(sse_union *mem) +{ + sse_union v; + + write_cr0(read_cr0() & ~6); /* EM, TS */ + write_cr4(read_cr4() | 0x200); /* OSFXSR */ + v.u[0] = 1; v.u[1] = 2; v.u[2] = 3; v.u[3] = 4; + asm("movdqu %1, %0" : "=m"(*mem) : "x"(v.sse)); + report("movdqu (read)", sseeq(&v, mem)); + mem->u[0] = 5; mem->u[1] = 6; mem->u[2] = 7; mem->u[3] = 8; + asm("movdqu %1, %0" : "=x"(v.sse) : "m"(*mem)); + report("movdqu (write)", sseeq(mem, &v)); + + v.u[0] = 1; v.u[1] = 2; v.u[2] = 3; v.u[3] = 4; + asm("movaps %1, %0" : "=m"(*mem) : "x"(v.sse)); + report("movaps (read)", sseeq(mem, &v)); + mem->u[0] = 5; mem->u[1] = 6; mem->u[2] = 7; mem->u[3] = 8; + asm("movaps %1, %0" : "=x"(v.sse) : "m"(*mem)); + report("movaps (write)", sseeq(&v, mem)); + + v.u[0] = 1; v.u[1] = 2; v.u[2] = 3; v.u[3] = 4; + asm("movapd %1, %0" : "=m"(*mem) : "x"(v.sse)); + report("movapd (read)", sseeq(mem, &v)); + mem->u[0] = 5; mem->u[1] = 6; mem->u[2] = 7; mem->u[3] = 8; + asm("movapd %1, %0" : "=x"(v.sse) : "m"(*mem)); + report("movapd (write)", sseeq(&v, mem)); +} + +static void test_mmx(uint64_t *mem) +{ + uint64_t v; + + write_cr0(read_cr0() & ~6); /* EM, TS */ + asm volatile("fninit"); + v = 0x0102030405060708ULL; + asm("movq %1, %0" : "=m"(*mem) : "y"(v)); + report("movq (mmx, read)", v == *mem); + *mem = 0x8070605040302010ull; + asm("movq %1, %0" : "=y"(v) : "m"(*mem)); + report("movq (mmx, write)", v == *mem); +} + +static void test_rip_relative(unsigned *mem, char *insn_ram) +{ + /* 
movb $1, mem+2(%rip) */ + insn_ram[0] = 0xc6; + insn_ram[1] = 0x05; + *(unsigned *)&insn_ram[2] = 2 + (char *)mem - (insn_ram + 7); + insn_ram[6] = 0x01; + /* ret */ + insn_ram[7] = 0xc3; + + *mem = 0; + asm("callq *%1" : "+m"(*mem) : "r"(insn_ram)); + report("movb $imm, 0(%%rip)", *mem == 0x10000); +} + +static void test_shld_shrd(u32 *mem) +{ + *mem = 0x12345678; + asm("shld %2, %1, %0" : "+m"(*mem) : "r"(0xaaaaaaaaU), "c"((u8)3)); + report("shld (cl)", *mem == ((0x12345678 << 3) | 5)); + *mem = 0x12345678; + asm("shrd %2, %1, %0" : "+m"(*mem) : "r"(0x55555555U), "c"((u8)3)); + report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29))); +} + +static void test_cmov(u32 *mem) +{ + u64 val; + *mem = 0xabcdef12u; + asm ("movq $0x1234567812345678, %%rax\n\t" + "cmpl %%eax, %%eax\n\t" + "cmovnel (%[mem]), %%eax\n\t" + "movq %%rax, %[val]\n\t" + : [val]"=r"(val) : [mem]"r"(mem) : "%rax", "cc"); + report("cmovnel", val == 0x12345678ul); +} + +#define INSN_XCHG_ALL \ + "xchg %rax, 0+save \n\t" \ + "xchg %rbx, 8+save \n\t" \ + "xchg %rcx, 16+save \n\t" \ + "xchg %rdx, 24+save \n\t" \ + "xchg %rsi, 32+save \n\t" \ + "xchg %rdi, 40+save \n\t" \ + "xchg %rsp, 48+save \n\t" \ + "xchg %rbp, 56+save \n\t" \ + "xchg %r8, 64+save \n\t" \ + "xchg %r9, 72+save \n\t" \ + "xchg %r10, 80+save \n\t" \ + "xchg %r11, 88+save \n\t" \ + "xchg %r12, 96+save \n\t" \ + "xchg %r13, 104+save \n\t" \ + "xchg %r14, 112+save \n\t" \ + "xchg %r15, 120+save \n\t" + +asm( + ".align 4096\n\t" + "insn_page:\n\t" + "ret\n\t" + "pushf\n\t" + "push 136+save \n\t" + "popf \n\t" + INSN_XCHG_ALL + "test_insn:\n\t" + "in (%dx),%al\n\t" + ".skip 31, 0x90\n\t" + "test_insn_end:\n\t" + INSN_XCHG_ALL + "pushf \n\t" + "pop 136+save \n\t" + "popf \n\t" + "ret \n\t" + "insn_page_end:\n\t" + ".align 4096\n\t" +); + +#define MK_INSN(name, str) \ + asm ( \ + ".pushsection .data.insn \n\t" \ + "insn_" #name ": \n\t" \ + ".quad 1001f, 1002f - 1001f \n\t" \ + ".popsection \n\t" \ + ".pushsection .text.insn, \"ax\" \n\t" 
\ + "1001: \n\t" \ + "insn_code_" #name ": " str " \n\t" \ + "1002: \n\t" \ + ".popsection" \ + ); \ + extern struct insn_desc insn_##name; + +static void trap_emulator(uint64_t *mem, void *alt_insn_page, + struct insn_desc *alt_insn) +{ + ulong *cr3 = (ulong *)read_cr3(); + void *insn_ram; + extern u8 insn_page[], test_insn[]; + + insn_ram = vmap(virt_to_phys(insn_page), 4096); + memcpy(alt_insn_page, insn_page, 4096); + memcpy(alt_insn_page + (test_insn - insn_page), + (void *)(alt_insn->ptr), alt_insn->len); + save = inregs; + + /* Load the code TLB with insn_page, but point the page tables at + alt_insn_page (and keep the data TLB clear, for AMD decode assist). + This will make the CPU trap on the insn_page instruction but the + hypervisor will see alt_insn_page. */ + install_page(cr3, virt_to_phys(insn_page), insn_ram); + invlpg(insn_ram); + /* Load code TLB */ + asm volatile("call *%0" : : "r"(insn_ram)); + install_page(cr3, virt_to_phys(alt_insn_page), insn_ram); + /* Trap, let hypervisor emulate at alt_insn_page */ + asm volatile("call *%0": : "r"(insn_ram+1)); + + outregs = save; +} + +static unsigned long rip_advance; + +static void advance_rip_and_note_exception(struct ex_regs *regs) +{ + ++exceptions; + regs->rip += rip_advance; +} + +static void test_mmx_movq_mf(uint64_t *mem, uint8_t *insn_page, + uint8_t *alt_insn_page, void *insn_ram) +{ + uint16_t fcw = 0; /* all exceptions unmasked */ + /* movq %mm0, (%rax) */ + void *stack = alloc_page(); + + write_cr0(read_cr0() & ~6); /* TS, EM */ + exceptions = 0; + handle_exception(MF_VECTOR, advance_rip_and_note_exception); + asm volatile("fninit; fldcw %0" : : "m"(fcw)); + asm volatile("fldz; fldz; fdivp"); /* generate exception */ + + MK_INSN(mmx_movq_mf, "movq %mm0, (%rax) \n\t"); + rip_advance = insn_mmx_movq_mf.len; + inregs = (struct regs){ .rsp=(u64)stack+1024 }; + trap_emulator(mem, alt_insn_page, &insn_mmx_movq_mf); + /* exit MMX mode */ + asm volatile("fnclex; emms"); + report("movq mmx generates 
#MF", exceptions == 1); + handle_exception(MF_VECTOR, 0); +} + +static void test_jmp_noncanonical(uint64_t *mem) +{ + extern char nc_jmp_start, nc_jmp_end; + + *mem = 0x1111111111111111ul; + + exceptions = 0; + rip_advance = &nc_jmp_end - &nc_jmp_start; + handle_exception(GP_VECTOR, advance_rip_and_note_exception); + asm volatile ("nc_jmp_start: jmp *%0; nc_jmp_end:" : : "m"(*mem)); + report("jump to non-canonical address", exceptions == 1); + handle_exception(GP_VECTOR, 0); +} + +static void test_movabs(uint64_t *mem, uint8_t *insn_page, + uint8_t *alt_insn_page, void *insn_ram) +{ + /* mov $0x9090909090909090, %rcx */ + MK_INSN(movabs, "mov $0x9090909090909090, %rcx\n\t"); + inregs = (struct regs){ 0 }; + trap_emulator(mem, alt_insn_page, &insn_movabs); + report("64-bit mov imm2", outregs.rcx == 0x9090909090909090); +} + +static void test_smsw_reg(uint64_t *mem, uint8_t *insn_page, + uint8_t *alt_insn_page, void *insn_ram) +{ + unsigned long cr0 = read_cr0(); + inregs = (struct regs){ .rax = 0x1234567890abcdeful }; + + MK_INSN(smsww, "smsww %ax\n\t"); + trap_emulator(mem, alt_insn_page, &insn_smsww); + report("16-bit smsw reg", (u16)outregs.rax == (u16)cr0 && + outregs.rax >> 16 == inregs.rax >> 16); + + MK_INSN(smswl, "smswl %eax\n\t"); + trap_emulator(mem, alt_insn_page, &insn_smswl); + report("32-bit smsw reg", outregs.rax == (u32)cr0); + + MK_INSN(smswq, "smswq %rax\n\t"); + trap_emulator(mem, alt_insn_page, &insn_smswq); + report("64-bit smsw reg", outregs.rax == cr0); +} + +static void test_nop(uint64_t *mem, uint8_t *insn_page, + uint8_t *alt_insn_page, void *insn_ram) +{ + inregs = (struct regs){ .rax = 0x1234567890abcdeful }; + MK_INSN(nop, "nop\n\t"); + trap_emulator(mem, alt_insn_page, &insn_nop); + report("nop", outregs.rax == inregs.rax); +} + +static void test_mov_dr(uint64_t *mem, uint8_t *insn_page, + uint8_t *alt_insn_page, void *insn_ram) +{ + bool rtm_support = cpuid(7).b & (1 << 11); + unsigned long dr6_fixed_1 = rtm_support ? 
0xfffe0ff0ul : 0xffff0ff0ul; + inregs = (struct regs){ .rax = 0 }; + MK_INSN(mov_to_dr6, "movq %rax, %dr6\n\t"); + trap_emulator(mem, alt_insn_page, &insn_mov_to_dr6); + MK_INSN(mov_from_dr6, "movq %dr6, %rax\n\t"); + trap_emulator(mem, alt_insn_page, &insn_mov_from_dr6); + report("mov_dr6", outregs.rax == dr6_fixed_1); +} + +static void test_push16(uint64_t *mem) +{ + uint64_t rsp1, rsp2; + uint16_t r; + + asm volatile ( "movq %%rsp, %[rsp1]\n\t" + "pushw %[v]\n\t" + "popw %[r]\n\t" + "movq %%rsp, %[rsp2]\n\t" + "movq %[rsp1], %%rsp\n\t" : + [rsp1]"=r"(rsp1), [rsp2]"=r"(rsp2), [r]"=r"(r) + : [v]"m"(*mem) : "memory"); + report("push16", rsp1 == rsp2); +} + +static void test_crosspage_mmio(volatile uint8_t *mem) +{ + volatile uint16_t w, *pw; + + pw = (volatile uint16_t *)&mem[4095]; + mem[4095] = 0x99; + mem[4096] = 0x77; + asm volatile("mov %1, %0" : "=r"(w) : "m"(*pw) : "memory"); + report("cross-page mmio read", w == 0x7799); + asm volatile("mov %1, %0" : "=m"(*pw) : "r"((uint16_t)0x88aa)); + report("cross-page mmio write", mem[4095] == 0xaa && mem[4096] == 0x88); +} + +static void test_string_io_mmio(volatile uint8_t *mem) +{ + /* Cross MMIO pages.*/ + volatile uint8_t *mmio = mem + 4032; + + asm volatile("outw %%ax, %%dx \n\t" : : "a"(0x9999), "d"(TESTDEV_IO_PORT)); + + asm volatile ("cld; rep insb" : : "d" (TESTDEV_IO_PORT), "D" (mmio), "c" (1024)); + + report("string_io_mmio", mmio[1023] == 0x99); +} + +/* kvm doesn't allow lidt/lgdt from mmio, so the test is disabled */ +#if 0 +static void test_lgdt_lidt(volatile uint8_t *mem) +{ + struct descriptor_table_ptr orig, fresh = {}; + + sgdt(&orig); + *(struct descriptor_table_ptr *)mem = (struct descriptor_table_ptr) { + .limit = 0xf234, + .base = 0x12345678abcd, + }; + cli(); + asm volatile("lgdt %0" : : "m"(*(struct descriptor_table_ptr *)mem)); + sgdt(&fresh); + lgdt(&orig); + sti(); + report("lgdt (long address)", orig.limit == fresh.limit && orig.base == fresh.base); + + sidt(&orig); + *(struct 
descriptor_table_ptr *)mem = (struct descriptor_table_ptr) { + .limit = 0x432f, + .base = 0xdbca87654321, + }; + cli(); + asm volatile("lidt %0" : : "m"(*(struct descriptor_table_ptr *)mem)); + sidt(&fresh); + lidt(&orig); + sti(); + report("lidt (long address)", orig.limit == fresh.limit && orig.base == fresh.base); +} +#endif + +static void ss_bad_rpl(struct ex_regs *regs) +{ + extern char ss_bad_rpl_cont; + + ++exceptions; + regs->rip = (ulong)&ss_bad_rpl_cont; +} + +static void test_sreg(volatile uint16_t *mem) +{ + u16 ss = read_ss(); + + // check for null segment load + *mem = 0; + asm volatile("mov %0, %%ss" : : "m"(*mem)); + report("mov null, %%ss", read_ss() == 0); + + // check for exception when ss.rpl != cpl on null segment load + exceptions = 0; + handle_exception(GP_VECTOR, ss_bad_rpl); + *mem = 3; + asm volatile("mov %0, %%ss; ss_bad_rpl_cont:" : : "m"(*mem)); + report("mov null, %%ss (with ss.rpl != cpl)", exceptions == 1 && read_ss() == 0); + handle_exception(GP_VECTOR, 0); + write_ss(ss); +} + +/* Broken emulation causes triple fault, which skips the other tests. 
*/ +#if 0 +static void test_lldt(volatile uint16_t *mem) +{ + u64 gdt[] = { 0, /* null descriptor */ +#ifdef __X86_64__ + 0, /* ldt descriptor is 16 bytes in long mode */ +#endif + 0x0000f82000000ffffull /* ldt descriptor */ }; + struct descriptor_table_ptr gdt_ptr = { .limit = sizeof(gdt) - 1, + .base = (ulong)&gdt }; + struct descriptor_table_ptr orig_gdt; + + cli(); + sgdt(&orig_gdt); + lgdt(&gdt_ptr); + *mem = 0x8; + asm volatile("lldt %0" : : "m"(*mem)); + lgdt(&orig_gdt); + sti(); + report("lldt", sldt() == *mem); +} +#endif + +static void test_ltr(volatile uint16_t *mem) +{ + struct descriptor_table_ptr gdt_ptr; + uint64_t *gdt, *trp; + uint16_t tr = str(); + uint64_t busy_mask = (uint64_t)1 << 41; + + sgdt(&gdt_ptr); + gdt = (uint64_t *)gdt_ptr.base; + trp = &gdt[tr >> 3]; + *trp &= ~busy_mask; + *mem = tr; + asm volatile("ltr %0" : : "m"(*mem) : "memory"); + report("ltr", str() == tr && (*trp & busy_mask)); +} + +static void test_simplealu(u32 *mem) +{ + *mem = 0x1234; + asm("or %1, %0" : "+m"(*mem) : "r"(0x8001)); + report("or", *mem == 0x9235); + asm("add %1, %0" : "+m"(*mem) : "r"(2)); + report("add", *mem == 0x9237); + asm("xor %1, %0" : "+m"(*mem) : "r"(0x1111)); + report("xor", *mem == 0x8326); + asm("sub %1, %0" : "+m"(*mem) : "r"(0x26)); + report("sub", *mem == 0x8300); + asm("clc; adc %1, %0" : "+m"(*mem) : "r"(0x100)); + report("adc(0)", *mem == 0x8400); + asm("stc; adc %1, %0" : "+m"(*mem) : "r"(0x100)); + report("adc(0)", *mem == 0x8501); + asm("clc; sbb %1, %0" : "+m"(*mem) : "r"(0)); + report("sbb(0)", *mem == 0x8501); + asm("stc; sbb %1, %0" : "+m"(*mem) : "r"(0)); + report("sbb(1)", *mem == 0x8500); + asm("and %1, %0" : "+m"(*mem) : "r"(0xfe77)); + report("and", *mem == 0x8400); + asm("test %1, %0" : "+m"(*mem) : "r"(0xf000)); + report("test", *mem == 0x8400); +} + +static void illegal_movbe_handler(struct ex_regs *regs) +{ + extern char bad_movbe_cont; + + ++exceptions; + regs->rip = (ulong)&bad_movbe_cont; +} + +static void 
test_illegal_movbe(void) +{ + if (!(cpuid(1).c & (1 << 22))) { + report_skip("illegal movbe"); + return; + } + + exceptions = 0; + handle_exception(UD_VECTOR, illegal_movbe_handler); + asm volatile(".byte 0x0f; .byte 0x38; .byte 0xf0; .byte 0xc0;\n\t" + " bad_movbe_cont:" : : : "rax"); + report("illegal movbe", exceptions == 1); + handle_exception(UD_VECTOR, 0); +} + +int main() +{ + void *mem; + void *insn_page, *alt_insn_page; + void *insn_ram; + unsigned long t1, t2; + + setup_vm(); + setup_idt(); + mem = alloc_vpages(2); + install_page((void *)read_cr3(), IORAM_BASE_PHYS, mem); + // install the page twice to test cross-page mmio + install_page((void *)read_cr3(), IORAM_BASE_PHYS, mem + 4096); + insn_page = alloc_page(); + alt_insn_page = alloc_page(); + insn_ram = vmap(virt_to_phys(insn_page), 4096); + + // test mov reg, r/m and mov r/m, reg + t1 = 0x123456789abcdef; + asm volatile("mov %[t1], (%[mem]) \n\t" + "mov (%[mem]), %[t2]" + : [t2]"=r"(t2) + : [t1]"r"(t1), [mem]"r"(mem) + : "memory"); + report("mov reg, r/m (1)", t2 == 0x123456789abcdef); + + test_simplealu(mem); + test_cmps(mem); + test_scas(mem); + + test_push(mem); + test_pop(mem); + + test_xchg(mem); + test_xadd(mem); + + test_cr8(); + + test_smsw(mem); + test_lmsw(); + test_ljmp(mem); + test_stringio(); + test_incdecnotneg(mem); + test_btc(mem); + test_bsfbsr(mem); + test_imul(mem); + test_muldiv(mem); + test_sse(mem); + test_mmx(mem); + test_rip_relative(mem, insn_ram); + test_shld_shrd(mem); + //test_lgdt_lidt(mem); + test_sreg(mem); + //test_lldt(mem); + test_ltr(mem); + test_cmov(mem); + + test_mmx_movq_mf(mem, insn_page, alt_insn_page, insn_ram); + test_movabs(mem, insn_page, alt_insn_page, insn_ram); + test_smsw_reg(mem, insn_page, alt_insn_page, insn_ram); + test_nop(mem, insn_page, alt_insn_page, insn_ram); + test_mov_dr(mem, insn_page, alt_insn_page, insn_ram); + test_push16(mem); + test_crosspage_mmio(mem); + + test_string_io_mmio(mem); + + test_jmp_noncanonical(mem); + 
test_illegal_movbe(); + + return report_summary(); +} diff --git a/tests/kvm-unit-tests/x86/eventinj.c b/tests/kvm-unit-tests/x86/eventinj.c new file mode 100644 index 00000000..9ee557b8 --- /dev/null +++ b/tests/kvm-unit-tests/x86/eventinj.c @@ -0,0 +1,421 @@ +#include "libcflat.h" +#include "processor.h" +#include "vm.h" +#include "desc.h" +#include "isr.h" +#include "apic.h" +#include "apic-defs.h" + +#ifdef __x86_64__ +# define R "r" +#else +# define R "e" +#endif + +static inline void io_delay(void) +{ +} + +void apic_self_ipi(u8 v) +{ + apic_icr_write(APIC_DEST_SELF | APIC_DEST_PHYSICAL | APIC_DM_FIXED | + APIC_INT_ASSERT | v, 0); +} + +void apic_self_nmi(void) +{ + apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_NMI | APIC_INT_ASSERT, 0); +} + +#define flush_phys_addr(__s) outl(__s, 0xe4) +#define flush_stack() do { \ + int __l; \ + flush_phys_addr(virt_to_phys(&__l)); \ + } while (0) + +extern char isr_iret_ip[]; + +static void flush_idt_page() +{ + struct descriptor_table_ptr ptr; + sidt(&ptr); + flush_phys_addr(virt_to_phys((void*)ptr.base)); +} + +static volatile unsigned int test_divider; +static volatile int test_count; + +ulong stack_phys; +void *stack_va; + +void do_pf_tss(void) +{ + printf("PF running\n"); + install_pte(phys_to_virt(read_cr3()), 1, stack_va, + stack_phys | PT_PRESENT_MASK | PT_WRITABLE_MASK, 0); + invlpg(stack_va); +} + +extern void pf_tss(void); + +asm ("pf_tss: \n\t" +#ifdef __x86_64__ + // no task on x86_64, save/restore caller-save regs + "push %rax; push %rcx; push %rdx; push %rsi; push %rdi\n" + "push %r8; push %r9; push %r10; push %r11\n" +#endif + "call do_pf_tss \n\t" +#ifdef __x86_64__ + "pop %r11; pop %r10; pop %r9; pop %r8\n" + "pop %rdi; pop %rsi; pop %rdx; pop %rcx; pop %rax\n" +#endif + "add $"S", %"R "sp\n\t" // discard error code + "iret"W" \n\t" + "jmp pf_tss\n\t" + ); + + +#ifndef __x86_64__ +static void of_isr(struct ex_regs *r) +{ + printf("OF isr running\n"); + test_count++; +} +#endif + +static void np_isr(struct 
ex_regs *r) +{ + printf("NP isr running %lx err=%lx\n", r->rip, r->error_code); + set_idt_sel(33, read_cs()); + test_count++; +} + +static void de_isr(struct ex_regs *r) +{ + printf("DE isr running divider is %d\n", test_divider); + test_divider = 10; +} + +static void bp_isr(struct ex_regs *r) +{ + printf("BP isr running\n"); + test_count++; +} + +static void nested_nmi_isr(struct ex_regs *r) +{ + printf("Nested NMI isr running rip=%lx\n", r->rip); + + if (r->rip != (ulong)&isr_iret_ip) + test_count++; +} +static void nmi_isr(struct ex_regs *r) +{ + printf("NMI isr running %p\n", &isr_iret_ip); + test_count++; + handle_exception(2, nested_nmi_isr); + printf("Sending nested NMI to self\n"); + apic_self_nmi(); + io_delay(); + printf("After nested NMI to self\n"); +} + +unsigned long *iret_stack; + +static void nested_nmi_iret_isr(struct ex_regs *r) +{ + printf("Nested NMI isr running rip=%lx\n", r->rip); + + if (r->rip == iret_stack[-3]) + test_count++; +} + +extern void do_iret(ulong phys_stack, void *virt_stack); + +// Return to same privilege level won't pop SS or SP, so +// save it in RDX while we run on the nested stack + +asm("do_iret:" +#ifdef __x86_64__ + "mov %rdi, %rax \n\t" // phys_stack + "mov %rsi, %rdx \n\t" // virt_stack +#else + "mov 4(%esp), %eax \n\t" // phys_stack + "mov 8(%esp), %edx \n\t" // virt_stack +#endif + "xchg %"R "dx, %"R "sp \n\t" // point to new stack + "pushf"W" \n\t" + "mov %cs, %ecx \n\t" + "push"W" %"R "cx \n\t" + "push"W" $1f \n\t" + "outl %eax, $0xe4 \n\t" // flush page + "iret"W" \n\t" + "1: xchg %"R "dx, %"R "sp \n\t" // point to old stack + "ret\n\t" + ); + +static void nmi_iret_isr(struct ex_regs *r) +{ + unsigned long *s = alloc_page(); + test_count++; + printf("NMI isr running stack %p\n", s); + handle_exception(2, nested_nmi_iret_isr); + printf("Sending nested NMI to self\n"); + apic_self_nmi(); + printf("After nested NMI to self\n"); + iret_stack = &s[128]; + do_iret(virt_to_phys(s), iret_stack); + printf("After 
iret\n"); +} + +static void tirq0(isr_regs_t *r) +{ + printf("irq0 running\n"); + if (test_count != 0) + test_count++; + eoi(); +} + +static void tirq1(isr_regs_t *r) +{ + printf("irq1 running\n"); + test_count++; + eoi(); +} + +ulong saved_stack; + +#define switch_stack(S) do { \ + asm volatile ("mov %%" R "sp, %0":"=r"(saved_stack)); \ + asm volatile ("mov %0, %%" R "sp"::"r"(S)); \ + } while(0) + +#define restore_stack() do { \ + asm volatile ("mov %0, %%" R "sp"::"r"(saved_stack)); \ + } while(0) + +int main() +{ + unsigned int res; + ulong *pt, *cr3, i; + + setup_vm(); + setup_idt(); + setup_alt_stack(); + + handle_irq(32, tirq0); + handle_irq(33, tirq1); + + /* generate HW exception that will fault on IDT and stack */ + handle_exception(0, de_isr); + printf("Try to divide by 0\n"); + flush_idt_page(); + flush_stack(); + asm volatile ("divl %3": "=a"(res) + : "d"(0), "a"(1500), "m"(test_divider)); + printf("Result is %d\n", res); + report("DE exception", res == 150); + + /* generate soft exception (BP) that will fault on IDT and stack */ + test_count = 0; + handle_exception(3, bp_isr); + printf("Try int 3\n"); + flush_idt_page(); + flush_stack(); + asm volatile ("int $3"); + printf("After int 3\n"); + report("BP exception", test_count == 1); + +#ifndef __x86_64__ + /* generate soft exception (OF) that will fault on IDT */ + test_count = 0; + handle_exception(4, of_isr); + flush_idt_page(); + printf("Try into\n"); + asm volatile ("addb $127, %b0\ninto"::"a"(127)); + printf("After into\n"); + report("OF exception", test_count == 1); + + /* generate soft exception (OF) using two bit instruction that will + fault on IDT */ + test_count = 0; + handle_exception(4, of_isr); + flush_idt_page(); + printf("Try into\n"); + asm volatile ("addb $127, %b0\naddr16 into"::"a"(127)); + printf("After into\n"); + report("2 byte OF exception", test_count == 1); +#endif + + /* generate HW interrupt that will fault on IDT */ + test_count = 0; + flush_idt_page(); + printf("Sending 
vec 33 to self\n"); + irq_enable(); + apic_self_ipi(33); + io_delay(); + irq_disable(); + printf("After vec 33 to self\n"); + report("vec 33", test_count == 1); + + /* generate soft interrupt that will fault on IDT and stack */ + test_count = 0; + flush_idt_page(); + printf("Try int $33\n"); + flush_stack(); + asm volatile ("int $33"); + printf("After int $33\n"); + report("int $33", test_count == 1); + + /* Inject two HW interrupt than open iterrupt windows. Both interrupt + will fault on IDT access */ + test_count = 0; + flush_idt_page(); + printf("Sending vec 32 and 33 to self\n"); + apic_self_ipi(32); + apic_self_ipi(33); + io_delay(); + irq_enable(); + asm volatile("nop"); + irq_disable(); + printf("After vec 32 and 33 to self\n"); + report("vec 32/33", test_count == 2); + + + /* Inject HW interrupt, do sti and than (while in irq shadow) inject + soft interrupt. Fault during soft interrupt. Soft interrup shoud be + handled before HW interrupt */ + test_count = 0; + flush_idt_page(); + printf("Sending vec 32 and int $33\n"); + apic_self_ipi(32); + flush_stack(); + io_delay(); + asm volatile ("sti; int $33"); + irq_disable(); + printf("After vec 32 and int $33\n"); + report("vec 32/int $33", test_count == 2); + + /* test that TPR is honored */ + test_count = 0; + handle_irq(62, tirq1); + flush_idt_page(); + printf("Sending vec 33 and 62 and mask one with TPR\n"); + apic_write(APIC_TASKPRI, 0xf << 4); + irq_enable(); + apic_self_ipi(32); + apic_self_ipi(62); + io_delay(); + apic_write(APIC_TASKPRI, 0x2 << 4); + printf("After 33/62 TPR test\n"); + report("TPR", test_count == 1); + apic_write(APIC_TASKPRI, 0x0); + while(test_count != 2); /* wait for second irq */ + irq_disable(); + + /* test fault durint NP delivery */ + printf("Before NP test\n"); + test_count = 0; + handle_exception(11, np_isr); + set_idt_sel(33, NP_SEL); + flush_idt_page(); + flush_stack(); + asm volatile ("int $33"); + printf("After int33\n"); + report("NP exception", test_count == 2); + + /* 
generate NMI that will fault on IDT */ + test_count = 0; + handle_exception(2, nmi_isr); + flush_idt_page(); + printf("Sending NMI to self\n"); + apic_self_nmi(); + printf("After NMI to self\n"); + /* this is needed on VMX without NMI window notification. + Interrupt windows is used instead, so let pending NMI + to be injected */ + irq_enable(); + asm volatile ("nop"); + irq_disable(); + report("NMI", test_count == 2); + + /* generate NMI that will fault on IRET */ + printf("Before NMI IRET test\n"); + test_count = 0; + handle_exception(2, nmi_iret_isr); + printf("Sending NMI to self\n"); + apic_self_nmi(); + /* this is needed on VMX without NMI window notification. + Interrupt windows is used instead, so let pending NMI + to be injected */ + irq_enable(); + asm volatile ("nop"); + irq_disable(); + printf("After NMI to self\n"); + report("NMI", test_count == 2); + stack_phys = (ulong)virt_to_phys(alloc_page()); + stack_va = alloc_vpage(); + + /* Generate DE and PF exceptions serially */ + test_divider = 0; + set_intr_alt_stack(14, pf_tss); + handle_exception(0, de_isr); + printf("Try to divide by 0\n"); + /* install read only pte */ + install_pte(phys_to_virt(read_cr3()), 1, stack_va, + stack_phys | PT_PRESENT_MASK, 0); + invlpg(stack_va); + flush_phys_addr(stack_phys); + switch_stack(stack_va + 4095); + flush_idt_page(); + asm volatile ("divl %3": "=a"(res) + : "d"(0), "a"(1500), "m"(test_divider)); + restore_stack(); + printf("Result is %d\n", res); + report("DE PF exceptions", res == 150); + + /* Generate NP and PF exceptions serially */ + printf("Before NP test\n"); + test_count = 0; + set_intr_alt_stack(14, pf_tss); + handle_exception(11, np_isr); + set_idt_sel(33, NP_SEL); + /* install read only pte */ + install_pte(phys_to_virt(read_cr3()), 1, stack_va, + stack_phys | PT_PRESENT_MASK, 0); + invlpg(stack_va); + flush_idt_page(); + flush_phys_addr(stack_phys); + switch_stack(stack_va + 4095); + asm volatile ("int $33"); + restore_stack(); + printf("After 
int33\n"); + report("NP PF exceptions", test_count == 2); + + pt = alloc_page(); + cr3 = (void*)read_cr3(); + memset(pt, 0, 4096); + /* use shadowed stack during interrupt delivery */ + for (i = 0; i < 4096/sizeof(ulong); i++) { + if (!cr3[i]) { + cr3[i] = virt_to_phys(pt) | PT_PRESENT_MASK | PT_WRITABLE_MASK; + pt[0] = virt_to_phys(pt) | PT_PRESENT_MASK | PT_WRITABLE_MASK; +#ifndef __x86_64__ + ((ulong*)(i<<22))[1] = 0; +#else + ((ulong*)(i<<39))[1] = 0; +#endif + write_cr3(virt_to_phys(cr3)); + break; + } + } + test_count = 0; + printf("Try int 33 with shadowed stack\n"); + switch_stack(((char*)pt) + 4095); + asm volatile("int $33"); + restore_stack(); + printf("After int 33 with shadowed stack\n"); + report("int 33 with shadowed stack", test_count == 1); + + return report_summary(); +} diff --git a/tests/kvm-unit-tests/x86/flat.lds b/tests/kvm-unit-tests/x86/flat.lds new file mode 100644 index 00000000..a278b56c --- /dev/null +++ b/tests/kvm-unit-tests/x86/flat.lds @@ -0,0 +1,21 @@ +SECTIONS +{ + . = 4M + SIZEOF_HEADERS; + stext = .; + .text : { *(.init) *(.text) *(.text.*) } + . = ALIGN(4K); + .data : { + *(.data) + exception_table_start = .; + *(.data.ex) + exception_table_end = .; + } + . = ALIGN(16); + .rodata : { *(.rodata) } + . = ALIGN(16); + .bss : { *(.bss) } + . 
= ALIGN(4K); + edata = .; +} + +ENTRY(start) diff --git a/tests/kvm-unit-tests/x86/hypercall.c b/tests/kvm-unit-tests/x86/hypercall.c new file mode 100644 index 00000000..9380f785 --- /dev/null +++ b/tests/kvm-unit-tests/x86/hypercall.c @@ -0,0 +1,80 @@ +#include "libcflat.h" +#include "vm.h" +#include "desc.h" + +#define KVM_HYPERCALL_INTEL ".byte 0x0f,0x01,0xc1" +#define KVM_HYPERCALL_AMD ".byte 0x0f,0x01,0xd9" + +static inline long kvm_hypercall0_intel(unsigned int nr) +{ + long ret; + asm volatile(KVM_HYPERCALL_INTEL + : "=a"(ret) + : "a"(nr)); + return ret; +} + +static inline long kvm_hypercall0_amd(unsigned int nr) +{ + long ret; + asm volatile(KVM_HYPERCALL_AMD + : "=a"(ret) + : "a"(nr)); + return ret; +} + + +volatile unsigned long test_rip; +#ifdef __x86_64__ +extern void gp_tss(void); +asm ("gp_tss: \n\t" + "add $8, %rsp\n\t" // discard error code + "popq test_rip(%rip)\n\t" // pop return address + "pushq %rsi\n\t" // new return address + "iretq\n\t" + "jmp gp_tss\n\t" + ); + +static inline int +test_edge(void) +{ + test_rip = 0; + asm volatile ("movq $-1, %%rax\n\t" // prepare for vmcall + "leaq 1f(%%rip), %%rsi\n\t" // save return address for gp_tss + "movabsq $0x7ffffffffffd, %%rbx\n\t" + "jmp *%%rbx; 1:" : : : "rax", "rbx", "rsi"); + printf("Return from int 13, test_rip = %lx\n", test_rip); + return test_rip == (1ul << 47); +} +#endif + +int main(int ac, char **av) +{ + kvm_hypercall0_intel(-1u); + printf("Hypercall via VMCALL: OK\n"); + kvm_hypercall0_amd(-1u); + printf("Hypercall via VMMCALL: OK\n"); + +#ifdef __x86_64__ + setup_vm(); + setup_idt(); + setup_alt_stack(); + set_intr_alt_stack(13, gp_tss); + + u8 *data1 = alloc_page(); + u8 *topmost = (void *) ((1ul << 47) - PAGE_SIZE); + + install_pte(phys_to_virt(read_cr3()), 1, topmost, + virt_to_phys(data1) | PT_PRESENT_MASK | PT_WRITABLE_MASK, 0); + memset(topmost, 0xcc, PAGE_SIZE); + topmost[4093] = 0x0f; + topmost[4094] = 0x01; + topmost[4095] = 0xc1; + report("VMCALL on edge of canonical 
address space (intel)", test_edge()); + + topmost[4095] = 0xd9; + report("VMMCALL on edge of canonical address space (AMD)", test_edge()); +#endif + + return report_summary(); +} diff --git a/tests/kvm-unit-tests/x86/hyperv.c b/tests/kvm-unit-tests/x86/hyperv.c new file mode 100644 index 00000000..2511aa2e --- /dev/null +++ b/tests/kvm-unit-tests/x86/hyperv.c @@ -0,0 +1,25 @@ +#include "hyperv.h" +#include "asm/io.h" + +static void synic_ctl(u8 ctl, u8 vcpu_id, u8 sint) +{ + outl((ctl << 16)|((vcpu_id) << 8)|sint, 0x3000); +} + +void synic_sint_create(int vcpu, int sint, int vec, bool auto_eoi) +{ + wrmsr(HV_X64_MSR_SINT0 + sint, + (u64)vec | ((auto_eoi) ? HV_SYNIC_SINT_AUTO_EOI : 0)); + synic_ctl(HV_TEST_DEV_SINT_ROUTE_CREATE, vcpu, sint); +} + +void synic_sint_set(int vcpu, int sint) +{ + synic_ctl(HV_TEST_DEV_SINT_ROUTE_SET_SINT, vcpu, sint); +} + +void synic_sint_destroy(int vcpu, int sint) +{ + wrmsr(HV_X64_MSR_SINT0 + sint, 0xFF|HV_SYNIC_SINT_MASKED); + synic_ctl(HV_TEST_DEV_SINT_ROUTE_DESTROY, vcpu, sint); +} diff --git a/tests/kvm-unit-tests/x86/hyperv.h b/tests/kvm-unit-tests/x86/hyperv.h new file mode 100644 index 00000000..bef03177 --- /dev/null +++ b/tests/kvm-unit-tests/x86/hyperv.h @@ -0,0 +1,191 @@ +#ifndef __HYPERV_H +#define __HYPERV_H + +#include "libcflat.h" +#include "processor.h" + +#define HYPERV_CPUID_FEATURES 0x40000003 + +#define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) +#define HV_X64_MSR_SYNIC_AVAILABLE (1 << 2) +#define HV_X64_MSR_SYNTIMER_AVAILABLE (1 << 3) + +#define HV_X64_MSR_TIME_REF_COUNT 0x40000020 +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 + +/* Define synthetic interrupt controller model specific registers. 
*/ +#define HV_X64_MSR_SCONTROL 0x40000080 +#define HV_X64_MSR_SVERSION 0x40000081 +#define HV_X64_MSR_SIEFP 0x40000082 +#define HV_X64_MSR_SIMP 0x40000083 +#define HV_X64_MSR_EOM 0x40000084 +#define HV_X64_MSR_SINT0 0x40000090 +#define HV_X64_MSR_SINT1 0x40000091 +#define HV_X64_MSR_SINT2 0x40000092 +#define HV_X64_MSR_SINT3 0x40000093 +#define HV_X64_MSR_SINT4 0x40000094 +#define HV_X64_MSR_SINT5 0x40000095 +#define HV_X64_MSR_SINT6 0x40000096 +#define HV_X64_MSR_SINT7 0x40000097 +#define HV_X64_MSR_SINT8 0x40000098 +#define HV_X64_MSR_SINT9 0x40000099 +#define HV_X64_MSR_SINT10 0x4000009A +#define HV_X64_MSR_SINT11 0x4000009B +#define HV_X64_MSR_SINT12 0x4000009C +#define HV_X64_MSR_SINT13 0x4000009D +#define HV_X64_MSR_SINT14 0x4000009E +#define HV_X64_MSR_SINT15 0x4000009F + +/* + * Synthetic Timer MSRs. Four timers per vcpu. + */ + +#define HV_X64_MSR_STIMER0_CONFIG 0x400000B0 +#define HV_X64_MSR_STIMER0_COUNT 0x400000B1 +#define HV_X64_MSR_STIMER1_CONFIG 0x400000B2 +#define HV_X64_MSR_STIMER1_COUNT 0x400000B3 +#define HV_X64_MSR_STIMER2_CONFIG 0x400000B4 +#define HV_X64_MSR_STIMER2_COUNT 0x400000B5 +#define HV_X64_MSR_STIMER3_CONFIG 0x400000B6 +#define HV_X64_MSR_STIMER3_COUNT 0x400000B7 + +#define HV_SYNIC_CONTROL_ENABLE (1ULL << 0) +#define HV_SYNIC_SIMP_ENABLE (1ULL << 0) +#define HV_SYNIC_SIEFP_ENABLE (1ULL << 0) +#define HV_SYNIC_SINT_MASKED (1ULL << 16) +#define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17) +#define HV_SYNIC_SINT_VECTOR_MASK (0xFF) +#define HV_SYNIC_SINT_COUNT 16 + +#define HV_STIMER_ENABLE (1ULL << 0) +#define HV_STIMER_PERIODIC (1ULL << 1) +#define HV_STIMER_LAZY (1ULL << 2) +#define HV_STIMER_AUTOENABLE (1ULL << 3) +#define HV_STIMER_SINT(config) (__u8)(((config) >> 16) & 0x0F) + +#define HV_SYNIC_STIMER_COUNT (4) + +/* Define synthetic interrupt controller message constants. */ +#define HV_MESSAGE_SIZE (256) +#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) +#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) + +/* Define hypervisor message types. 
*/ +enum hv_message_type { + HVMSG_NONE = 0x00000000, + + /* Memory access messages. */ + HVMSG_UNMAPPED_GPA = 0x80000000, + HVMSG_GPA_INTERCEPT = 0x80000001, + + /* Timer notification messages. */ + HVMSG_TIMER_EXPIRED = 0x80000010, + + /* Error messages. */ + HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020, + HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021, + HVMSG_UNSUPPORTED_FEATURE = 0x80000022, + + /* Trace buffer complete messages. */ + HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040, + + /* Platform-specific processor intercept messages. */ + HVMSG_X64_IOPORT_INTERCEPT = 0x80010000, + HVMSG_X64_MSR_INTERCEPT = 0x80010001, + HVMSG_X64_CPUID_INTERCEPT = 0x80010002, + HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003, + HVMSG_X64_APIC_EOI = 0x80010004, + HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 +}; + +/* Define synthetic interrupt controller message flags. */ +union hv_message_flags { + uint8_t asu8; + struct { + uint8_t msg_pending:1; + uint8_t reserved:7; + }; +}; + +union hv_port_id { + uint32_t asu32; + struct { + uint32_t id:24; + uint32_t reserved:8; + } u; +}; + +/* Define port type. */ +enum hv_port_type { + HVPORT_MSG = 1, + HVPORT_EVENT = 2, + HVPORT_MONITOR = 3 +}; + +/* Define synthetic interrupt controller message header. */ +struct hv_message_header { + uint32_t message_type; + uint8_t payload_size; + union hv_message_flags message_flags; + uint8_t reserved[2]; + union { + uint64_t sender; + union hv_port_id port; + }; +}; + +/* Define timer message payload structure. */ +struct hv_timer_message_payload { + uint32_t timer_index; + uint32_t reserved; + uint64_t expiration_time; /* When the timer expired */ + uint64_t delivery_time; /* When the message was delivered */ +}; + +/* Define synthetic interrupt controller message format. */ +struct hv_message { + struct hv_message_header header; + union { + uint64_t payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; + } u; +}; + +/* Define the synthetic interrupt message page layout. 
*/ +struct hv_message_page { + struct hv_message sint_message[HV_SYNIC_SINT_COUNT]; +}; + +enum { + HV_TEST_DEV_SINT_ROUTE_CREATE = 1, + HV_TEST_DEV_SINT_ROUTE_DESTROY, + HV_TEST_DEV_SINT_ROUTE_SET_SINT +}; + +static inline bool synic_supported(void) +{ + return cpuid(HYPERV_CPUID_FEATURES).a & HV_X64_MSR_SYNIC_AVAILABLE; +} + +static inline bool stimer_supported(void) +{ + return cpuid(HYPERV_CPUID_FEATURES).a & HV_X64_MSR_SYNIC_AVAILABLE; +} + +static inline bool hv_time_ref_counter_supported(void) +{ + return cpuid(HYPERV_CPUID_FEATURES).a & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE; +} + +void synic_sint_create(int vcpu, int sint, int vec, bool auto_eoi); +void synic_sint_set(int vcpu, int sint); +void synic_sint_destroy(int vcpu, int sint); + +struct hv_reference_tsc_page { + uint32_t tsc_sequence; + uint32_t res1; + uint64_t tsc_scale; + int64_t tsc_offset; +}; + + +#endif diff --git a/tests/kvm-unit-tests/x86/hyperv_clock.c b/tests/kvm-unit-tests/x86/hyperv_clock.c new file mode 100644 index 00000000..8b1deba6 --- /dev/null +++ b/tests/kvm-unit-tests/x86/hyperv_clock.c @@ -0,0 +1,209 @@ +#include "libcflat.h" +#include "smp.h" +#include "atomic.h" +#include "processor.h" +#include "hyperv.h" +#include "vm.h" + +#define MAX_CPU 4 +#define TICKS_PER_SEC (1000000000 / 100) + +struct hv_reference_tsc_page *hv_clock; + +/* + * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, + * yielding a 64-bit result. + */ +static inline u64 scale_delta(u64 delta, u64 mul_frac) +{ + u64 product, unused; + + __asm__ ( + "mul %3" + : "=d" (product), "=a" (unused) : "1" (delta), "rm" ((u64)mul_frac) ); + + return product; +} + +static u64 hvclock_tsc_to_ticks(struct hv_reference_tsc_page *shadow, uint64_t tsc) +{ + u64 delta = tsc; + return scale_delta(delta, shadow->tsc_scale) + shadow->tsc_offset; +} + +/* + * Reads a consistent set of time-base values from hypervisor, + * into a shadow data area. 
+ */ +static void hvclock_get_time_values(struct hv_reference_tsc_page *shadow, + struct hv_reference_tsc_page *page) +{ + int seq; + do { + seq = page->tsc_sequence; + rmb(); /* fetch version before data */ + *shadow = *page; + rmb(); /* test version after fetching data */ + } while (shadow->tsc_sequence != seq); +} + +uint64_t hv_clock_read(void) +{ + struct hv_reference_tsc_page shadow; + + hvclock_get_time_values(&shadow, hv_clock); + return hvclock_tsc_to_ticks(&shadow, rdtsc()); +} + +atomic_t cpus_left; +bool ok[MAX_CPU]; +uint64_t loops[MAX_CPU]; + +#define iabs(x) ((x) < 0 ? -(x) : (x)) + +static void hv_clock_test(void *data) +{ + int i = smp_id(); + uint64_t t = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + uint64_t end = t + 3 * TICKS_PER_SEC; + uint64_t msr_sample = t + TICKS_PER_SEC; + int min_delta = 123456, max_delta = -123456; + bool got_drift = false; + bool got_warp = false; + + ok[i] = true; + do { + uint64_t now = hv_clock_read(); + int delta = rdmsr(HV_X64_MSR_TIME_REF_COUNT) - now; + + min_delta = delta < min_delta ? delta : min_delta; + if (t < msr_sample) { + max_delta = delta > max_delta ? delta: max_delta; + } else if (delta < 0 || delta > max_delta * 3 / 2) { + printf("suspecting drift on CPU %d? 
delta = %d, acceptable [0, %d)\n", smp_id(), + delta, max_delta); + ok[i] = false; + got_drift = true; + max_delta *= 2; + } + + if (now < t && !got_warp) { + printf("warp on CPU %d!\n", smp_id()); + ok[i] = false; + got_warp = true; + break; + } + t = now; + } while(t < end); + + if (!got_drift) + printf("delta on CPU %d was %d...%d\n", smp_id(), min_delta, max_delta); + barrier(); + atomic_dec(&cpus_left); +} + +static void check_test(int ncpus) +{ + int i; + bool pass; + + atomic_set(&cpus_left, ncpus); + for (i = ncpus - 1; i >= 0; i--) + on_cpu_async(i, hv_clock_test, NULL); + + /* Wait for the end of other vcpu */ + while(atomic_read(&cpus_left)) + ; + + pass = true; + for (i = ncpus - 1; i >= 0; i--) + pass &= ok[i]; + + report("TSC reference precision test", pass); +} + +static void hv_perf_test(void *data) +{ + uint64_t t = hv_clock_read(); + uint64_t end = t + 1000000000 / 100; + uint64_t local_loops = 0; + + do { + t = hv_clock_read(); + local_loops++; + } while(t < end); + + loops[smp_id()] = local_loops; + atomic_dec(&cpus_left); +} + +static void perf_test(int ncpus) +{ + int i; + uint64_t total_loops; + + atomic_set(&cpus_left, ncpus); + for (i = ncpus - 1; i >= 0; i--) + on_cpu_async(i, hv_perf_test, NULL); + + /* Wait for the end of other vcpu */ + while(atomic_read(&cpus_left)) + ; + + total_loops = 0; + for (i = ncpus - 1; i >= 0; i--) + total_loops += loops[i]; + printf("iterations/sec: %" PRId64"\n", total_loops / ncpus); +} + +int main(int ac, char **av) +{ + int nerr = 0; + int ncpus; + struct hv_reference_tsc_page shadow; + uint64_t tsc1, t1, tsc2, t2; + uint64_t ref1, ref2; + + setup_vm(); + smp_init(); + + hv_clock = alloc_page(); + wrmsr(HV_X64_MSR_REFERENCE_TSC, (u64)(uintptr_t)hv_clock | 1); + report("MSR value after enabling", + rdmsr(HV_X64_MSR_REFERENCE_TSC) == ((u64)(uintptr_t)hv_clock | 1)); + + hvclock_get_time_values(&shadow, hv_clock); + if (shadow.tsc_sequence == 0 || shadow.tsc_sequence == 0xFFFFFFFF) { + printf("Reference TSC 
page not available\n"); + exit(1); + } + + printf("scale: %" PRIx64" offset: %" PRId64"\n", shadow.tsc_scale, shadow.tsc_offset); + ref1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + tsc1 = rdtsc(); + t1 = hvclock_tsc_to_ticks(&shadow, tsc1); + printf("refcnt %" PRId64", TSC %" PRIx64", TSC reference %" PRId64"\n", + ref1, tsc1, t1); + + do + ref2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + while (ref2 < ref1 + 2 * TICKS_PER_SEC); + + tsc2 = rdtsc(); + t2 = hvclock_tsc_to_ticks(&shadow, tsc2); + printf("refcnt %" PRId64" (delta %" PRId64"), TSC %" PRIx64", " + "TSC reference %" PRId64" (delta %" PRId64")\n", + ref2, ref2 - ref1, tsc2, t2, t2 - t1); + + ncpus = cpu_count(); + if (ncpus > MAX_CPU) + ncpus = MAX_CPU; + + check_test(ncpus); + perf_test(ncpus); + + wrmsr(HV_X64_MSR_REFERENCE_TSC, 0LL); + report("MSR value after disabling", rdmsr(HV_X64_MSR_REFERENCE_TSC) == 0); + + return nerr > 0 ? 1 : 0; +} diff --git a/tests/kvm-unit-tests/x86/hyperv_stimer.c b/tests/kvm-unit-tests/x86/hyperv_stimer.c new file mode 100644 index 00000000..52925237 --- /dev/null +++ b/tests/kvm-unit-tests/x86/hyperv_stimer.c @@ -0,0 +1,372 @@ +#include "libcflat.h" +#include "processor.h" +#include "msr.h" +#include "isr.h" +#include "vm.h" +#include "apic.h" +#include "desc.h" +#include "smp.h" +#include "atomic.h" +#include "hyperv.h" +#include "asm/barrier.h" + +#define MAX_CPUS 4 + +#define SINT1_VEC 0xF1 +#define SINT2_VEC 0xF2 + +#define SINT1_NUM 2 +#define SINT2_NUM 3 +#define ONE_MS_IN_100NS 10000 + +static atomic_t g_cpus_comp_count; +static int g_cpus_count; +static struct spinlock g_synic_alloc_lock; + +struct stimer { + int sint; + int index; + atomic_t fire_count; +}; + +struct svcpu { + int vcpu; + void *msg_page; + void *evt_page; + struct stimer timer[HV_SYNIC_STIMER_COUNT]; +}; + +static struct svcpu g_synic_vcpu[MAX_CPUS]; + +static void *synic_alloc_page(void) +{ + void *page; + + spin_lock(&g_synic_alloc_lock); + page = alloc_page(); + spin_unlock(&g_synic_alloc_lock); + return 
page; +} + +static void synic_free_page(void *page) +{ + spin_lock(&g_synic_alloc_lock); + free_page(page); + spin_unlock(&g_synic_alloc_lock); +} + +static void stimer_init(struct stimer *timer, int index) +{ + memset(timer, 0, sizeof(*timer)); + timer->index = index; +} + +static void synic_enable(void) +{ + int vcpu = smp_id(), i; + struct svcpu *svcpu = &g_synic_vcpu[vcpu]; + + memset(svcpu, 0, sizeof(*svcpu)); + svcpu->vcpu = vcpu; + svcpu->msg_page = synic_alloc_page(); + for (i = 0; i < ARRAY_SIZE(svcpu->timer); i++) { + stimer_init(&svcpu->timer[i], i); + } + wrmsr(HV_X64_MSR_SIMP, (u64)virt_to_phys(svcpu->msg_page) | + HV_SYNIC_SIMP_ENABLE); + wrmsr(HV_X64_MSR_SCONTROL, HV_SYNIC_CONTROL_ENABLE); +} + +static void stimer_shutdown(struct stimer *timer) +{ + wrmsr(HV_X64_MSR_STIMER0_CONFIG + 2*timer->index, 0); +} + +static void process_stimer_expired(struct svcpu *svcpu, struct stimer *timer, + u64 expiration_time, u64 delivery_time) +{ + atomic_inc(&timer->fire_count); +} + +static void process_stimer_msg(struct svcpu *svcpu, + struct hv_message *msg, int sint) +{ + struct hv_timer_message_payload *payload = + (struct hv_timer_message_payload *)msg->u.payload; + struct stimer *timer; + + if (msg->header.message_type != HVMSG_TIMER_EXPIRED && + msg->header.message_type != HVMSG_NONE) { + report("invalid Hyper-V SynIC msg type", false); + report_summary(); + abort(); + } + + if (msg->header.message_type == HVMSG_NONE) { + return; + } + + if (msg->header.payload_size < sizeof(*payload)) { + report("invalid Hyper-V SynIC msg payload size", false); + report_summary(); + abort(); + } + + /* Now process timer expiration message */ + + if (payload->timer_index >= ARRAY_SIZE(svcpu->timer)) { + report("invalid Hyper-V SynIC timer index", false); + report_summary(); + abort(); + } + timer = &svcpu->timer[payload->timer_index]; + process_stimer_expired(svcpu, timer, payload->expiration_time, + payload->delivery_time); + + msg->header.message_type = HVMSG_NONE; + mb(); 
+ if (msg->header.message_flags.msg_pending) { + wrmsr(HV_X64_MSR_EOM, 0); + } +} + +static void __stimer_isr(int vcpu) +{ + struct svcpu *svcpu = &g_synic_vcpu[vcpu]; + struct hv_message_page *msg_page; + struct hv_message *msg; + int i; + + + msg_page = (struct hv_message_page *)svcpu->msg_page; + for (i = 0; i < ARRAY_SIZE(msg_page->sint_message); i++) { + msg = &msg_page->sint_message[i]; + process_stimer_msg(svcpu, msg, i); + } +} + +static void stimer_isr(isr_regs_t *regs) +{ + int vcpu = smp_id(); + + __stimer_isr(vcpu); + eoi(); +} + +static void stimer_isr_auto_eoi(isr_regs_t *regs) +{ + int vcpu = smp_id(); + + __stimer_isr(vcpu); +} + +static void stimer_start(struct stimer *timer, + bool auto_enable, bool periodic, + u64 tick_100ns, int sint) +{ + u64 config, count; + + timer->sint = sint; + atomic_set(&timer->fire_count, 0); + + config = 0; + if (periodic) { + config |= HV_STIMER_PERIODIC; + } + + config |= ((u8)(sint & 0xFF)) << 16; + config |= HV_STIMER_ENABLE; + if (auto_enable) { + config |= HV_STIMER_AUTOENABLE; + } + + if (periodic) { + count = tick_100ns; + } else { + count = rdmsr(HV_X64_MSR_TIME_REF_COUNT) + tick_100ns; + } + + if (!auto_enable) { + wrmsr(HV_X64_MSR_STIMER0_COUNT + timer->index*2, count); + wrmsr(HV_X64_MSR_STIMER0_CONFIG + timer->index*2, config); + } else { + wrmsr(HV_X64_MSR_STIMER0_CONFIG + timer->index*2, config); + wrmsr(HV_X64_MSR_STIMER0_COUNT + timer->index*2, count); + } +} + +static void stimers_shutdown(void) +{ + int vcpu = smp_id(), i; + struct svcpu *svcpu = &g_synic_vcpu[vcpu]; + + for (i = 0; i < ARRAY_SIZE(svcpu->timer); i++) { + stimer_shutdown(&svcpu->timer[i]); + } +} + +static void synic_disable(void) +{ + int vcpu = smp_id(); + struct svcpu *svcpu = &g_synic_vcpu[vcpu]; + + wrmsr(HV_X64_MSR_SCONTROL, 0); + wrmsr(HV_X64_MSR_SIMP, 0); + wrmsr(HV_X64_MSR_SIEFP, 0); + synic_free_page(svcpu->msg_page); +} + +static void cpu_comp(void) +{ + atomic_inc(&g_cpus_comp_count); +} + +static void 
stimer_test_prepare(void *ctx) +{ + int vcpu = smp_id(); + + write_cr3((ulong)ctx); + synic_enable(); + synic_sint_create(vcpu, SINT1_NUM, SINT1_VEC, false); + synic_sint_create(vcpu, SINT2_NUM, SINT2_VEC, true); + cpu_comp(); +} + +static void stimer_test_periodic(int vcpu, struct stimer *timer1, + struct stimer *timer2) +{ + /* Check periodic timers */ + stimer_start(timer1, false, true, ONE_MS_IN_100NS, SINT1_NUM); + stimer_start(timer2, false, true, ONE_MS_IN_100NS, SINT2_NUM); + while ((atomic_read(&timer1->fire_count) < 1000) || + (atomic_read(&timer2->fire_count) < 1000)) { + pause(); + } + report("Hyper-V SynIC periodic timers test vcpu %d", true, vcpu); + stimer_shutdown(timer1); + stimer_shutdown(timer2); +} + +static void stimer_test_one_shot(int vcpu, struct stimer *timer) +{ + /* Check one-shot timer */ + stimer_start(timer, false, false, ONE_MS_IN_100NS, SINT1_NUM); + while (atomic_read(&timer->fire_count) < 1) { + pause(); + } + report("Hyper-V SynIC one-shot test vcpu %d", true, vcpu); + stimer_shutdown(timer); +} + +static void stimer_test_auto_enable_one_shot(int vcpu, struct stimer *timer) +{ + /* Check auto-enable one-shot timer */ + stimer_start(timer, true, false, ONE_MS_IN_100NS, SINT1_NUM); + while (atomic_read(&timer->fire_count) < 1) { + pause(); + } + report("Hyper-V SynIC auto-enable one-shot timer test vcpu %d", true, vcpu); + stimer_shutdown(timer); +} + +static void stimer_test_auto_enable_periodic(int vcpu, struct stimer *timer) +{ + /* Check auto-enable periodic timer */ + stimer_start(timer, true, true, ONE_MS_IN_100NS, SINT1_NUM); + while (atomic_read(&timer->fire_count) < 1000) { + pause(); + } + report("Hyper-V SynIC auto-enable periodic timer test vcpu %d", true, vcpu); + stimer_shutdown(timer); +} + +static void stimer_test(void *ctx) +{ + int vcpu = smp_id(); + struct svcpu *svcpu = &g_synic_vcpu[vcpu]; + struct stimer *timer1, *timer2; + + irq_enable(); + + timer1 = &svcpu->timer[0]; + timer2 = &svcpu->timer[1]; + + 
stimer_test_periodic(vcpu, timer1, timer2); + stimer_test_one_shot(vcpu, timer1); + stimer_test_auto_enable_one_shot(vcpu, timer2); + stimer_test_auto_enable_periodic(vcpu, timer1); + + irq_disable(); + cpu_comp(); +} + +static void stimer_test_cleanup(void *ctx) +{ + int vcpu = smp_id(); + + stimers_shutdown(); + synic_sint_destroy(vcpu, SINT1_NUM); + synic_sint_destroy(vcpu, SINT2_NUM); + synic_disable(); + cpu_comp(); +} + +static void on_each_cpu_async_wait(void (*func)(void *ctx), void *ctx) +{ + int i; + + atomic_set(&g_cpus_comp_count, 0); + for (i = 0; i < g_cpus_count; i++) { + on_cpu_async(i, func, ctx); + } + while (atomic_read(&g_cpus_comp_count) != g_cpus_count) { + pause(); + } +} + +static void stimer_test_all(void) +{ + int ncpus; + + setup_vm(); + smp_init(); + enable_apic(); + + handle_irq(SINT1_VEC, stimer_isr); + handle_irq(SINT2_VEC, stimer_isr_auto_eoi); + + ncpus = cpu_count(); + if (ncpus > MAX_CPUS) { + ncpus = MAX_CPUS; + } + + printf("cpus = %d\n", ncpus); + g_cpus_count = ncpus; + + on_each_cpu_async_wait(stimer_test_prepare, (void *)read_cr3()); + on_each_cpu_async_wait(stimer_test, NULL); + on_each_cpu_async_wait(stimer_test_cleanup, NULL); +} + +int main(int ac, char **av) +{ + + if (!synic_supported()) { + report("Hyper-V SynIC is not supported", true); + goto done; + } + + if (!stimer_supported()) { + report("Hyper-V SynIC timers are not supported", true); + goto done; + } + + if (!hv_time_ref_counter_supported()) { + report("Hyper-V time reference counter is not supported", true); + goto done; + } + + stimer_test_all(); +done: + return report_summary(); +} diff --git a/tests/kvm-unit-tests/x86/hyperv_synic.c b/tests/kvm-unit-tests/x86/hyperv_synic.c new file mode 100644 index 00000000..74bbd58e --- /dev/null +++ b/tests/kvm-unit-tests/x86/hyperv_synic.c @@ -0,0 +1,207 @@ +#include "libcflat.h" +#include "processor.h" +#include "msr.h" +#include "isr.h" +#include "vm.h" +#include "apic.h" +#include "desc.h" +#include "smp.h" 
+#include "atomic.h" +#include "hyperv.h" + +#define MAX_CPUS 4 + +static atomic_t isr_enter_count[MAX_CPUS]; +static atomic_t cpus_comp_count; + +static void synic_sint_auto_eoi_isr(isr_regs_t *regs) +{ + atomic_inc(&isr_enter_count[smp_id()]); +} + +static void synic_sint_isr(isr_regs_t *regs) +{ + atomic_inc(&isr_enter_count[smp_id()]); + eoi(); +} + +struct sint_vec_entry { + int vec; + bool auto_eoi; +}; + +struct sint_vec_entry sint_vecs[HV_SYNIC_SINT_COUNT] = { + {0xB0, false}, + {0xB1, false}, + {0xB2, false}, + {0xB3, true}, + {0xB4, false}, + {0xB5, false}, + {0xB6, false}, + {0xB7, false}, + {0xB8, true}, + {0xB9, false}, + {0xBA, true}, + {0xBB, false}, + {0xBC, false}, + {0xBD, false}, + {0xBE, true}, + {0xBF, false}, +}; + +static void synic_prepare_sint_vecs(void) +{ + bool auto_eoi; + int i, vec; + + for (i = 0; i < HV_SYNIC_SINT_COUNT; i++) { + vec = sint_vecs[i].vec; + auto_eoi = sint_vecs[i].auto_eoi; + handle_irq(vec, (auto_eoi) ? synic_sint_auto_eoi_isr : synic_sint_isr); + } +} + +static void synic_sints_prepare(int vcpu) +{ + bool auto_eoi; + int i, vec; + + for (i = 0; i < HV_SYNIC_SINT_COUNT; i++) { + vec = sint_vecs[i].vec; + auto_eoi = sint_vecs[i].auto_eoi; + synic_sint_create(vcpu, i, vec, auto_eoi); + } +} + +static void synic_test_prepare(void *ctx) +{ + u64 r; + int i = 0; + + write_cr3((ulong)ctx); + irq_enable(); + + rdmsr(HV_X64_MSR_SVERSION); + rdmsr(HV_X64_MSR_SIMP); + rdmsr(HV_X64_MSR_SIEFP); + rdmsr(HV_X64_MSR_SCONTROL); + for (i = 0; i < HV_SYNIC_SINT_COUNT; i++) { + rdmsr(HV_X64_MSR_SINT0 + i); + } + r = rdmsr(HV_X64_MSR_EOM); + if (r != 0) { + report("Hyper-V SynIC test, EOM read 0x%llx", false, r); + goto ret; + } + + wrmsr(HV_X64_MSR_SIMP, (u64)virt_to_phys(alloc_page()) | + HV_SYNIC_SIMP_ENABLE); + wrmsr(HV_X64_MSR_SIEFP, (u64)virt_to_phys(alloc_page())| + HV_SYNIC_SIEFP_ENABLE); + wrmsr(HV_X64_MSR_SCONTROL, HV_SYNIC_CONTROL_ENABLE); + + synic_sints_prepare(smp_id()); +ret: + atomic_inc(&cpus_comp_count); +} + +static 
void synic_sints_test(int dst_vcpu) +{ + int i; + + atomic_set(&isr_enter_count[dst_vcpu], 0); + for (i = 0; i < HV_SYNIC_SINT_COUNT; i++) { + synic_sint_set(dst_vcpu, i); + } + + while (atomic_read(&isr_enter_count[dst_vcpu]) != HV_SYNIC_SINT_COUNT) { + pause(); + } +} + +static void synic_test(void *ctx) +{ + int dst_vcpu = (ulong)ctx; + + irq_enable(); + synic_sints_test(dst_vcpu); + atomic_inc(&cpus_comp_count); +} + +static void synic_test_cleanup(void *ctx) +{ + int vcpu = smp_id(); + int i; + + irq_enable(); + for (i = 0; i < HV_SYNIC_SINT_COUNT; i++) { + synic_sint_destroy(vcpu, i); + wrmsr(HV_X64_MSR_SINT0 + i, 0xFF|HV_SYNIC_SINT_MASKED); + } + + wrmsr(HV_X64_MSR_SCONTROL, 0); + wrmsr(HV_X64_MSR_SIMP, 0); + wrmsr(HV_X64_MSR_SIEFP, 0); + atomic_inc(&cpus_comp_count); +} + +int main(int ac, char **av) +{ + + if (synic_supported()) { + int ncpus, i; + bool ok; + + setup_vm(); + smp_init(); + enable_apic(); + + synic_prepare_sint_vecs(); + + ncpus = cpu_count(); + if (ncpus > MAX_CPUS) { + ncpus = MAX_CPUS; + } + printf("ncpus = %d\n", ncpus); + + atomic_set(&cpus_comp_count, 0); + for (i = 0; i < ncpus; i++) { + on_cpu_async(i, synic_test_prepare, (void *)read_cr3()); + } + printf("prepare\n"); + while (atomic_read(&cpus_comp_count) != ncpus) { + pause(); + } + + atomic_set(&cpus_comp_count, 0); + for (i = 0; i < ncpus; i++) { + printf("test %d -> %d\n", i, ncpus - 1 - i); + on_cpu_async(i, synic_test, (void *)(ulong)(ncpus - 1 - i)); + } + while (atomic_read(&cpus_comp_count) != ncpus) { + pause(); + } + + atomic_set(&cpus_comp_count, 0); + for (i = 0; i < ncpus; i++) { + on_cpu_async(i, synic_test_cleanup, NULL); + } + printf("cleanup\n"); + while (atomic_read(&cpus_comp_count) != ncpus) { + pause(); + } + + ok = true; + for (i = 0; i < ncpus; ++i) { + printf("isr_enter_count[%d] = %d\n", + i, atomic_read(&isr_enter_count[i])); + ok &= atomic_read(&isr_enter_count[i]) == 16; + } + + report("Hyper-V SynIC test", ok); + } else { + printf("Hyper-V SynIC is not 
supported"); + } + + return report_summary(); +} diff --git a/tests/kvm-unit-tests/x86/idt_test.c b/tests/kvm-unit-tests/x86/idt_test.c new file mode 100644 index 00000000..349aade0 --- /dev/null +++ b/tests/kvm-unit-tests/x86/idt_test.c @@ -0,0 +1,41 @@ +#include "libcflat.h" +#include "desc.h" + +int test_ud2(bool *rflags_rf) +{ + asm volatile(ASM_TRY("1f") + "ud2 \n\t" + "1:" :); + *rflags_rf = exception_rflags_rf(); + return exception_vector(); +} + +int test_gp(bool *rflags_rf) +{ + unsigned long tmp; + + asm volatile("mov $0xffffffff, %0 \n\t" + ASM_TRY("1f") + "mov %0, %%cr4\n\t" + "1:" + : "=a"(tmp)); + *rflags_rf = exception_rflags_rf(); + return exception_vector(); +} + +int main(void) +{ + int r; + bool rflags_rf; + + printf("Starting IDT test\n"); + setup_idt(); + r = test_gp(&rflags_rf); + report("Testing #GP", r == GP_VECTOR); + report("Testing #GP rflags.rf", rflags_rf); + r = test_ud2(&rflags_rf); + report("Testing #UD", r == UD_VECTOR); + report("Testing #UD rflags.rf", rflags_rf); + + return report_summary(); +} diff --git a/tests/kvm-unit-tests/x86/init.c b/tests/kvm-unit-tests/x86/init.c new file mode 100644 index 00000000..f47d671e --- /dev/null +++ b/tests/kvm-unit-tests/x86/init.c @@ -0,0 +1,130 @@ +#include "libcflat.h" +#include "apic.h" +#include "asm/io.h" + +#define KBD_CCMD_READ_OUTPORT 0xD0 /* read output port */ +#define KBD_CCMD_WRITE_OUTPORT 0xD1 /* write output port */ +#define KBD_CCMD_RESET 0xFE /* CPU reset */ + +static inline void kbd_cmd(u8 val) +{ + while (inb(0x64) & 2); + outb(val, 0x64); +} + +static inline u8 kbd_in(void) +{ + kbd_cmd(KBD_CCMD_READ_OUTPORT); + while (inb(0x64) & 2); + return inb(0x60); +} + +static inline void kbd_out(u8 val) +{ + kbd_cmd(KBD_CCMD_WRITE_OUTPORT); + while (inb(0x64) & 2); + outb(val, 0x60); +} + +static inline void rtc_out(u8 reg, u8 val) +{ + outb(reg, 0x70); + outb(val, 0x71); +} + +extern char resume_start, resume_end; + +#define state (*(volatile int *)0x2000) +#define bad (*(volatile 
int *)0x2004) +#define resumed (*(volatile int *)0x2008) + +int main(int argc, char **argv) +{ + volatile u16 *resume_vector_ptr = (u16 *)0x467L; + char *addr, *resume_vec = (void*)0x1000; + + /* resume execution by indirect jump via 40h:0067h */ + rtc_out(0x0f, 0x0a); + resume_vector_ptr[0] = ((u32)(ulong)resume_vec); + resume_vector_ptr[1] = 0; + + for (addr = &resume_start; addr < &resume_end; addr++) + *resume_vec++ = *addr; + + if (state != 0) { + /* + * Strictly speaking this is a firmware problem, but let's check + * for it as well... + */ + if (resumed != 1) { + printf("Uh, resume vector visited %d times?\n", resumed); + bad |= 2; + } + /* + * Port 92 bit 0 is cleared on system reset. On a soft reset it + * is left to 1. Use this to distinguish INIT from hard reset. + */ + if (resumed != 0 && (inb(0x92) & 1) == 0) { + printf("Uh, hard reset!\n"); + bad |= 1; + } + } + + resumed = 0; + + switch (state++) { + case 0: + printf("testing port 92 init... "); + outb(inb(0x92) & ~1, 0x92); + outb(inb(0x92) | 1, 0x92); + break; + + case 1: + printf("testing kbd controller reset... "); + kbd_cmd(KBD_CCMD_RESET); + break; + + case 2: + printf("testing kbd controller init... "); + kbd_out(kbd_in() & ~1); + break; + + case 3: + printf("testing 0xcf9h init... "); + outb(0, 0xcf9); + outb(4, 0xcf9); + break; + + case 4: + printf("testing init to BSP... "); + apic_icr_write(APIC_DEST_SELF | APIC_DEST_PHYSICAL + | APIC_DM_INIT, 0); + break; + + case 5: + exit(bad); + } + + /* The resume code will get us back to main. 
*/ + asm("cli; hlt"); + __builtin_unreachable(); +} + +asm ( + ".global resume_start\n" + ".global resume_end\n" + ".code16\n" + "resume_start:\n" + "incb %cs:0x2008\n" // resumed++; + "mov $0x0f, %al\n" // rtc_out(0x0f, 0x00); + "out %al, $0x70\n" + "mov $0x00, %al\n" + "out %al, $0x71\n" + "jmp $0xffff, $0x0000\n" // BIOS reset + "resume_end:\n" +#ifdef __i386__ + ".code32\n" +#else + ".code64\n" +#endif + ); diff --git a/tests/kvm-unit-tests/x86/intel-iommu.c b/tests/kvm-unit-tests/x86/intel-iommu.c new file mode 100644 index 00000000..610cc655 --- /dev/null +++ b/tests/kvm-unit-tests/x86/intel-iommu.c @@ -0,0 +1,163 @@ +/* + * Intel IOMMU unit test. + * + * Copyright (C) 2016 Red Hat, Inc. + * + * Authors: + * Peter Xu , + * + * This work is licensed under the terms of the GNU LGPL, version 2 or + * later. + */ + +#include "intel-iommu.h" +#include "pci-edu.h" +#include "x86/apic.h" + +#define VTD_TEST_DMAR_4B ("DMAR 4B memcpy test") +#define VTD_TEST_IR_MSI ("IR MSI") +#define VTD_TEST_IR_IOAPIC ("IR IOAPIC") + +static struct pci_edu_dev edu_dev; + +static void vtd_test_dmar(void) +{ + struct pci_edu_dev *dev = &edu_dev; + void *page = alloc_page(); + + report_prefix_push("vtd_dmar"); + +#define DMA_TEST_WORD (0x12345678) + /* Modify the first 4 bytes of the page */ + *(uint32_t *)page = DMA_TEST_WORD; + + /* + * Map the newly allocated page into IOVA address 0 (size 4K) + * of the device address space. Root entry and context entry + * will be automatically created when needed. + */ + vtd_map_range(dev->pci_dev.bdf, 0, virt_to_phys(page), PAGE_SIZE); + + /* + * DMA the first 4 bytes of the page to EDU device buffer + * offset 0. + */ + edu_dma(dev, 0, 4, 0, false); + + /* + * DMA the first 4 bytes of EDU device buffer into the page + * with offset 4 (so it'll be using 4-7 bytes). + */ + edu_dma(dev, 4, 4, 0, true); + + /* + * Check data match between 0-3 bytes and 4-7 bytes of the + * page. 
+ */ + report(VTD_TEST_DMAR_4B, *((uint32_t *)page + 1) == DMA_TEST_WORD); + + free_page(page); + + report_prefix_pop(); +} + +static volatile bool edu_intr_recved; + +static void edu_isr(isr_regs_t *regs) +{ + edu_intr_recved = true; + eoi(); + edu_reg_writel(&edu_dev, EDU_REG_INTR_ACK, + edu_reg_readl(&edu_dev, EDU_REG_INTR_STATUS)); +} + +static void vtd_test_ir(void) +{ +#define VTD_TEST_VECTOR_IOAPIC (0xed) +#define VTD_TEST_VECTOR_MSI (0xee) + struct pci_edu_dev *dev = &edu_dev; + struct pci_dev *pci_dev = &dev->pci_dev; + + report_prefix_push("vtd_ir"); + + irq_enable(); + + /* This will enable INTx */ + pci_msi_set_enable(pci_dev, false); + vtd_setup_ioapic_irq(pci_dev, VTD_TEST_VECTOR_IOAPIC, + 0, TRIGGER_EDGE); + handle_irq(VTD_TEST_VECTOR_IOAPIC, edu_isr); + + edu_intr_recved = false; + wmb(); + /* Manually trigger INTR */ + edu_reg_writel(dev, EDU_REG_INTR_RAISE, 1); + + while (!edu_intr_recved) + cpu_relax(); + + /* Clear INTR bits */ + edu_reg_writel(dev, EDU_REG_INTR_RAISE, 0); + + /* We are good as long as we reach here */ + report(VTD_TEST_IR_IOAPIC, edu_intr_recved == true); + + /* + * Setup EDU PCI device MSI, using interrupt remapping. By + * default, EDU device is using INTx. 
+ */ + if (!vtd_setup_msi(pci_dev, VTD_TEST_VECTOR_MSI, 0)) { + printf("edu device does not support MSI, skip test\n"); + report_skip(VTD_TEST_IR_MSI); + return; + } + + handle_irq(VTD_TEST_VECTOR_MSI, edu_isr); + + edu_intr_recved = false; + wmb(); + /* Manually trigger INTR */ + edu_reg_writel(dev, EDU_REG_INTR_RAISE, 1); + + while (!edu_intr_recved) + cpu_relax(); + + /* We are good as long as we reach here */ + report(VTD_TEST_IR_MSI, edu_intr_recved == true); + + report_prefix_pop(); +} + +int main(int argc, char *argv[]) +{ + vtd_init(); + + report_prefix_push("vtd_init"); + + report("fault status check", vtd_readl(DMAR_FSTS_REG) == 0); + report("QI enablement", vtd_readl(DMAR_GSTS_REG) & VTD_GCMD_QI); + report("DMAR table setup", vtd_readl(DMAR_GSTS_REG) & VTD_GCMD_ROOT); + report("IR table setup", vtd_readl(DMAR_GSTS_REG) & VTD_GCMD_IR_TABLE); + report("DMAR enablement", vtd_readl(DMAR_GSTS_REG) & VTD_GCMD_DMAR); + report("IR enablement", vtd_readl(DMAR_GSTS_REG) & VTD_GCMD_IR); + report("DMAR support 39 bits address width", + vtd_readq(DMAR_CAP_REG) & VTD_CAP_SAGAW); + report("DMAR support huge pages", vtd_readq(DMAR_CAP_REG) & VTD_CAP_SLLPS); + + report_prefix_pop(); + + if (!edu_init(&edu_dev)) { + printf("Please specify \"-device edu\" to do " + "further IOMMU tests.\n"); + report_skip(VTD_TEST_DMAR_4B); + report_skip(VTD_TEST_IR_IOAPIC); + report_skip(VTD_TEST_IR_MSI); + } else { + printf("Found EDU device:\n"); + pci_dev_print(&edu_dev.pci_dev); + vtd_test_dmar(); + vtd_test_ir(); + } + + return report_summary(); +} diff --git a/tests/kvm-unit-tests/x86/ioapic.c b/tests/kvm-unit-tests/x86/ioapic.c new file mode 100644 index 00000000..e5cc2593 --- /dev/null +++ b/tests/kvm-unit-tests/x86/ioapic.c @@ -0,0 +1,438 @@ +#include "libcflat.h" +#include "apic.h" +#include "vm.h" +#include "smp.h" +#include "desc.h" +#include "isr.h" + +static void set_ioapic_redir(unsigned line, unsigned vec, + trigger_mode_t trig_mode) +{ + ioapic_redir_entry_t e = { + 
.vector = vec, + .delivery_mode = 0, + .trig_mode = trig_mode, + }; + + ioapic_write_redir(line, e); +} + +static void set_irq_line(unsigned line, int val) +{ + asm volatile("out %0, %1" : : "a"((u8)val), "d"((u16)(0x2000 + line))); +} + +static void toggle_irq_line(unsigned line) +{ + set_irq_line(line, 1); + set_irq_line(line, 0); +} + +static void ioapic_reg_version(void) +{ + u8 version_offset; + uint32_t data_read, data_write; + + version_offset = 0x01; + data_read = ioapic_read_reg(version_offset); + data_write = data_read ^ 0xffffffff; + + ioapic_write_reg(version_offset, data_write); + report("version register read only test", + data_read == ioapic_read_reg(version_offset)); +} + +static void ioapic_reg_id(void) +{ + u8 id_offset; + uint32_t data_read, data_write, diff; + + id_offset = 0x0; + data_read = ioapic_read_reg(id_offset); + data_write = data_read ^ 0xffffffff; + + ioapic_write_reg(id_offset, data_write); + + diff = data_read ^ ioapic_read_reg(id_offset); + report("id register only bits [24:27] writable", + diff == 0x0f000000); +} + +static void ioapic_arbitration_id(void) +{ + u8 id_offset, arb_offset; + uint32_t write; + + id_offset = 0x0; + arb_offset = 0x2; + write = 0x0f000000; + + ioapic_write_reg(id_offset, write); + report("arbitration register set by id", + ioapic_read_reg(arb_offset) == write); + + ioapic_write_reg(arb_offset, 0x0); + report("arbtration register read only", + ioapic_read_reg(arb_offset) == write); +} + +static volatile int g_isr_76; + +static void ioapic_isr_76(isr_regs_t *regs) +{ + ++g_isr_76; + eoi(); +} + +static void test_ioapic_edge_intr(void) +{ + handle_irq(0x76, ioapic_isr_76); + set_ioapic_redir(0x0e, 0x76, TRIGGER_EDGE); + toggle_irq_line(0x0e); + asm volatile ("nop"); + report("edge triggered intr", g_isr_76 == 1); +} + +static volatile int g_isr_77; + +static void ioapic_isr_77(isr_regs_t *regs) +{ + ++g_isr_77; + set_irq_line(0x0e, 0); + eoi(); +} + +static void test_ioapic_level_intr(void) +{ + 
handle_irq(0x77, ioapic_isr_77); + set_ioapic_redir(0x0e, 0x77, TRIGGER_LEVEL); + set_irq_line(0x0e, 1); + asm volatile ("nop"); + report("level triggered intr", g_isr_77 == 1); +} + +static int g_78, g_66, g_66_after_78; +static ulong g_66_rip, g_78_rip; + +static void ioapic_isr_78(isr_regs_t *regs) +{ + ++g_78; + g_78_rip = regs->rip; + eoi(); +} + +static void ioapic_isr_66(isr_regs_t *regs) +{ + ++g_66; + if (g_78) + ++g_66_after_78; + g_66_rip = regs->rip; + eoi(); +} + +static void test_ioapic_simultaneous(void) +{ + handle_irq(0x78, ioapic_isr_78); + handle_irq(0x66, ioapic_isr_66); + set_ioapic_redir(0x0e, 0x78, TRIGGER_EDGE); + set_ioapic_redir(0x0f, 0x66, TRIGGER_EDGE); + irq_disable(); + toggle_irq_line(0x0f); + toggle_irq_line(0x0e); + irq_enable(); + asm volatile ("nop"); + report("ioapic simultaneous edge interrupts", + g_66 && g_78 && g_66_after_78 && g_66_rip == g_78_rip); +} + +static volatile int g_tmr_79 = -1; + +static void ioapic_isr_79(isr_regs_t *regs) +{ + g_tmr_79 = apic_read_bit(APIC_TMR, 0x79); + set_irq_line(0x0e, 0); + eoi(); +} + +static void test_ioapic_edge_tmr(bool expected_tmr_before) +{ + int tmr_before; + + handle_irq(0x79, ioapic_isr_79); + set_ioapic_redir(0x0e, 0x79, TRIGGER_EDGE); + tmr_before = apic_read_bit(APIC_TMR, 0x79); + toggle_irq_line(0x0e); + asm volatile ("nop"); + report("TMR for ioapic edge interrupts (expected %s)", + tmr_before == expected_tmr_before && !g_tmr_79, + expected_tmr_before ? "true" : "false"); +} + +static void test_ioapic_level_tmr(bool expected_tmr_before) +{ + int tmr_before; + + handle_irq(0x79, ioapic_isr_79); + set_ioapic_redir(0x0e, 0x79, TRIGGER_LEVEL); + tmr_before = apic_read_bit(APIC_TMR, 0x79); + set_irq_line(0x0e, 1); + asm volatile ("nop"); + report("TMR for ioapic level interrupts (expected %s)", + tmr_before == expected_tmr_before && g_tmr_79, + expected_tmr_before ? 
"true" : "false"); +} + +#define IPI_DELAY 1000000 + +static void delay(int count) +{ + while(count--) asm(""); +} + +static void toggle_irq_line_0x0e(void *data) +{ + irq_disable(); + delay(IPI_DELAY); + toggle_irq_line(0x0e); + irq_enable(); +} + +static void test_ioapic_edge_tmr_smp(bool expected_tmr_before) +{ + int tmr_before; + int i; + + g_tmr_79 = -1; + handle_irq(0x79, ioapic_isr_79); + set_ioapic_redir(0x0e, 0x79, TRIGGER_EDGE); + tmr_before = apic_read_bit(APIC_TMR, 0x79); + on_cpu_async(1, toggle_irq_line_0x0e, 0); + i = 0; + while(g_tmr_79 == -1) i++; + printf("%d iterations before interrupt received\n", i); + report("TMR for ioapic edge interrupts (expected %s)", + tmr_before == expected_tmr_before && !g_tmr_79, + expected_tmr_before ? "true" : "false"); +} + +static void set_irq_line_0x0e(void *data) +{ + irq_disable(); + delay(IPI_DELAY); + set_irq_line(0x0e, 1); + irq_enable(); +} + +static void test_ioapic_level_tmr_smp(bool expected_tmr_before) +{ + int i, tmr_before; + + g_tmr_79 = -1; + handle_irq(0x79, ioapic_isr_79); + set_ioapic_redir(0x0e, 0x79, TRIGGER_LEVEL); + tmr_before = apic_read_bit(APIC_TMR, 0x79); + on_cpu_async(1, set_irq_line_0x0e, 0); + i = 0; + while(g_tmr_79 == -1) i++; + printf("%d iterations before interrupt received\n", i); + report("TMR for ioapic level interrupts (expected %s)", + tmr_before == expected_tmr_before && g_tmr_79, + expected_tmr_before ? 
"true" : "false"); +} + +static int g_isr_98; + +static void ioapic_isr_98(isr_regs_t *regs) +{ + ++g_isr_98; + if (g_isr_98 == 1) { + set_irq_line(0x0e, 0); + set_irq_line(0x0e, 1); + } + set_irq_line(0x0e, 0); + eoi(); +} + +static void test_ioapic_level_coalesce(void) +{ + handle_irq(0x98, ioapic_isr_98); + set_ioapic_redir(0x0e, 0x98, TRIGGER_LEVEL); + set_irq_line(0x0e, 1); + asm volatile ("nop"); + report("coalesce simultaneous level interrupts", g_isr_98 == 1); +} + +static int g_isr_99; + +static void ioapic_isr_99(isr_regs_t *regs) +{ + ++g_isr_99; + set_irq_line(0x0e, 0); + eoi(); +} + +static void test_ioapic_level_sequential(void) +{ + handle_irq(0x99, ioapic_isr_99); + set_ioapic_redir(0x0e, 0x99, TRIGGER_LEVEL); + set_irq_line(0x0e, 1); + set_irq_line(0x0e, 1); + asm volatile ("nop"); + report("sequential level interrupts", g_isr_99 == 2); +} + +static volatile int g_isr_9a; + +static void ioapic_isr_9a(isr_regs_t *regs) +{ + ++g_isr_9a; + if (g_isr_9a == 2) + set_irq_line(0x0e, 0); + eoi(); +} + +static void test_ioapic_level_retrigger(void) +{ + int i; + + handle_irq(0x9a, ioapic_isr_9a); + set_ioapic_redir(0x0e, 0x9a, TRIGGER_LEVEL); + + asm volatile ("cli"); + set_irq_line(0x0e, 1); + + for (i = 0; i < 10; i++) { + if (g_isr_9a == 2) + break; + + asm volatile ("sti; hlt; cli"); + } + + asm volatile ("sti"); + + report("retriggered level interrupts without masking", g_isr_9a == 2); +} + +static volatile int g_isr_81; + +static void ioapic_isr_81(isr_regs_t *regs) +{ + ++g_isr_81; + set_irq_line(0x0e, 0); + eoi(); +} + +static void test_ioapic_edge_mask(void) +{ + handle_irq(0x81, ioapic_isr_81); + set_ioapic_redir(0x0e, 0x81, TRIGGER_EDGE); + + set_mask(0x0e, true); + set_irq_line(0x0e, 1); + set_irq_line(0x0e, 0); + + asm volatile ("nop"); + report("masked level interrupt", g_isr_81 == 0); + + set_mask(0x0e, false); + set_irq_line(0x0e, 1); + + asm volatile ("nop"); + report("unmasked level interrupt", g_isr_81 == 1); +} + +static volatile int 
g_isr_82; + +static void ioapic_isr_82(isr_regs_t *regs) +{ + ++g_isr_82; + set_irq_line(0x0e, 0); + eoi(); +} + +static void test_ioapic_level_mask(void) +{ + handle_irq(0x82, ioapic_isr_82); + set_ioapic_redir(0x0e, 0x82, TRIGGER_LEVEL); + + set_mask(0x0e, true); + set_irq_line(0x0e, 1); + + asm volatile ("nop"); + report("masked level interrupt", g_isr_82 == 0); + + set_mask(0x0e, false); + + asm volatile ("nop"); + report("unmasked level interrupt", g_isr_82 == 1); +} + +static volatile int g_isr_83; + +static void ioapic_isr_83(isr_regs_t *regs) +{ + ++g_isr_83; + set_mask(0x0e, true); + eoi(); +} + +static void test_ioapic_level_retrigger_mask(void) +{ + handle_irq(0x83, ioapic_isr_83); + set_ioapic_redir(0x0e, 0x83, TRIGGER_LEVEL); + + set_irq_line(0x0e, 1); + asm volatile ("nop"); + set_mask(0x0e, false); + asm volatile ("nop"); + report("retriggered level interrupts with mask", g_isr_83 == 2); + + set_irq_line(0x0e, 0); + set_mask(0x0e, false); +} + + +int main(void) +{ + setup_vm(); + smp_init(); + + mask_pic_interrupts(); + + if (enable_x2apic()) + printf("x2apic enabled\n"); + else + printf("x2apic not detected\n"); + + irq_enable(); + + ioapic_reg_version(); + ioapic_reg_id(); + ioapic_arbitration_id(); + + test_ioapic_edge_intr(); + test_ioapic_level_intr(); + test_ioapic_simultaneous(); + + test_ioapic_level_coalesce(); + test_ioapic_level_sequential(); + test_ioapic_level_retrigger(); + + test_ioapic_edge_mask(); + test_ioapic_level_mask(); + test_ioapic_level_retrigger_mask(); + + test_ioapic_edge_tmr(false); + test_ioapic_level_tmr(false); + test_ioapic_level_tmr(true); + test_ioapic_edge_tmr(true); + + if (cpu_count() > 1) { + test_ioapic_edge_tmr_smp(false); + test_ioapic_level_tmr_smp(false); + test_ioapic_level_tmr_smp(true); + test_ioapic_edge_tmr_smp(true); + } + + return report_summary(); +} diff --git a/tests/kvm-unit-tests/x86/ioram.h b/tests/kvm-unit-tests/x86/ioram.h new file mode 100644 index 00000000..2938142b --- /dev/null +++ 
b/tests/kvm-unit-tests/x86/ioram.h @@ -0,0 +1,7 @@ +#ifndef __IO_RAM_H +#define __IO_RAM_H + +#define IORAM_BASE_PHYS 0xff000000UL +#define IORAM_LEN 0x10000UL + +#endif diff --git a/tests/kvm-unit-tests/x86/kvmclock.c b/tests/kvm-unit-tests/x86/kvmclock.c new file mode 100644 index 00000000..bad07849 --- /dev/null +++ b/tests/kvm-unit-tests/x86/kvmclock.c @@ -0,0 +1,288 @@ +#include "libcflat.h" +#include "smp.h" +#include "atomic.h" +#include "processor.h" +#include "kvmclock.h" +#include "asm/barrier.h" + +#define unlikely(x) __builtin_expect(!!(x), 0) +#define likely(x) __builtin_expect(!!(x), 1) + + +struct pvclock_vcpu_time_info __attribute__((aligned(4))) hv_clock[MAX_CPU]; +struct pvclock_wall_clock wall_clock; +static unsigned char valid_flags = 0; +static atomic64_t last_value = ATOMIC64_INIT(0); + +/* + * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, + * yielding a 64-bit result. + */ +static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift) +{ + u64 product; +#ifdef __i386__ + u32 tmp1, tmp2; +#endif + + if (shift < 0) + delta >>= -shift; + else + delta <<= shift; + +#ifdef __i386__ + __asm__ ( + "mul %5 ; " + "mov %4,%%eax ; " + "mov %%edx,%4 ; " + "mul %5 ; " + "xor %5,%5 ; " + "add %4,%%eax ; " + "adc %5,%%edx ; " + : "=A" (product), "=r" (tmp1), "=r" (tmp2) + : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) ); +#elif defined(__x86_64__) + __asm__ ( + "mul %%rdx ; shrd $32,%%rdx,%%rax" + : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) ); +#else +#error implement me! 
+#endif + + return product; +} + +#ifdef __i386__ +# define do_div(n,base) ({ \ + u32 __base = (base); \ + u32 __rem; \ + __rem = ((u64)(n)) % __base; \ + (n) = ((u64)(n)) / __base; \ + __rem; \ + }) +#else +u32 __attribute__((weak)) __div64_32(u64 *n, u32 base) +{ + u64 rem = *n; + u64 b = base; + u64 res, d = 1; + u32 high = rem >> 32; + + /* Reduce the thing a bit first */ + res = 0; + if (high >= base) { + high /= base; + res = (u64) high << 32; + rem -= (u64) (high*base) << 32; + } + + while ((s64)b > 0 && b < rem) { + b = b+b; + d = d+d; + } + + do { + if (rem >= b) { + rem -= b; + res += d; + } + b >>= 1; + d >>= 1; + } while (d); + + *n = res; + return rem; +} + +# define do_div(n,base) ({ \ + u32 __base = (base); \ + u32 __rem; \ + (void)(((typeof((n)) *)0) == ((u64 *)0)); \ + if (likely(((n) >> 32) == 0)) { \ + __rem = (u32)(n) % __base; \ + (n) = (u32)(n) / __base; \ + } else \ + __rem = __div64_32(&(n), __base); \ + __rem; \ + }) +#endif + +/** + * set_normalized_timespec - set timespec sec and nsec parts and normalize + * + * @ts: pointer to timespec variable to be set + * @sec: seconds to set + * @nsec: nanoseconds to set + * + * Set seconds and nanoseconds field of a timespec variable and + * normalize to the timespec storage format + * + * Note: The tv_nsec part is always in the range of + * 0 <= tv_nsec < NSEC_PER_SEC + * For negative values only the tv_sec field is negative ! + */ +void set_normalized_timespec(struct timespec *ts, long sec, s64 nsec) +{ + while (nsec >= NSEC_PER_SEC) { + /* + * The following asm() prevents the compiler from + * optimising this loop into a modulo operation. 
See + * also __iter_div_u64_rem() in include/linux/time.h + */ + asm("" : "+rm"(nsec)); + nsec -= NSEC_PER_SEC; + ++sec; + } + while (nsec < 0) { + asm("" : "+rm"(nsec)); + nsec += NSEC_PER_SEC; + --sec; + } + ts->tv_sec = sec; + ts->tv_nsec = nsec; +} + +static inline +unsigned pvclock_read_begin(const struct pvclock_vcpu_time_info *src) +{ + unsigned version = src->version & ~1; + /* Make sure that the version is read before the data. */ + smp_rmb(); + return version; +} + +static inline +bool pvclock_read_retry(const struct pvclock_vcpu_time_info *src, + unsigned version) +{ + /* Make sure that the version is re-read after the data. */ + smp_rmb(); + return version != src->version; +} + +static inline u64 rdtsc_ordered() +{ + /* + * FIXME: on Intel CPUs rmb() aka lfence is sufficient which brings up + * to 2x speedup + */ + mb(); + return rdtsc(); +} + +static inline +cycle_t __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src) +{ + u64 delta = rdtsc_ordered() - src->tsc_timestamp; + cycle_t offset = scale_delta(delta, src->tsc_to_system_mul, + src->tsc_shift); + return src->system_time + offset; +} + +cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) +{ + unsigned version; + cycle_t ret; + u64 last; + u8 flags; + + do { + version = pvclock_read_begin(src); + ret = __pvclock_read_cycles(src); + flags = src->flags; + } while (pvclock_read_retry(src, version)); + + if ((valid_flags & PVCLOCK_RAW_CYCLE_BIT) || + ((valid_flags & PVCLOCK_TSC_STABLE_BIT) && + (flags & PVCLOCK_TSC_STABLE_BIT))) + return ret; + + /* + * Assumption here is that last_value, a global accumulator, always goes + * forward. If we are less than that, we should not be much smaller. + * We assume there is an error marging we're inside, and then the correction + * does not sacrifice accuracy. + * + * For reads: global may have changed between test and return, + * but this means someone else updated poked the clock at a later time. 
+ * We just need to make sure we are not seeing a backwards event. + * + * For updates: last_value = ret is not enough, since two vcpus could be + * updating at the same time, and one of them could be slightly behind, + * making the assumption that last_value always go forward fail to hold. + */ + last = atomic64_read(&last_value); + do { + if (ret < last) + return last; + last = atomic64_cmpxchg(&last_value, last, ret); + } while (unlikely(last != ret)); + + return ret; +} + +cycle_t kvm_clock_read() +{ + struct pvclock_vcpu_time_info *src; + cycle_t ret; + int index = smp_id(); + + src = &hv_clock[index]; + ret = pvclock_clocksource_read(src); + return ret; +} + +void kvm_clock_init(void *data) +{ + int index = smp_id(); + struct pvclock_vcpu_time_info *hvc = &hv_clock[index]; + + printf("kvm-clock: cpu %d, msr %p\n", index, hvc); + wrmsr(MSR_KVM_SYSTEM_TIME_NEW, (unsigned long)hvc | 1); +} + +void kvm_clock_clear(void *data) +{ + wrmsr(MSR_KVM_SYSTEM_TIME_NEW, 0LL); +} + +void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, + struct pvclock_vcpu_time_info *vcpu_time, + struct timespec *ts) +{ + u32 version; + u64 delta; + struct timespec now; + + /* get wallclock at system boot */ + do { + version = wall_clock->version; + rmb(); /* fetch version before time */ + now.tv_sec = wall_clock->sec; + now.tv_nsec = wall_clock->nsec; + rmb(); /* fetch time before checking version */ + } while ((wall_clock->version & 1) || (version != wall_clock->version)); + + delta = pvclock_clocksource_read(vcpu_time); /* time since system boot */ + delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec; + + now.tv_nsec = do_div(delta, NSEC_PER_SEC); + now.tv_sec = delta; + + set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); +} + +void kvm_get_wallclock(struct timespec *ts) +{ + struct pvclock_vcpu_time_info *vcpu_time; + int index = smp_id(); + + wrmsr(MSR_KVM_WALL_CLOCK_NEW, (unsigned long)&wall_clock); + vcpu_time = &hv_clock[index]; + 
pvclock_read_wallclock(&wall_clock, vcpu_time, ts); +} + +void pvclock_set_flags(unsigned char flags) +{ + valid_flags = flags; +} diff --git a/tests/kvm-unit-tests/x86/kvmclock.h b/tests/kvm-unit-tests/x86/kvmclock.h new file mode 100644 index 00000000..dff68021 --- /dev/null +++ b/tests/kvm-unit-tests/x86/kvmclock.h @@ -0,0 +1,44 @@ +#ifndef KVMCLOCK_H +#define KVMCLOCK_H + +#define MSR_KVM_WALL_CLOCK_NEW 0x4b564d00 +#define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 + +#define MAX_CPU 64 + +#define PVCLOCK_TSC_STABLE_BIT (1 << 0) +#define PVCLOCK_RAW_CYCLE_BIT (1 << 7) /* Get raw cycle */ + +# define NSEC_PER_SEC 1000000000ULL + +typedef u64 cycle_t; + +struct pvclock_vcpu_time_info { + u32 version; + u32 pad0; + u64 tsc_timestamp; + u64 system_time; + u32 tsc_to_system_mul; + s8 tsc_shift; + u8 flags; + u8 pad[2]; +} __attribute__((__packed__)); /* 32 bytes */ + +struct pvclock_wall_clock { + u32 version; + u32 sec; + u32 nsec; +} __attribute__((__packed__)); + +struct timespec { + long tv_sec; + long tv_nsec; +}; + +void pvclock_set_flags(unsigned char flags); +cycle_t kvm_clock_read(); +void kvm_get_wallclock(struct timespec *ts); +void kvm_clock_init(void *data); +void kvm_clock_clear(void *data); + +#endif diff --git a/tests/kvm-unit-tests/x86/kvmclock_test.c b/tests/kvm-unit-tests/x86/kvmclock_test.c new file mode 100644 index 00000000..b90203e0 --- /dev/null +++ b/tests/kvm-unit-tests/x86/kvmclock_test.c @@ -0,0 +1,168 @@ +#include "libcflat.h" +#include "smp.h" +#include "atomic.h" +#include "processor.h" +#include "kvmclock.h" + +#define DEFAULT_TEST_LOOPS 100000000L +#define DEFAULT_THRESHOLD 5L + +long loops = DEFAULT_TEST_LOOPS; +long sec = 0; +long threshold = DEFAULT_THRESHOLD; + +struct test_info { + struct spinlock lock; + u64 warps; /* warp count */ + u64 stalls; /* stall count */ + long long worst; /* worst warp */ + volatile cycle_t last; /* last cycle seen by test */ + atomic_t ncpus; /* number of cpu in the test*/ + int check; /* check cycle ? 
*/ +}; + +struct test_info ti[4]; + +static void wallclock_test(void *data) +{ + int *p_err = data; + long ksec, offset; + struct timespec ts; + + kvm_get_wallclock(&ts); + ksec = ts.tv_sec; + + offset = ksec - sec; + printf("Raw nanoseconds value from kvmclock: %" PRIu64 " (cpu %d)\n", kvm_clock_read(), smp_id()); + printf("Seconds get from kvmclock: %ld (cpu %d, offset: %ld)\n", ksec, smp_id(), offset); + + if (offset > threshold || offset < -threshold) { + printf("offset too large!\n"); + (*p_err)++; + } +} + +static void kvm_clock_test(void *data) +{ + struct test_info *hv_test_info = (struct test_info *)data; + long i, check = hv_test_info->check; + + for (i = 0; i < loops; i++){ + cycle_t t0, t1; + long long delta; + + if (check == 0) { + kvm_clock_read(); + continue; + } + + spin_lock(&hv_test_info->lock); + t1 = kvm_clock_read(); + t0 = hv_test_info->last; + hv_test_info->last = kvm_clock_read(); + spin_unlock(&hv_test_info->lock); + + delta = t1 - t0; + if (delta < 0) { + spin_lock(&hv_test_info->lock); + ++hv_test_info->warps; + if (delta < hv_test_info->worst){ + hv_test_info->worst = delta; + printf("Worst warp %lld\n", hv_test_info->worst); + } + spin_unlock(&hv_test_info->lock); + } + if (delta == 0) + ++hv_test_info->stalls; + + if (!((unsigned long)i & 31)) + asm volatile("rep; nop"); + } + + atomic_dec(&hv_test_info->ncpus); +} + +static int cycle_test(int ncpus, int check, struct test_info *ti) +{ + int i; + unsigned long long begin, end; + + begin = rdtsc(); + + atomic_set(&ti->ncpus, ncpus); + ti->check = check; + for (i = ncpus - 1; i >= 0; i--) + on_cpu_async(i, kvm_clock_test, (void *)ti); + + /* Wait for the end of other vcpu */ + while(atomic_read(&ti->ncpus)) + ; + + end = rdtsc(); + + printf("Total vcpus: %d\n", ncpus); + printf("Test loops: %ld\n", loops); + if (check == 1) { + printf("Total warps: %" PRId64 "\n", ti->warps); + printf("Total stalls: %" PRId64 "\n", ti->stalls); + printf("Worst warp: %lld\n", ti->worst); + } else + 
printf("TSC cycles: %lld\n", end - begin); + + return ti->warps ? 1 : 0; +} + +int main(int ac, char **av) +{ + int nerr = 0; + int ncpus; + int i; + + if (ac > 1) + loops = atol(av[1]); + if (ac > 2) + sec = atol(av[2]); + if (ac > 3) + threshold = atol(av[3]); + + smp_init(); + + ncpus = cpu_count(); + if (ncpus > MAX_CPU) + ncpus = MAX_CPU; + for (i = 0; i < ncpus; ++i) + on_cpu(i, kvm_clock_init, (void *)0); + + if (ac > 2) { + printf("Wallclock test, threshold %ld\n", threshold); + printf("Seconds get from host: %ld\n", sec); + for (i = 0; i < ncpus; ++i) + on_cpu(i, wallclock_test, &nerr); + } + + printf("Check the stability of raw cycle ...\n"); + pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT + | PVCLOCK_RAW_CYCLE_BIT); + if (cycle_test(ncpus, 1, &ti[0])) + printf("Raw cycle is not stable\n"); + else + printf("Raw cycle is stable\n"); + + pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); + printf("Monotonic cycle test:\n"); + nerr += cycle_test(ncpus, 1, &ti[1]); + + printf("Measure the performance of raw cycle ...\n"); + pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT + | PVCLOCK_RAW_CYCLE_BIT); + cycle_test(ncpus, 0, &ti[2]); + + printf("Measure the performance of adjusted cycle ...\n"); + pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); + cycle_test(ncpus, 0, &ti[3]); + + for (i = 0; i < ncpus; ++i) + on_cpu(i, kvm_clock_clear, (void *)0); + + return nerr > 0 ? 1 : 0; +} diff --git a/tests/kvm-unit-tests/x86/memory.c b/tests/kvm-unit-tests/x86/memory.c new file mode 100644 index 00000000..cd1eb465 --- /dev/null +++ b/tests/kvm-unit-tests/x86/memory.c @@ -0,0 +1,88 @@ +/* + * Test for x86 cache and memory instructions + * + * Copyright (c) 2015 Red Hat Inc + * + * Authors: + * Eduardo Habkost + * + * This work is licensed under the terms of the GNU GPL, version 2. 
+ */ + +#include "libcflat.h" +#include "desc.h" +#include "processor.h" + +static long target; +static volatile int ud; +static volatile int isize; + +static void handle_ud(struct ex_regs *regs) +{ + ud = 1; + regs->rip += isize; +} + +int main(int ac, char **av) +{ + struct cpuid cpuid7, cpuid1; + int xfail; + + setup_idt(); + handle_exception(UD_VECTOR, handle_ud); + + cpuid1 = cpuid(1); + cpuid7 = cpuid_indexed(7, 0); + + /* 3-byte instructions: */ + isize = 3; + + xfail = !(cpuid1.d & (1U << 19)); /* CLFLUSH */ + ud = 0; + asm volatile("clflush (%0)" : : "b" (&target)); + report_xfail("clflush", xfail, ud == 0); + + xfail = !(cpuid1.d & (1U << 25)); /* SSE */ + ud = 0; + asm volatile("sfence"); + report_xfail("sfence", xfail, ud == 0); + + xfail = !(cpuid1.d & (1U << 26)); /* SSE2 */ + ud = 0; + asm volatile("lfence"); + report_xfail("lfence", xfail, ud == 0); + + ud = 0; + asm volatile("mfence"); + report_xfail("mfence", xfail, ud == 0); + + /* 4-byte instructions: */ + isize = 4; + + xfail = !(cpuid7.b & (1U << 23)); /* CLFLUSHOPT */ + ud = 0; + /* clflushopt (%rbx): */ + asm volatile(".byte 0x66, 0x0f, 0xae, 0x3b" : : "b" (&target)); + report_xfail("clflushopt", xfail, ud == 0); + + xfail = !(cpuid7.b & (1U << 24)); /* CLWB */ + ud = 0; + /* clwb (%rbx): */ + asm volatile(".byte 0x66, 0x0f, 0xae, 0x33" : : "b" (&target)); + report_xfail("clwb", xfail, ud == 0); + + ud = 0; + /* clwb requires a memory operand, the following is NOT a valid + * CLWB instruction (modrm == 0xF0). 
+ */ + asm volatile(".byte 0x66, 0x0f, 0xae, 0xf0"); + report("fake clwb", ud); + + xfail = !(cpuid7.b & (1U << 22)); /* PCOMMIT */ + ud = 0; + /* pcommit: */ + asm volatile(".byte 0x66, 0x0f, 0xae, 0xf8"); + report_xfail("pcommit", xfail, ud == 0); + + return report_summary(); +} diff --git a/tests/kvm-unit-tests/x86/msr.c b/tests/kvm-unit-tests/x86/msr.c new file mode 100644 index 00000000..ded94249 --- /dev/null +++ b/tests/kvm-unit-tests/x86/msr.c @@ -0,0 +1,131 @@ +/* msr tests */ + +#include "libcflat.h" +#include "processor.h" +#include "msr.h" + +struct msr_info { + int index; + char *name; + struct tc { + int valid; + unsigned long long value; + unsigned long long expected; + } val_pairs[20]; +}; + + +#define addr_64 0x0000123456789abcULL + +struct msr_info msr_info[] = +{ + { .index = 0x00000174, .name = "IA32_SYSENTER_CS", + .val_pairs = {{ .valid = 1, .value = 0x1234, .expected = 0x1234}} + }, + { .index = 0x00000175, .name = "MSR_IA32_SYSENTER_ESP", + .val_pairs = {{ .valid = 1, .value = addr_64, .expected = addr_64}} + }, + { .index = 0x00000176, .name = "IA32_SYSENTER_EIP", + .val_pairs = {{ .valid = 1, .value = addr_64, .expected = addr_64}} + }, + { .index = 0x000001a0, .name = "MSR_IA32_MISC_ENABLE", + // reserved: 1:2, 4:6, 8:10, 13:15, 17, 19:21, 24:33, 35:63 + .val_pairs = {{ .valid = 1, .value = 0x400c51889, .expected = 0x400c51889}} + }, + { .index = 0x00000277, .name = "MSR_IA32_CR_PAT", + .val_pairs = {{ .valid = 1, .value = 0x07070707, .expected = 0x07070707}} + }, + { .index = 0xc0000100, .name = "MSR_FS_BASE", + .val_pairs = {{ .valid = 1, .value = addr_64, .expected = addr_64}} + }, + { .index = 0xc0000101, .name = "MSR_GS_BASE", + .val_pairs = {{ .valid = 1, .value = addr_64, .expected = addr_64}} + }, + { .index = 0xc0000102, .name = "MSR_KERNEL_GS_BASE", + .val_pairs = {{ .valid = 1, .value = addr_64, .expected = addr_64}} + }, +#ifdef __x86_64__ + { .index = 0xc0000080, .name = "MSR_EFER", + .val_pairs = {{ .valid = 1, .value = 
0xD00, .expected = 0xD00}} + }, +#endif + { .index = 0xc0000082, .name = "MSR_LSTAR", + .val_pairs = {{ .valid = 1, .value = addr_64, .expected = addr_64}} + }, + { .index = 0xc0000083, .name = "MSR_CSTAR", + .val_pairs = {{ .valid = 1, .value = addr_64, .expected = addr_64}} + }, + { .index = 0xc0000084, .name = "MSR_SYSCALL_MASK", + .val_pairs = {{ .valid = 1, .value = 0xffffffff, .expected = 0xffffffff}} + }, + +// MSR_IA32_DEBUGCTLMSR needs svm feature LBRV +// MSR_VM_HSAVE_PA only AMD host +}; + +static int find_msr_info(int msr_index) +{ + int i; + for (i = 0; i < sizeof(msr_info)/sizeof(msr_info[0]) ; i++) { + if (msr_info[i].index == msr_index) { + return i; + } + } + return -1; +} + +static void test_msr_rw(int msr_index, unsigned long long input, unsigned long long expected) +{ + unsigned long long r = 0; + int index; + char *sptr; + if ((index = find_msr_info(msr_index)) != -1) { + sptr = msr_info[index].name; + } else { + printf("couldn't find name for msr # 0x%x, skipping\n", msr_index); + return; + } + wrmsr(msr_index, input); + r = rdmsr(msr_index); + if (expected != r) { + printf("testing %s: output = 0x%x:0x%x expected = 0x%x:0x%x\n", sptr, + (u32)(r >> 32), (u32)r, (u32)(expected >> 32), (u32)expected); + } + report(sptr, expected == r); +} + +static void test_syscall_lazy_load(void) +{ +#ifdef __x86_64__ + extern void syscall_target(); + u16 cs = read_cs(), ss = read_ss(); + ulong tmp; + + wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_SCE); + wrmsr(MSR_LSTAR, (ulong)syscall_target); + wrmsr(MSR_STAR, (uint64_t)cs << 32); + asm volatile("pushf; syscall; syscall_target: popf" : "=c"(tmp) : : "r11"); + write_ss(ss); + // will crash horribly if broken + report("MSR_*STAR eager loading", true); +#endif +} + +int main(int ac, char **av) +{ + int i, j; + for (i = 0 ; i < sizeof(msr_info) / sizeof(msr_info[0]); i++) { + for (j = 0; j < sizeof(msr_info[i].val_pairs) / sizeof(msr_info[i].val_pairs[0]); j++) { + if (msr_info[i].val_pairs[j].valid) { + 
test_msr_rw(msr_info[i].index, msr_info[i].val_pairs[j].value, msr_info[i].val_pairs[j].expected); + } else { + break; + } + } + } + + test_syscall_lazy_load(); + + return report_summary(); +} + diff --git a/tests/kvm-unit-tests/x86/pcid.c b/tests/kvm-unit-tests/x86/pcid.c new file mode 100644 index 00000000..e3ccfdb3 --- /dev/null +++ b/tests/kvm-unit-tests/x86/pcid.c @@ -0,0 +1,167 @@ +/* Basic PCID & INVPCID functionality test */ + +#include "libcflat.h" +#include "processor.h" +#include "desc.h" + +#define X86_FEATURE_PCID (1 << 17) +#define X86_FEATURE_INVPCID (1 << 10) + +struct invpcid_desc { + unsigned long pcid : 12; + unsigned long rsv : 52; + unsigned long addr : 64; +}; + +int write_cr0_checking(unsigned long val) +{ + asm volatile(ASM_TRY("1f") + "mov %0, %%cr0\n\t" + "1:": : "r" (val)); + return exception_vector(); +} + +int write_cr4_checking(unsigned long val) +{ + asm volatile(ASM_TRY("1f") + "mov %0, %%cr4\n\t" + "1:": : "r" (val)); + return exception_vector(); +} + +int invpcid_checking(unsigned long type, void *desc) +{ + asm volatile (ASM_TRY("1f") + ".byte 0x66,0x0f,0x38,0x82,0x18 \n\t" /* invpcid (%rax), %rbx */ + "1:" : : "a" (desc), "b" (type)); + return exception_vector(); +} + +void test_cpuid_consistency(int pcid_enabled, int invpcid_enabled) +{ + int passed = !(!pcid_enabled && invpcid_enabled); + report("CPUID consistency", passed); +} + +void test_pcid_enabled(void) +{ + int passed = 0; + ulong cr0 = read_cr0(), cr3 = read_cr3(), cr4 = read_cr4(); + + /* try setting CR4.PCIDE, no exception expected */ + if (write_cr4_checking(cr4 | X86_CR4_PCIDE) != 0) + goto report; + + /* try clearing CR0.PG when CR4.PCIDE=1, #GP expected */ + if (write_cr0_checking(cr0 & ~X86_CR0_PG) != GP_VECTOR) + goto report; + + write_cr4(cr4); + + /* try setting CR4.PCIDE when CR3[11:0] != 0 , #GP expected */ + write_cr3(cr3 | 0x001); + if (write_cr4_checking(cr4 | X86_CR4_PCIDE) != GP_VECTOR) + goto report; + write_cr3(cr3); + + passed = 1; + +report: + 
report("Test on PCID when enabled", passed); +} + +void test_pcid_disabled(void) +{ + int passed = 0; + ulong cr4 = read_cr4(); + + /* try setting CR4.PCIDE, #GP expected */ + if (write_cr4_checking(cr4 | X86_CR4_PCIDE) != GP_VECTOR) + goto report; + + passed = 1; + +report: + report("Test on PCID when disabled", passed); +} + +void test_invpcid_enabled(void) +{ + int passed = 0; + ulong cr4 = read_cr4(); + struct invpcid_desc desc; + desc.rsv = 0; + + /* try executing invpcid when CR4.PCIDE=0, desc.pcid=0 and type=1 + * no exception expected + */ + desc.pcid = 0; + if (invpcid_checking(1, &desc) != 0) + goto report; + + /* try executing invpcid when CR4.PCIDE=0, desc.pcid=1 and type=1 + * #GP expected + */ + desc.pcid = 1; + if (invpcid_checking(1, &desc) != GP_VECTOR) + goto report; + + if (write_cr4_checking(cr4 | X86_CR4_PCIDE) != 0) + goto report; + + /* try executing invpcid when CR4.PCIDE=1 + * no exception expected + */ + desc.pcid = 10; + if (invpcid_checking(2, &desc) != 0) + goto report; + + passed = 1; + +report: + report("Test on INVPCID when enabled", passed); +} + +void test_invpcid_disabled(void) +{ + int passed = 0; + struct invpcid_desc desc; + + /* try executing invpcid, #UD expected */ + if (invpcid_checking(2, &desc) != UD_VECTOR) + goto report; + + passed = 1; + +report: + report("Test on INVPCID when disabled", passed); +} + +int main(int ac, char **av) +{ + struct cpuid _cpuid; + int pcid_enabled = 0, invpcid_enabled = 0; + + setup_idt(); + + _cpuid = cpuid(1); + if (_cpuid.c & X86_FEATURE_PCID) + pcid_enabled = 1; + _cpuid = cpuid_indexed(7, 0); + if (_cpuid.b & X86_FEATURE_INVPCID) + invpcid_enabled = 1; + + test_cpuid_consistency(pcid_enabled, invpcid_enabled); + + if (pcid_enabled) + test_pcid_enabled(); + else + test_pcid_disabled(); + + if (invpcid_enabled) + test_invpcid_enabled(); + else + test_invpcid_disabled(); + + return report_summary(); +} diff --git a/tests/kvm-unit-tests/x86/pku.c b/tests/kvm-unit-tests/x86/pku.c new file 
mode 100644 index 00000000..0e7b03fb --- /dev/null +++ b/tests/kvm-unit-tests/x86/pku.c @@ -0,0 +1,138 @@ +#include "libcflat.h" +#include "x86/desc.h" +#include "x86/processor.h" +#include "x86/vm.h" +#include "x86/msr.h" + +#define X86_FEATURE_PKU 3 +#define CR0_WP_MASK (1UL << 16) +#define PTE_PKEY_BIT 59 +#define USER_BASE (1 << 24) +#define USER_VAR(v) (*((__typeof__(&(v))) (((unsigned long)&v) + USER_BASE))) + +volatile int pf_count = 0; +volatile unsigned save; +volatile unsigned test; + +void set_cr0_wp(int wp) +{ + unsigned long cr0 = read_cr0(); + + cr0 &= ~CR0_WP_MASK; + if (wp) + cr0 |= CR0_WP_MASK; + write_cr0(cr0); +} + +void do_pf_tss(unsigned long error_code) +{ + pf_count++; + save = test; + write_pkru(0); +} + +extern void pf_tss(void); + +asm ("pf_tss: \n\t" +#ifdef __x86_64__ + // no task on x86_64, save/restore caller-save regs + "push %rax; push %rcx; push %rdx; push %rsi; push %rdi\n" + "push %r8; push %r9; push %r10; push %r11\n" +#endif + "call do_pf_tss \n\t" +#ifdef __x86_64__ + "pop %r11; pop %r10; pop %r9; pop %r8\n" + "pop %rdi; pop %rsi; pop %rdx; pop %rcx; pop %rax\n" +#endif + "add $"S", %"R "sp\n\t" // discard error code + "iret"W" \n\t" + "jmp pf_tss\n\t" + ); + +static void init_test() +{ + pf_count = 0; + + invlpg(&test); + invlpg(&USER_VAR(test)); + write_pkru(0); + set_cr0_wp(0); +} + +int main(int ac, char **av) +{ + unsigned long i; + unsigned int pkey = 0x2; + unsigned int pkru_ad = 0x10; + unsigned int pkru_wd = 0x20; + + if (!(cpuid_indexed(7, 0).c & (1 << X86_FEATURE_PKU))) { + printf("PKU not enabled\n"); + return report_summary(); + } + + setup_vm(); + setup_alt_stack(); + set_intr_alt_stack(14, pf_tss); + wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_LMA); + + for (i = 0; i < USER_BASE; i += PAGE_SIZE) { + *get_pte(phys_to_virt(read_cr3()), phys_to_virt(i)) &= ~PT_USER_MASK; + *get_pte(phys_to_virt(read_cr3()), phys_to_virt(i)) |= ((unsigned long)pkey << PTE_PKEY_BIT); + invlpg((void *)i); + } + + for (i = USER_BASE; i < 2 * 
USER_BASE; i += PAGE_SIZE) { + *get_pte(phys_to_virt(read_cr3()), phys_to_virt(i)) &= ~USER_BASE; + *get_pte(phys_to_virt(read_cr3()), phys_to_virt(i)) |= ((unsigned long)pkey << PTE_PKEY_BIT); + invlpg((void *)i); + } + + write_cr4(read_cr4() | X86_CR4_PKE); + write_cr3(read_cr3()); + + init_test(); + set_cr0_wp(1); + write_pkru(pkru_ad); + test = 21; + report("write to supervisor page when pkru is ad and wp == 1", pf_count == 0 && test == 21); + + init_test(); + set_cr0_wp(0); + write_pkru(pkru_ad); + test = 22; + report("write to supervisor page when pkru is ad and wp == 0", pf_count == 0 && test == 22); + + init_test(); + set_cr0_wp(1); + write_pkru(pkru_wd); + test = 23; + report("write to supervisor page when pkru is wd and wp == 1", pf_count == 0 && test == 23); + + init_test(); + set_cr0_wp(0); + write_pkru(pkru_wd); + test = 24; + report("write to supervisor page when pkru is wd and wp == 0", pf_count == 0 && test == 24); + + init_test(); + write_pkru(pkru_wd); + set_cr0_wp(0); + USER_VAR(test) = 25; + report("write to user page when pkru is wd and wp == 0", pf_count == 0 && test == 25); + + init_test(); + write_pkru(pkru_wd); + set_cr0_wp(1); + USER_VAR(test) = 26; + report("write to user page when pkru is wd and wp == 1", pf_count == 1 && test == 26 && save == 25); + + init_test(); + write_pkru(pkru_ad); + (void)USER_VAR(test); + report("read from user page when pkru is ad", pf_count == 1 && save == 26); + + // TODO: implicit kernel access from ring 3 (e.g. 
int) + + return report_summary(); +} diff --git a/tests/kvm-unit-tests/x86/pmu.c b/tests/kvm-unit-tests/x86/pmu.c new file mode 100644 index 00000000..c6898004 --- /dev/null +++ b/tests/kvm-unit-tests/x86/pmu.c @@ -0,0 +1,413 @@ + +#include "x86/msr.h" +#include "x86/processor.h" +#include "x86/apic-defs.h" +#include "x86/apic.h" +#include "x86/desc.h" +#include "x86/isr.h" +#include "x86/vm.h" + +#include "libcflat.h" +#include + +#define FIXED_CNT_INDEX 32 +#define PC_VECTOR 32 + +#define EVNSEL_EVENT_SHIFT 0 +#define EVNTSEL_UMASK_SHIFT 8 +#define EVNTSEL_USR_SHIFT 16 +#define EVNTSEL_OS_SHIFT 17 +#define EVNTSEL_EDGE_SHIFT 18 +#define EVNTSEL_PC_SHIFT 19 +#define EVNTSEL_INT_SHIFT 20 +#define EVNTSEL_EN_SHIF 22 +#define EVNTSEL_INV_SHIF 23 +#define EVNTSEL_CMASK_SHIFT 24 + +#define EVNTSEL_EN (1 << EVNTSEL_EN_SHIF) +#define EVNTSEL_USR (1 << EVNTSEL_USR_SHIFT) +#define EVNTSEL_OS (1 << EVNTSEL_OS_SHIFT) +#define EVNTSEL_PC (1 << EVNTSEL_PC_SHIFT) +#define EVNTSEL_INT (1 << EVNTSEL_INT_SHIFT) +#define EVNTSEL_INV (1 << EVNTSEL_INV_SHIF) + +#define N 1000000 + +typedef struct { + uint32_t ctr; + uint32_t config; + uint64_t count; + int idx; +} pmu_counter_t; + +union cpuid10_eax { + struct { + unsigned int version_id:8; + unsigned int num_counters:8; + unsigned int bit_width:8; + unsigned int mask_length:8; + } split; + unsigned int full; +} eax; + +union cpuid10_ebx { + struct { + unsigned int no_unhalted_core_cycles:1; + unsigned int no_instructions_retired:1; + unsigned int no_unhalted_reference_cycles:1; + unsigned int no_llc_reference:1; + unsigned int no_llc_misses:1; + unsigned int no_branch_instruction_retired:1; + unsigned int no_branch_misses_retired:1; + } split; + unsigned int full; +} ebx; + +union cpuid10_edx { + struct { + unsigned int num_counters_fixed:5; + unsigned int bit_width_fixed:8; + unsigned int reserved:19; + } split; + unsigned int full; +} edx; + +struct pmu_event { + char *name; + uint32_t unit_sel; + int min; + int max; +} 
gp_events[] = { + {"core cycles", 0x003c, 1*N, 50*N}, + {"instructions", 0x00c0, 10*N, 10.2*N}, + {"ref cycles", 0x013c, 0.1*N, 30*N}, + {"llc refference", 0x4f2e, 1, 2*N}, + {"llc misses", 0x412e, 1, 1*N}, + {"branches", 0x00c4, 1*N, 1.1*N}, + {"branch misses", 0x00c5, 0, 0.1*N}, +}, fixed_events[] = { + {"fixed 1", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N}, + {"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N}, + {"fixed 3", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N} +}; + +static int num_counters; + +char *buf; + +static inline void loop() +{ + unsigned long tmp, tmp2, tmp3; + + asm volatile("1: mov (%1), %2; add $64, %1; nop; nop; nop; nop; nop; nop; nop; loop 1b" + : "=c"(tmp), "=r"(tmp2), "=r"(tmp3): "0"(N), "1"(buf)); + +} + +volatile uint64_t irq_received; + +static void cnt_overflow(isr_regs_t *regs) +{ + irq_received++; + apic_write(APIC_EOI, 0); +} + +static bool check_irq(void) +{ + int i; + irq_received = 0; + irq_enable(); + for (i = 0; i < 100000 && !irq_received; i++) + asm volatile("pause"); + irq_disable(); + return irq_received; +} + +static bool is_gp(pmu_counter_t *evt) +{ + return evt->ctr < MSR_CORE_PERF_FIXED_CTR0; +} + +static int event_to_global_idx(pmu_counter_t *cnt) +{ + return cnt->ctr - (is_gp(cnt) ? 
MSR_IA32_PERFCTR0 : + (MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX)); +} + +static struct pmu_event* get_counter_event(pmu_counter_t *cnt) +{ + if (is_gp(cnt)) { + int i; + + for (i = 0; i < sizeof(gp_events)/sizeof(gp_events[0]); i++) + if (gp_events[i].unit_sel == (cnt->config & 0xffff)) + return &gp_events[i]; + } else + return &fixed_events[cnt->ctr - MSR_CORE_PERF_FIXED_CTR0]; + + return (void*)0; +} + +static void global_enable(pmu_counter_t *cnt) +{ + cnt->idx = event_to_global_idx(cnt); + + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_CTRL) | + (1ull << cnt->idx)); +} + +static void global_disable(pmu_counter_t *cnt) +{ + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_CTRL) & + ~(1ull << cnt->idx)); +} + + +static void start_event(pmu_counter_t *evt) +{ + wrmsr(evt->ctr, evt->count); + if (is_gp(evt)) + wrmsr(MSR_P6_EVNTSEL0 + event_to_global_idx(evt), + evt->config | EVNTSEL_EN); + else { + uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL); + int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4; + uint32_t usrospmi = 0; + + if (evt->config & EVNTSEL_OS) + usrospmi |= (1 << 0); + if (evt->config & EVNTSEL_USR) + usrospmi |= (1 << 1); + if (evt->config & EVNTSEL_INT) + usrospmi |= (1 << 3); // PMI on overflow + ctrl = (ctrl & ~(0xf << shift)) | (usrospmi << shift); + wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl); + } + global_enable(evt); +} + +static void stop_event(pmu_counter_t *evt) +{ + global_disable(evt); + if (is_gp(evt)) + wrmsr(MSR_P6_EVNTSEL0 + event_to_global_idx(evt), + evt->config & ~EVNTSEL_EN); + else { + uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL); + int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4; + wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl & ~(0xf << shift)); + } + evt->count = rdmsr(evt->ctr); +} + +static void measure(pmu_counter_t *evt, int count) +{ + int i; + for (i = 0; i < count; i++) + start_event(&evt[i]); + loop(); + for (i = 0; i < count; i++) + stop_event(&evt[i]); +} + +static bool 
verify_event(uint64_t count, struct pmu_event *e) +{ + // printf("%lld >= %lld <= %lld\n", e->min, count, e->max); + return count >= e->min && count <= e->max; + +} + +static bool verify_counter(pmu_counter_t *cnt) +{ + return verify_event(cnt->count, get_counter_event(cnt)); +} + +static void check_gp_counter(struct pmu_event *evt) +{ + pmu_counter_t cnt = { + .ctr = MSR_IA32_PERFCTR0, + .config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel, + }; + int i; + + for (i = 0; i < num_counters; i++, cnt.ctr++) { + cnt.count = 0; + measure(&cnt, 1); + report("%s-%d", verify_event(cnt.count, evt), evt->name, i); + } +} + +static void check_gp_counters(void) +{ + int i; + + for (i = 0; i < sizeof(gp_events)/sizeof(gp_events[0]); i++) + if (!(ebx.full & (1 << i))) + check_gp_counter(&gp_events[i]); + else + printf("GP event '%s' is disabled\n", + gp_events[i].name); +} + +static void check_fixed_counters(void) +{ + pmu_counter_t cnt = { + .config = EVNTSEL_OS | EVNTSEL_USR, + }; + int i; + + for (i = 0; i < edx.split.num_counters_fixed; i++) { + cnt.count = 0; + cnt.ctr = fixed_events[i].unit_sel; + measure(&cnt, 1); + report("fixed-%d", verify_event(cnt.count, &fixed_events[i]), i); + } +} + +static void check_counters_many(void) +{ + pmu_counter_t cnt[10]; + int i, n; + + for (i = 0, n = 0; n < num_counters; i++) { + if (ebx.full & (1 << i)) + continue; + + cnt[n].count = 0; + cnt[n].ctr = MSR_IA32_PERFCTR0 + n; + cnt[n].config = EVNTSEL_OS | EVNTSEL_USR | gp_events[i].unit_sel; + n++; + } + for (i = 0; i < edx.split.num_counters_fixed; i++) { + cnt[n].count = 0; + cnt[n].ctr = fixed_events[i].unit_sel; + cnt[n].config = EVNTSEL_OS | EVNTSEL_USR; + n++; + } + + measure(cnt, n); + + for (i = 0; i < n; i++) + if (!verify_counter(&cnt[i])) + break; + + report("all counters", i == n); +} + +static void check_counter_overflow(void) +{ + uint64_t count; + int i; + pmu_counter_t cnt = { + .ctr = MSR_IA32_PERFCTR0, + .config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* 
instructions */, + .count = 0, + }; + measure(&cnt, 1); + count = cnt.count; + + /* clear status before test */ + wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_STATUS)); + + report_prefix_push("overflow"); + + for (i = 0; i < num_counters + 1; i++, cnt.ctr++) { + uint64_t status; + int idx; + if (i == num_counters) + cnt.ctr = fixed_events[0].unit_sel; + if (i % 2) + cnt.config |= EVNTSEL_INT; + else + cnt.config &= ~EVNTSEL_INT; + idx = event_to_global_idx(&cnt); + cnt.count = 1 - count; + measure(&cnt, 1); + report("cntr-%d", cnt.count == 1, i); + status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS); + report("status-%d", status & (1ull << idx), i); + wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, status); + status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS); + report("status clear-%d", !(status & (1ull << idx)), i); + report("irq-%d", check_irq() == (i % 2), i); + } + + report_prefix_pop(); +} + +static void check_gp_counter_cmask(void) +{ + pmu_counter_t cnt = { + .ctr = MSR_IA32_PERFCTR0, + .config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */, + .count = 0, + }; + cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT); + measure(&cnt, 1); + report("cmask", cnt.count < gp_events[1].min); +} + +static void check_rdpmc(void) +{ + uint64_t val = 0x1f3456789ull; + int i; + + report_prefix_push("rdpmc"); + + for (i = 0; i < num_counters; i++) { + uint64_t x = (val & 0xffffffff) | + ((1ull << (eax.split.bit_width - 32)) - 1) << 32; + wrmsr(MSR_IA32_PERFCTR0 + i, val); + report("cntr-%d", rdpmc(i) == x, i); + report("fast-%d", rdpmc(i | (1<<31)) == (u32)val, i); + } + for (i = 0; i < edx.split.num_counters_fixed; i++) { + uint64_t x = (val & 0xffffffff) | + ((1ull << (edx.split.bit_width_fixed - 32)) - 1) << 32; + wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, val); + report("fixed cntr-%d", rdpmc(i | (1 << 30)) == x, i); + report("fixed fast-%d", rdpmc(i | (3<<30)) == (u32)val, i); + } + + report_prefix_pop(); +} + +int main(int ac, char **av) +{ + struct cpuid id = 
cpuid(10); + + setup_vm(); + setup_idt(); + handle_irq(PC_VECTOR, cnt_overflow); + buf = vmalloc(N*64); + + eax.full = id.a; + ebx.full = id.b; + edx.full = id.d; + + if (!eax.split.version_id) { + printf("No pmu is detected!\n"); + return report_summary(); + } + printf("PMU version: %d\n", eax.split.version_id); + printf("GP counters: %d\n", eax.split.num_counters); + printf("GP counter width: %d\n", eax.split.bit_width); + printf("Mask length: %d\n", eax.split.mask_length); + printf("Fixed counters: %d\n", edx.split.num_counters_fixed); + printf("Fixed counter width: %d\n", edx.split.bit_width_fixed); + + num_counters = eax.split.num_counters; + if (num_counters > ARRAY_SIZE(gp_events)) + num_counters = ARRAY_SIZE(gp_events); + + apic_write(APIC_LVTPC, PC_VECTOR); + + check_gp_counters(); + check_fixed_counters(); + check_rdpmc(); + check_counters_many(); + check_counter_overflow(); + check_gp_counter_cmask(); + + return report_summary(); +} diff --git a/tests/kvm-unit-tests/x86/port80.c b/tests/kvm-unit-tests/x86/port80.c new file mode 100644 index 00000000..522c1a4d --- /dev/null +++ b/tests/kvm-unit-tests/x86/port80.c @@ -0,0 +1,12 @@ +#include "libcflat.h" + +int main() +{ + int i; + + printf("begining port 0x80 write test\n"); + for (i = 0; i < 10000000; ++i) + asm volatile("outb %al, $0x80"); + printf("done\n"); + return 0; +} diff --git a/tests/kvm-unit-tests/x86/realmode.c b/tests/kvm-unit-tests/x86/realmode.c new file mode 100644 index 00000000..64116543 --- /dev/null +++ b/tests/kvm-unit-tests/x86/realmode.c @@ -0,0 +1,1787 @@ +#ifndef USE_SERIAL +#define USE_SERIAL +#endif + +asm(".code16gcc"); + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned u32; +typedef unsigned long long u64; + +void test_function(void); + +asm( + "test_function: \n\t" + "mov $0x1234, %eax \n\t" + "ret" + ); + +static int strlen(const char *str) +{ + int n; + + for (n = 0; *str; ++str) + ++n; + return n; +} + +static void outb(u8 data, u16 port) +{ + asm 
volatile("out %0, %1" : : "a"(data), "d"(port)); +} + +#ifdef USE_SERIAL +static int serial_iobase = 0x3f8; +static int serial_inited = 0; + +static u8 inb(u16 port) +{ + u8 data; + asm volatile("in %1, %0" : "=a"(data) : "d"(port)); + return data; +} + +static void serial_outb(char ch) +{ + u8 lsr; + + do { + lsr = inb(serial_iobase + 0x05); + } while (!(lsr & 0x20)); + + outb(ch, serial_iobase + 0x00); +} + +static void serial_init(void) +{ + u8 lcr; + + /* set DLAB */ + lcr = inb(serial_iobase + 0x03); + lcr |= 0x80; + outb(lcr, serial_iobase + 0x03); + + /* set baud rate to 115200 */ + outb(0x01, serial_iobase + 0x00); + outb(0x00, serial_iobase + 0x01); + + /* clear DLAB */ + lcr = inb(serial_iobase + 0x03); + lcr &= ~0x80; + outb(lcr, serial_iobase + 0x03); +} +#endif + +static void print_serial(const char *buf) +{ + unsigned long len = strlen(buf); +#ifdef USE_SERIAL + unsigned long i; + if (!serial_inited) { + serial_init(); + serial_inited = 1; + } + + for (i = 0; i < len; i++) { + serial_outb(buf[i]); + } +#else + asm volatile ("addr32/rep/outsb" : "+S"(buf), "+c"(len) : "d"(0xf1)); +#endif +} + +static void print_serial_u32(u32 value) +{ + char n[12], *p; + p = &n[11]; + *p = 0; + do { + *--p = '0' + (value % 10); + value /= 10; + } while (value > 0); + print_serial(p); +} + +static int failed; + +static void exit(int code) +{ + outb(code, 0xf4); +} + +struct regs { + u32 eax, ebx, ecx, edx; + u32 esi, edi, esp, ebp; + u32 eip, eflags; +}; + +struct table_descr { + u16 limit; + void *base; +} __attribute__((packed)); + +static u64 gdt[] = { + 0, + 0x00cf9b000000ffffull, // flat 32-bit code segment + 0x00cf93000000ffffull, // flat 32-bit data segment +}; + +static struct table_descr gdt_descr = { + sizeof(gdt) - 1, + gdt, +}; + +struct insn_desc { + u16 ptr; + u16 len; +}; + +static struct regs inregs, outregs; + +static void exec_in_big_real_mode(struct insn_desc *insn) +{ + unsigned long tmp; + static struct regs save; + int i; + extern u8 test_insn[], 
test_insn_end[]; + + for (i = 0; i < insn->len; ++i) + test_insn[i] = ((u8 *)(unsigned long)insn->ptr)[i]; + for (; i < test_insn_end - test_insn; ++i) + test_insn[i] = 0x90; // nop + + save = inregs; + asm volatile( + "lgdtl %[gdt_descr] \n\t" + "mov %%cr0, %[tmp] \n\t" + "or $1, %[tmp] \n\t" + "mov %[tmp], %%cr0 \n\t" + "mov %[bigseg], %%gs \n\t" + "and $-2, %[tmp] \n\t" + "mov %[tmp], %%cr0 \n\t" + + "pushw %[save]+36; popfw \n\t" + "xchg %%eax, %[save]+0 \n\t" + "xchg %%ebx, %[save]+4 \n\t" + "xchg %%ecx, %[save]+8 \n\t" + "xchg %%edx, %[save]+12 \n\t" + "xchg %%esi, %[save]+16 \n\t" + "xchg %%edi, %[save]+20 \n\t" + "xchg %%esp, %[save]+24 \n\t" + "xchg %%ebp, %[save]+28 \n\t" + + "test_insn: . = . + 32\n\t" + "test_insn_end: \n\t" + + "xchg %%eax, %[save]+0 \n\t" + "xchg %%ebx, %[save]+4 \n\t" + "xchg %%ecx, %[save]+8 \n\t" + "xchg %%edx, %[save]+12 \n\t" + "xchg %%esi, %[save]+16 \n\t" + "xchg %%edi, %[save]+20 \n\t" + "xchg %%esp, %[save]+24 \n\t" + "xchg %%ebp, %[save]+28 \n\t" + + /* Save EFLAGS in outregs*/ + "pushfl \n\t" + "popl %[save]+36 \n\t" + + /* Restore DF for the harness code */ + "cld\n\t" + "xor %[tmp], %[tmp] \n\t" + "mov %[tmp], %%gs \n\t" + : [tmp]"=&r"(tmp), [save]"+m"(save) + : [gdt_descr]"m"(gdt_descr), [bigseg]"r"((short)16) + : "cc", "memory" + ); + outregs = save; +} + +#define R_AX 1 +#define R_BX 2 +#define R_CX 4 +#define R_DX 8 +#define R_SI 16 +#define R_DI 32 +#define R_SP 64 +#define R_BP 128 + +int regs_equal(int ignore) +{ + const u32 *p1 = &inregs.eax, *p2 = &outregs.eax; // yuck + int i; + + for (i = 0; i < 8; ++i) + if (!(ignore & (1 << i)) && p1[i] != p2[i]) + return 0; + return 1; +} + +static void report(const char *name, u16 regs_ignore, _Bool ok) +{ + if (!regs_equal(regs_ignore)) { + ok = 0; + } + print_serial(ok ? 
"PASS: " : "FAIL: "); + print_serial(name); + print_serial("\n"); + if (!ok) + failed = 1; +} + +#define MK_INSN(name, str) \ + asm ( \ + ".pushsection .data.insn \n\t" \ + "insn_" #name ": \n\t" \ + ".word 1001f, 1002f - 1001f \n\t" \ + ".popsection \n\t" \ + ".pushsection .text.insn, \"ax\" \n\t" \ + "1001: \n\t" \ + "insn_code_" #name ": " str " \n\t" \ + "1002: \n\t" \ + ".popsection" \ + ); \ + extern struct insn_desc insn_##name; + +void test_xchg(void) +{ + MK_INSN(xchg_test1, "xchg %eax,%eax\n\t"); + MK_INSN(xchg_test2, "xchg %eax,%ebx\n\t"); + MK_INSN(xchg_test3, "xchg %eax,%ecx\n\t"); + MK_INSN(xchg_test4, "xchg %eax,%edx\n\t"); + MK_INSN(xchg_test5, "xchg %eax,%esi\n\t"); + MK_INSN(xchg_test6, "xchg %eax,%edi\n\t"); + MK_INSN(xchg_test7, "xchg %eax,%ebp\n\t"); + MK_INSN(xchg_test8, "xchg %eax,%esp\n\t"); + + inregs = (struct regs){ .eax = 0, .ebx = 1, .ecx = 2, .edx = 3, .esi = 4, .edi = 5, .ebp = 6, .esp = 7}; + + exec_in_big_real_mode(&insn_xchg_test1); + report("xchg 1", 0, 1); + + exec_in_big_real_mode(&insn_xchg_test2); + report("xchg 2", R_AX | R_BX, + outregs.eax == inregs.ebx && outregs.ebx == inregs.eax); + + exec_in_big_real_mode(&insn_xchg_test3); + report("xchg 3", R_AX | R_CX, + outregs.eax == inregs.ecx && outregs.ecx == inregs.eax); + + exec_in_big_real_mode(&insn_xchg_test4); + report("xchg 4", R_AX | R_DX, + outregs.eax == inregs.edx && outregs.edx == inregs.eax); + + exec_in_big_real_mode(&insn_xchg_test5); + report("xchg 5", R_AX | R_SI, + outregs.eax == inregs.esi && outregs.esi == inregs.eax); + + exec_in_big_real_mode(&insn_xchg_test6); + report("xchg 6", R_AX | R_DI, + outregs.eax == inregs.edi && outregs.edi == inregs.eax); + + exec_in_big_real_mode(&insn_xchg_test7); + report("xchg 7", R_AX | R_BP, + outregs.eax == inregs.ebp && outregs.ebp == inregs.eax); + + exec_in_big_real_mode(&insn_xchg_test8); + report("xchg 8", R_AX | R_SP, + outregs.eax == inregs.esp && outregs.esp == inregs.eax); +} + +void test_shld(void) +{ + 
MK_INSN(shld_test, "shld $8,%edx,%eax\n\t"); + + inregs = (struct regs){ .eax = 0xbe, .edx = 0xef000000 }; + exec_in_big_real_mode(&insn_shld_test); + report("shld", ~0, outregs.eax == 0xbeef); +} + +void test_mov_imm(void) +{ + MK_INSN(mov_r32_imm_1, "mov $1234567890, %eax"); + MK_INSN(mov_r16_imm_1, "mov $1234, %ax"); + MK_INSN(mov_r8_imm_1, "mov $0x12, %ah"); + MK_INSN(mov_r8_imm_2, "mov $0x34, %al"); + MK_INSN(mov_r8_imm_3, "mov $0x12, %ah\n\t" "mov $0x34, %al\n\t"); + + inregs = (struct regs){ 0 }; + + exec_in_big_real_mode(&insn_mov_r16_imm_1); + report("mov 1", R_AX, outregs.eax == 1234); + + /* test mov $imm, %eax */ + exec_in_big_real_mode(&insn_mov_r32_imm_1); + report("mov 2", R_AX, outregs.eax == 1234567890); + + /* test mov $imm, %al/%ah */ + exec_in_big_real_mode(&insn_mov_r8_imm_1); + report("mov 3", R_AX, outregs.eax == 0x1200); + + exec_in_big_real_mode(&insn_mov_r8_imm_2); + report("mov 4", R_AX, outregs.eax == 0x34); + + exec_in_big_real_mode(&insn_mov_r8_imm_3); + report("mov 5", R_AX, outregs.eax == 0x1234); +} + +void test_sub_imm(void) +{ + MK_INSN(sub_r32_imm_1, "mov $1234567890, %eax\n\t" "sub $10, %eax\n\t"); + MK_INSN(sub_r16_imm_1, "mov $1234, %ax\n\t" "sub $10, %ax\n\t"); + MK_INSN(sub_r8_imm_1, "mov $0x12, %ah\n\t" "sub $0x10, %ah\n\t"); + MK_INSN(sub_r8_imm_2, "mov $0x34, %al\n\t" "sub $0x10, %al\n\t"); + + inregs = (struct regs){ 0 }; + + exec_in_big_real_mode(&insn_sub_r16_imm_1); + report("sub 1", R_AX, outregs.eax == 1224); + + /* test mov $imm, %eax */ + exec_in_big_real_mode(&insn_sub_r32_imm_1); + report("sub 2", R_AX, outregs.eax == 1234567880); + + /* test mov $imm, %al/%ah */ + exec_in_big_real_mode(&insn_sub_r8_imm_1); + report("sub 3", R_AX, outregs.eax == 0x0200); + + exec_in_big_real_mode(&insn_sub_r8_imm_2); + report("sub 4", R_AX, outregs.eax == 0x24); +} + +void test_xor_imm(void) +{ + MK_INSN(xor_r32_imm_1, "mov $1234567890, %eax\n\t" "xor $1234567890, %eax\n\t"); + MK_INSN(xor_r16_imm_1, "mov $1234, %ax\n\t" "xor 
$1234, %ax\n\t"); + MK_INSN(xor_r8_imm_1, "mov $0x12, %ah\n\t" "xor $0x12, %ah\n\t"); + MK_INSN(xor_r8_imm_2, "mov $0x34, %al\n\t" "xor $0x34, %al\n\t"); + + inregs = (struct regs){ 0 }; + + exec_in_big_real_mode(&insn_xor_r16_imm_1); + report("xor 1", R_AX, outregs.eax == 0); + + /* test mov $imm, %eax */ + exec_in_big_real_mode(&insn_xor_r32_imm_1); + report("xor 2", R_AX, outregs.eax == 0); + + /* test mov $imm, %al/%ah */ + exec_in_big_real_mode(&insn_xor_r8_imm_1); + report("xor 3", R_AX, outregs.eax == 0); + + exec_in_big_real_mode(&insn_xor_r8_imm_2); + report("xor 4", R_AX, outregs.eax == 0); +} + +void test_cmp_imm(void) +{ + MK_INSN(cmp_test1, "mov $0x34, %al\n\t" + "cmp $0x34, %al\n\t"); + MK_INSN(cmp_test2, "mov $0x34, %al\n\t" + "cmp $0x39, %al\n\t"); + MK_INSN(cmp_test3, "mov $0x34, %al\n\t" + "cmp $0x24, %al\n\t"); + + inregs = (struct regs){ 0 }; + + /* test cmp imm8 with AL */ + /* ZF: (bit 6) Zero Flag becomes 1 if an operation results + * in a 0 writeback, or 0 register + */ + exec_in_big_real_mode(&insn_cmp_test1); + report("cmp 1", ~0, (outregs.eflags & (1<<6)) == (1<<6)); + + exec_in_big_real_mode(&insn_cmp_test2); + report("cmp 2", ~0, (outregs.eflags & (1<<6)) == 0); + + exec_in_big_real_mode(&insn_cmp_test3); + report("cmp 3", ~0, (outregs.eflags & (1<<6)) == 0); +} + +void test_add_imm(void) +{ + MK_INSN(add_test1, "mov $0x43211234, %eax \n\t" + "add $0x12344321, %eax \n\t"); + MK_INSN(add_test2, "mov $0x12, %eax \n\t" + "add $0x21, %al\n\t"); + + inregs = (struct regs){ 0 }; + + exec_in_big_real_mode(&insn_add_test1); + report("add 1", ~0, outregs.eax == 0x55555555); + + exec_in_big_real_mode(&insn_add_test2); + report("add 2", ~0, outregs.eax == 0x33); +} + +void test_eflags_insn(void) +{ + MK_INSN(clc, "clc"); + MK_INSN(stc, "stc"); + MK_INSN(cli, "cli"); + MK_INSN(sti, "sti"); + MK_INSN(cld, "cld"); + MK_INSN(std, "std"); + + inregs = (struct regs){ 0 }; + + exec_in_big_real_mode(&insn_clc); + report("clc", ~0, (outregs.eflags & 1) == 
0); + + exec_in_big_real_mode(&insn_stc); + report("stc", ~0, (outregs.eflags & 1) == 1); + + exec_in_big_real_mode(&insn_cli); + report("cli", ~0, !(outregs.eflags & (1 << 9))); + + exec_in_big_real_mode(&insn_sti); + report("sti", ~0, outregs.eflags & (1 << 9)); + + exec_in_big_real_mode(&insn_cld); + report("cld", ~0, !(outregs.eflags & (1 << 10))); + + exec_in_big_real_mode(&insn_std); + report("std", ~0, (outregs.eflags & (1 << 10))); +} + +void test_io(void) +{ + MK_INSN(io_test1, "mov $0xff, %al \n\t" + "out %al, $0xe0 \n\t" + "mov $0x00, %al \n\t" + "in $0xe0, %al \n\t"); + MK_INSN(io_test2, "mov $0xffff, %ax \n\t" + "out %ax, $0xe0 \n\t" + "mov $0x0000, %ax \n\t" + "in $0xe0, %ax \n\t"); + MK_INSN(io_test3, "mov $0xffffffff, %eax \n\t" + "out %eax, $0xe0 \n\t" + "mov $0x000000, %eax \n\t" + "in $0xe0, %eax \n\t"); + MK_INSN(io_test4, "mov $0xe0, %dx \n\t" + "mov $0xff, %al \n\t" + "out %al, %dx \n\t" + "mov $0x00, %al \n\t" + "in %dx, %al \n\t"); + MK_INSN(io_test5, "mov $0xe0, %dx \n\t" + "mov $0xffff, %ax \n\t" + "out %ax, %dx \n\t" + "mov $0x0000, %ax \n\t" + "in %dx, %ax \n\t"); + MK_INSN(io_test6, "mov $0xe0, %dx \n\t" + "mov $0xffffffff, %eax \n\t" + "out %eax, %dx \n\t" + "mov $0x00000000, %eax \n\t" + "in %dx, %eax \n\t"); + + inregs = (struct regs){ 0 }; + + exec_in_big_real_mode(&insn_io_test1); + report("pio 1", R_AX, outregs.eax == 0xff); + + exec_in_big_real_mode(&insn_io_test2); + report("pio 2", R_AX, outregs.eax == 0xffff); + + exec_in_big_real_mode(&insn_io_test3); + report("pio 3", R_AX, outregs.eax == 0xffffffff); + + exec_in_big_real_mode(&insn_io_test4); + report("pio 4", R_AX|R_DX, outregs.eax == 0xff); + + exec_in_big_real_mode(&insn_io_test5); + report("pio 5", R_AX|R_DX, outregs.eax == 0xffff); + + exec_in_big_real_mode(&insn_io_test6); + report("pio 6", R_AX|R_DX, outregs.eax == 0xffffffff); +} + +asm ("retf: lretw"); +extern void retf(); + +asm ("retf_imm: lretw $10"); +extern void retf_imm(); + +void test_call(void) +{ + u32 
esp[16]; + u32 addr; + + inregs = (struct regs){ 0 }; + inregs.esp = (u32)esp; + + MK_INSN(call1, "mov $test_function, %eax \n\t" + "call *%eax\n\t"); + MK_INSN(call_near1, "jmp 2f\n\t" + "1: mov $0x1234, %eax\n\t" + "ret\n\t" + "2: call 1b\t"); + MK_INSN(call_near2, "call 1f\n\t" + "jmp 2f\n\t" + "1: mov $0x1234, %eax\n\t" + "ret\n\t" + "2:\t"); + MK_INSN(call_far1, "lcallw *(%ebx)\n\t"); + MK_INSN(call_far2, "lcallw $0, $retf\n\t"); + MK_INSN(ret_imm, "sub $10, %sp; jmp 2f; 1: retw $10; 2: callw 1b"); + MK_INSN(retf_imm, "sub $10, %sp; lcallw $0, $retf_imm"); + + exec_in_big_real_mode(&insn_call1); + report("call 1", R_AX, outregs.eax == 0x1234); + + exec_in_big_real_mode(&insn_call_near1); + report("call near 1", R_AX, outregs.eax == 0x1234); + + exec_in_big_real_mode(&insn_call_near2); + report("call near 2", R_AX, outregs.eax == 0x1234); + + addr = (((unsigned)retf >> 4) << 16) | ((unsigned)retf & 0x0f); + inregs.ebx = (unsigned)&addr; + exec_in_big_real_mode(&insn_call_far1); + report("call far 1", 0, 1); + + exec_in_big_real_mode(&insn_call_far2); + report("call far 2", 0, 1); + + exec_in_big_real_mode(&insn_ret_imm); + report("ret imm 1", 0, 1); + + exec_in_big_real_mode(&insn_retf_imm); + report("retf imm 1", 0, 1); +} + +void test_jcc_short(void) +{ + MK_INSN(jnz_short1, "jnz 1f\n\t" + "mov $0x1234, %eax\n\t" + "1:\n\t"); + MK_INSN(jnz_short2, "1:\n\t" + "cmp $0x1234, %eax\n\t" + "mov $0x1234, %eax\n\t" + "jnz 1b\n\t"); + MK_INSN(jmp_short1, "jmp 1f\n\t" + "mov $0x1234, %eax\n\t" + "1:\n\t"); + + inregs = (struct regs){ 0 }; + + exec_in_big_real_mode(&insn_jnz_short1); + report("jnz short 1", ~0, 1); + + exec_in_big_real_mode(&insn_jnz_short2); + report("jnz short 2", R_AX, (outregs.eflags & (1 << 6))); + + exec_in_big_real_mode(&insn_jmp_short1); + report("jmp short 1", ~0, 1); +} + +void test_jcc_near(void) +{ + /* encode near jmp manually. 
gas will not do it if offsets < 127 byte */ + MK_INSN(jnz_near1, ".byte 0x0f, 0x85, 0x06, 0x00\n\t" + "mov $0x1234, %eax\n\t"); + MK_INSN(jnz_near2, "cmp $0x1234, %eax\n\t" + "mov $0x1234, %eax\n\t" + ".byte 0x0f, 0x85, 0xf0, 0xff\n\t"); + MK_INSN(jmp_near1, ".byte 0xE9, 0x06, 0x00\n\t" + "mov $0x1234, %eax\n\t"); + + inregs = (struct regs){ 0 }; + + exec_in_big_real_mode(&insn_jnz_near1); + report("jnz near 1", 0, 1); + + exec_in_big_real_mode(&insn_jnz_near2); + report("jnz near 2", R_AX, outregs.eflags & (1 << 6)); + + exec_in_big_real_mode(&insn_jmp_near1); + report("jmp near 1", 0, 1); +} + +void test_long_jmp() +{ + u32 esp[16]; + + inregs = (struct regs){ 0 }; + inregs.esp = (u32)(esp+16); + MK_INSN(long_jmp, "call 1f\n\t" + "jmp 2f\n\t" + "1: jmp $0, $test_function\n\t" + "2:\n\t"); + exec_in_big_real_mode(&insn_long_jmp); + report("jmp far 1", R_AX, outregs.eax == 0x1234); +} + +void test_push_pop() +{ + MK_INSN(push32, "mov $0x12345678, %eax\n\t" + "push %eax\n\t" + "pop %ebx\n\t"); + MK_INSN(push16, "mov $0x1234, %ax\n\t" + "push %ax\n\t" + "pop %bx\n\t"); + + MK_INSN(push_es, "mov $0x231, %bx\n\t" //Just write a dummy value to see if it gets overwritten + "mov $0x123, %ax\n\t" + "mov %ax, %es\n\t" + "push %es\n\t" + "pop %bx \n\t" + ); + MK_INSN(pop_es, "push %ax\n\t" + "pop %es\n\t" + "mov %es, %bx\n\t" + ); + MK_INSN(push_pop_ss, "push %ss\n\t" + "pushw %ax\n\t" + "popw %ss\n\t" + "mov %ss, %bx\n\t" + "pop %ss\n\t" + ); + MK_INSN(push_pop_fs, "push %fs\n\t" + "pushl %eax\n\t" + "popl %fs\n\t" + "mov %fs, %ebx\n\t" + "pop %fs\n\t" + ); + MK_INSN(push_pop_high_esp_bits, + "xor $0x12340000, %esp \n\t" + "push %ax; \n\t" + "xor $0x12340000, %esp \n\t" + "pop %bx"); + + inregs = (struct regs){ 0 }; + + exec_in_big_real_mode(&insn_push32); + report("push/pop 1", R_AX|R_BX, + outregs.eax == outregs.ebx && outregs.eax == 0x12345678); + + exec_in_big_real_mode(&insn_push16); + report("push/pop 2", R_AX|R_BX, + outregs.eax == outregs.ebx && outregs.eax == 
0x1234); + + exec_in_big_real_mode(&insn_push_es); + report("push/pop 3", R_AX|R_BX, + outregs.ebx == outregs.eax && outregs.eax == 0x123); + + exec_in_big_real_mode(&insn_pop_es); + report("push/pop 4", R_AX|R_BX, outregs.ebx == outregs.eax); + + exec_in_big_real_mode(&insn_push_pop_ss); + report("push/pop 5", R_AX|R_BX, outregs.ebx == outregs.eax); + + exec_in_big_real_mode(&insn_push_pop_fs); + report("push/pop 6", R_AX|R_BX, outregs.ebx == outregs.eax); + + inregs.eax = 0x9977; + inregs.ebx = 0x7799; + exec_in_big_real_mode(&insn_push_pop_high_esp_bits); + report("push/pop with high bits set in %esp", R_BX, outregs.ebx == 0x9977); +} + +void test_null(void) +{ + MK_INSN(null, ""); + + inregs = (struct regs){ 0 }; + + exec_in_big_real_mode(&insn_null); + report("null", 0, 1); +} + +struct { + char stack[500]; + char top[]; +} tmp_stack; + +void test_pusha_popa() +{ + MK_INSN(pusha, "pusha\n\t" + "pop %edi\n\t" + "pop %esi\n\t" + "pop %ebp\n\t" + "add $4, %esp\n\t" + "pop %ebx\n\t" + "pop %edx\n\t" + "pop %ecx\n\t" + "pop %eax\n\t" + ); + + MK_INSN(popa, "push %eax\n\t" + "push %ecx\n\t" + "push %edx\n\t" + "push %ebx\n\t" + "push %esp\n\t" + "push %ebp\n\t" + "push %esi\n\t" + "push %edi\n\t" + "popa\n\t" + ); + + inregs = (struct regs){ .eax = 0, .ebx = 1, .ecx = 2, .edx = 3, .esi = 4, .edi = 5, .ebp = 6, .esp = (unsigned long)&tmp_stack.top }; + + exec_in_big_real_mode(&insn_pusha); + report("pusha/popa 1", 0, 1); + + exec_in_big_real_mode(&insn_popa); + report("pusha/popa 1", 0, 1); +} + +void test_iret() +{ + MK_INSN(iret32, "pushf\n\t" + "pushl %cs\n\t" + "call 1f\n\t" /* a near call will push eip onto the stack */ + "jmp 2f\n\t" + "1: iret\n\t" + "2:\n\t" + ); + + MK_INSN(iret16, "pushfw\n\t" + "pushw %cs\n\t" + "callw 1f\n\t" + "jmp 2f\n\t" + "1: iretw\n\t" + "2:\n\t"); + + MK_INSN(iret_flags32, "pushfl\n\t" + "popl %eax\n\t" + "andl $~0x2, %eax\n\t" + "orl $0xffc18028, %eax\n\t" + "pushl %eax\n\t" + "pushl %cs\n\t" + "call 1f\n\t" + "jmp 2f\n\t" + "1: 
iret\n\t" + "2:\n\t"); + + MK_INSN(iret_flags16, "pushfw\n\t" + "popw %ax\n\t" + "and $~0x2, %ax\n\t" + "or $0x8028, %ax\n\t" + "pushw %ax\n\t" + "pushw %cs\n\t" + "callw 1f\n\t" + "jmp 2f\n\t" + "1: iretw\n\t" + "2:\n\t"); + + inregs = (struct regs){ 0 }; + + exec_in_big_real_mode(&insn_iret32); + report("iret 1", 0, 1); + + exec_in_big_real_mode(&insn_iret16); + report("iret 2", 0, 1); + + exec_in_big_real_mode(&insn_iret_flags32); + report("iret 3", R_AX, 1); + report("rflags.rf", ~0, !(outregs.eflags & (1 << 16))); + + exec_in_big_real_mode(&insn_iret_flags16); + report("iret 4", R_AX, 1); +} + +void test_int() +{ + inregs = (struct regs){ 0 }; + + *(u32 *)(0x11 * 4) = 0x1000; /* Store a pointer to address 0x1000 in IDT entry 0x11 */ + *(u8 *)(0x1000) = 0xcf; /* 0x1000 contains an IRET instruction */ + + MK_INSN(int11, "int $0x11\n\t"); + + exec_in_big_real_mode(&insn_int11); + report("int 1", 0, 1); +} + +void test_imul() +{ + MK_INSN(imul8_1, "mov $2, %al\n\t" + "mov $-4, %cx\n\t" + "imul %cl\n\t"); + + MK_INSN(imul16_1, "mov $2, %ax\n\t" + "mov $-4, %cx\n\t" + "imul %cx\n\t"); + + MK_INSN(imul32_1, "mov $2, %eax\n\t" + "mov $-4, %ecx\n\t" + "imul %ecx\n\t"); + + MK_INSN(imul8_2, "mov $0x12340002, %eax\n\t" + "mov $4, %cx\n\t" + "imul %cl\n\t"); + + MK_INSN(imul16_2, "mov $2, %ax\n\t" + "mov $4, %cx\n\t" + "imul %cx\n\t"); + + MK_INSN(imul32_2, "mov $2, %eax\n\t" + "mov $4, %ecx\n\t" + "imul %ecx\n\t"); + + inregs = (struct regs){ 0 }; + + exec_in_big_real_mode(&insn_imul8_1); + report("imul 1", R_AX | R_CX | R_DX, (outregs.eax & 0xff) == (u8)-8); + + exec_in_big_real_mode(&insn_imul16_1); + report("imul 2", R_AX | R_CX | R_DX, outregs.eax == (u16)-8); + + exec_in_big_real_mode(&insn_imul32_1); + report("imul 3", R_AX | R_CX | R_DX, outregs.eax == (u32)-8); + + exec_in_big_real_mode(&insn_imul8_2); + report("imul 4", R_AX | R_CX | R_DX, + (outregs.eax & 0xffff) == 8 + && (outregs.eax & 0xffff0000) == 0x12340000); + + exec_in_big_real_mode(&insn_imul16_2); + 
report("imul 5", R_AX | R_CX | R_DX, outregs.eax == 8); + + exec_in_big_real_mode(&insn_imul32_2); + report("imul 6", R_AX | R_CX | R_DX, outregs.eax == 8); +} + +void test_mul() +{ + MK_INSN(mul8, "mov $2, %al\n\t" + "mov $4, %cx\n\t" + "imul %cl\n\t"); + + MK_INSN(mul16, "mov $2, %ax\n\t" + "mov $4, %cx\n\t" + "imul %cx\n\t"); + + MK_INSN(mul32, "mov $2, %eax\n\t" + "mov $4, %ecx\n\t" + "imul %ecx\n\t"); + + inregs = (struct regs){ 0 }; + + exec_in_big_real_mode(&insn_mul8); + report("mul 1", R_AX | R_CX | R_DX, (outregs.eax & 0xff) == 8); + + exec_in_big_real_mode(&insn_mul16); + report("mul 2", R_AX | R_CX | R_DX, outregs.eax == 8); + + exec_in_big_real_mode(&insn_mul32); + report("mul 3", R_AX | R_CX | R_DX, outregs.eax == 8); +} + +void test_div() +{ + MK_INSN(div8, "mov $257, %ax\n\t" + "mov $2, %cl\n\t" + "div %cl\n\t"); + + MK_INSN(div16, "mov $512, %ax\n\t" + "mov $5, %cx\n\t" + "div %cx\n\t"); + + MK_INSN(div32, "mov $512, %eax\n\t" + "mov $5, %ecx\n\t" + "div %ecx\n\t"); + + inregs = (struct regs){ 0 }; + + exec_in_big_real_mode(&insn_div8); + report("div 1", R_AX | R_CX | R_DX, outregs.eax == 384); + + exec_in_big_real_mode(&insn_div16); + report("div 2", R_AX | R_CX | R_DX, + outregs.eax == 102 && outregs.edx == 2); + + exec_in_big_real_mode(&insn_div32); + report("div 3", R_AX | R_CX | R_DX, + outregs.eax == 102 && outregs.edx == 2); +} + +void test_idiv() +{ + MK_INSN(idiv8, "mov $256, %ax\n\t" + "mov $-2, %cl\n\t" + "idiv %cl\n\t"); + + MK_INSN(idiv16, "mov $512, %ax\n\t" + "mov $-2, %cx\n\t" + "idiv %cx\n\t"); + + MK_INSN(idiv32, "mov $512, %eax\n\t" + "mov $-2, %ecx\n\t" + "idiv %ecx\n\t"); + + inregs = (struct regs){ 0 }; + + exec_in_big_real_mode(&insn_idiv8); + report("idiv 1", R_AX | R_CX | R_DX, outregs.eax == (u8)-128); + + exec_in_big_real_mode(&insn_idiv16); + report("idiv 2", R_AX | R_CX | R_DX, outregs.eax == (u16)-256); + + exec_in_big_real_mode(&insn_idiv32); + report("idiv 3", R_AX | R_CX | R_DX, outregs.eax == (u32)-256); +} + +void 
test_cbw(void) +{ + MK_INSN(cbw, "mov $0xFE, %eax \n\t" + "cbw\n\t"); + MK_INSN(cwde, "mov $0xFFFE, %eax \n\t" + "cwde\n\t"); + + inregs = (struct regs){ 0 }; + + exec_in_big_real_mode(&insn_cbw); + report("cbq 1", ~0, outregs.eax == 0xFFFE); + + exec_in_big_real_mode(&insn_cwde); + report("cwde 1", ~0, outregs.eax == 0xFFFFFFFE); +} + +void test_loopcc(void) +{ + MK_INSN(loop, "mov $10, %ecx\n\t" + "1: inc %eax\n\t" + "loop 1b\n\t"); + + MK_INSN(loope, "mov $10, %ecx\n\t" + "mov $1, %eax\n\t" + "1: dec %eax\n\t" + "loope 1b\n\t"); + + MK_INSN(loopne, "mov $10, %ecx\n\t" + "mov $5, %eax\n\t" + "1: dec %eax\n\t" + "loopne 1b\n\t"); + + inregs = (struct regs){ 0 }; + + exec_in_big_real_mode(&insn_loop); + report("LOOPcc short 1", R_AX, outregs.eax == 10); + + exec_in_big_real_mode(&insn_loope); + report("LOOPcc short 2", R_AX | R_CX, + outregs.eax == -1 && outregs.ecx == 8); + + exec_in_big_real_mode(&insn_loopne); + report("LOOPcc short 3", R_AX | R_CX, + outregs.eax == 0 && outregs.ecx == 5); +} + +static void test_das(void) +{ + short i; + u16 nr_fail = 0; + static unsigned test_cases[1024] = { + 0x46000000, 0x8701a000, 0x9710fa00, 0x97119a00, + 0x02000101, 0x8301a101, 0x9310fb01, 0x93119b01, + 0x02000202, 0x8301a202, 0x9710fc02, 0x97119c02, + 0x06000303, 0x8701a303, 0x9310fd03, 0x93119d03, + 0x02000404, 0x8301a404, 0x9310fe04, 0x93119e04, + 0x06000505, 0x8701a505, 0x9710ff05, 0x97119f05, + 0x06000606, 0x8701a606, 0x56100006, 0x9711a006, + 0x02000707, 0x8301a707, 0x12100107, 0x9311a107, + 0x02000808, 0x8301a808, 0x12100208, 0x9311a208, + 0x06000909, 0x8701a909, 0x16100309, 0x9711a309, + 0x1200040a, 0x9301a40a, 0x1210040a, 0x9311a40a, + 0x1600050b, 0x9701a50b, 0x1610050b, 0x9711a50b, + 0x1600060c, 0x9701a60c, 0x1610060c, 0x9711a60c, + 0x1200070d, 0x9301a70d, 0x1210070d, 0x9311a70d, + 0x1200080e, 0x9301a80e, 0x1210080e, 0x9311a80e, + 0x1600090f, 0x9701a90f, 0x1610090f, 0x9711a90f, + 0x02001010, 0x8301b010, 0x16100a10, 0x9711aa10, + 0x06001111, 0x8701b111, 
0x12100b11, 0x9311ab11, + 0x06001212, 0x8701b212, 0x16100c12, 0x9711ac12, + 0x02001313, 0x8301b313, 0x12100d13, 0x9311ad13, + 0x06001414, 0x8701b414, 0x12100e14, 0x9311ae14, + 0x02001515, 0x8301b515, 0x16100f15, 0x9711af15, + 0x02001616, 0x8301b616, 0x12101016, 0x9311b016, + 0x06001717, 0x8701b717, 0x16101117, 0x9711b117, + 0x06001818, 0x8701b818, 0x16101218, 0x9711b218, + 0x02001919, 0x8301b919, 0x12101319, 0x9311b319, + 0x1600141a, 0x9701b41a, 0x1610141a, 0x9711b41a, + 0x1200151b, 0x9301b51b, 0x1210151b, 0x9311b51b, + 0x1200161c, 0x9301b61c, 0x1210161c, 0x9311b61c, + 0x1600171d, 0x9701b71d, 0x1610171d, 0x9711b71d, + 0x1600181e, 0x9701b81e, 0x1610181e, 0x9711b81e, + 0x1200191f, 0x9301b91f, 0x1210191f, 0x9311b91f, + 0x02002020, 0x8701c020, 0x12101a20, 0x9311ba20, + 0x06002121, 0x8301c121, 0x16101b21, 0x9711bb21, + 0x06002222, 0x8301c222, 0x12101c22, 0x9311bc22, + 0x02002323, 0x8701c323, 0x16101d23, 0x9711bd23, + 0x06002424, 0x8301c424, 0x16101e24, 0x9711be24, + 0x02002525, 0x8701c525, 0x12101f25, 0x9311bf25, + 0x02002626, 0x8701c626, 0x12102026, 0x9711c026, + 0x06002727, 0x8301c727, 0x16102127, 0x9311c127, + 0x06002828, 0x8301c828, 0x16102228, 0x9311c228, + 0x02002929, 0x8701c929, 0x12102329, 0x9711c329, + 0x1600242a, 0x9301c42a, 0x1610242a, 0x9311c42a, + 0x1200252b, 0x9701c52b, 0x1210252b, 0x9711c52b, + 0x1200262c, 0x9701c62c, 0x1210262c, 0x9711c62c, + 0x1600272d, 0x9301c72d, 0x1610272d, 0x9311c72d, + 0x1600282e, 0x9301c82e, 0x1610282e, 0x9311c82e, + 0x1200292f, 0x9701c92f, 0x1210292f, 0x9711c92f, + 0x06003030, 0x8301d030, 0x12102a30, 0x9711ca30, + 0x02003131, 0x8701d131, 0x16102b31, 0x9311cb31, + 0x02003232, 0x8701d232, 0x12102c32, 0x9711cc32, + 0x06003333, 0x8301d333, 0x16102d33, 0x9311cd33, + 0x02003434, 0x8701d434, 0x16102e34, 0x9311ce34, + 0x06003535, 0x8301d535, 0x12102f35, 0x9711cf35, + 0x06003636, 0x8301d636, 0x16103036, 0x9311d036, + 0x02003737, 0x8701d737, 0x12103137, 0x9711d137, + 0x02003838, 0x8701d838, 0x12103238, 0x9711d238, + 0x06003939, 0x8301d939, 
0x16103339, 0x9311d339, + 0x1200343a, 0x9701d43a, 0x1210343a, 0x9711d43a, + 0x1600353b, 0x9301d53b, 0x1610353b, 0x9311d53b, + 0x1600363c, 0x9301d63c, 0x1610363c, 0x9311d63c, + 0x1200373d, 0x9701d73d, 0x1210373d, 0x9711d73d, + 0x1200383e, 0x9701d83e, 0x1210383e, 0x9711d83e, + 0x1600393f, 0x9301d93f, 0x1610393f, 0x9311d93f, + 0x02004040, 0x8301e040, 0x16103a40, 0x9311da40, + 0x06004141, 0x8701e141, 0x12103b41, 0x9711db41, + 0x06004242, 0x8701e242, 0x16103c42, 0x9311dc42, + 0x02004343, 0x8301e343, 0x12103d43, 0x9711dd43, + 0x06004444, 0x8701e444, 0x12103e44, 0x9711de44, + 0x02004545, 0x8301e545, 0x16103f45, 0x9311df45, + 0x02004646, 0x8301e646, 0x12104046, 0x9311e046, + 0x06004747, 0x8701e747, 0x16104147, 0x9711e147, + 0x06004848, 0x8701e848, 0x16104248, 0x9711e248, + 0x02004949, 0x8301e949, 0x12104349, 0x9311e349, + 0x1600444a, 0x9701e44a, 0x1610444a, 0x9711e44a, + 0x1200454b, 0x9301e54b, 0x1210454b, 0x9311e54b, + 0x1200464c, 0x9301e64c, 0x1210464c, 0x9311e64c, + 0x1600474d, 0x9701e74d, 0x1610474d, 0x9711e74d, + 0x1600484e, 0x9701e84e, 0x1610484e, 0x9711e84e, + 0x1200494f, 0x9301e94f, 0x1210494f, 0x9311e94f, + 0x06005050, 0x8701f050, 0x12104a50, 0x9311ea50, + 0x02005151, 0x8301f151, 0x16104b51, 0x9711eb51, + 0x02005252, 0x8301f252, 0x12104c52, 0x9311ec52, + 0x06005353, 0x8701f353, 0x16104d53, 0x9711ed53, + 0x02005454, 0x8301f454, 0x16104e54, 0x9711ee54, + 0x06005555, 0x8701f555, 0x12104f55, 0x9311ef55, + 0x06005656, 0x8701f656, 0x16105056, 0x9711f056, + 0x02005757, 0x8301f757, 0x12105157, 0x9311f157, + 0x02005858, 0x8301f858, 0x12105258, 0x9311f258, + 0x06005959, 0x8701f959, 0x16105359, 0x9711f359, + 0x1200545a, 0x9301f45a, 0x1210545a, 0x9311f45a, + 0x1600555b, 0x9701f55b, 0x1610555b, 0x9711f55b, + 0x1600565c, 0x9701f65c, 0x1610565c, 0x9711f65c, + 0x1200575d, 0x9301f75d, 0x1210575d, 0x9311f75d, + 0x1200585e, 0x9301f85e, 0x1210585e, 0x9311f85e, + 0x1600595f, 0x9701f95f, 0x1610595f, 0x9711f95f, + 0x06006060, 0x47010060, 0x16105a60, 0x9711fa60, + 0x02006161, 0x03010161, 
0x12105b61, 0x9311fb61, + 0x02006262, 0x03010262, 0x16105c62, 0x9711fc62, + 0x06006363, 0x07010363, 0x12105d63, 0x9311fd63, + 0x02006464, 0x03010464, 0x12105e64, 0x9311fe64, + 0x06006565, 0x07010565, 0x16105f65, 0x9711ff65, + 0x06006666, 0x07010666, 0x16106066, 0x57110066, + 0x02006767, 0x03010767, 0x12106167, 0x13110167, + 0x02006868, 0x03010868, 0x12106268, 0x13110268, + 0x06006969, 0x07010969, 0x16106369, 0x17110369, + 0x1200646a, 0x1301046a, 0x1210646a, 0x1311046a, + 0x1600656b, 0x1701056b, 0x1610656b, 0x1711056b, + 0x1600666c, 0x1701066c, 0x1610666c, 0x1711066c, + 0x1200676d, 0x1301076d, 0x1210676d, 0x1311076d, + 0x1200686e, 0x1301086e, 0x1210686e, 0x1311086e, + 0x1600696f, 0x1701096f, 0x1610696f, 0x1711096f, + 0x02007070, 0x03011070, 0x16106a70, 0x17110a70, + 0x06007171, 0x07011171, 0x12106b71, 0x13110b71, + 0x06007272, 0x07011272, 0x16106c72, 0x17110c72, + 0x02007373, 0x03011373, 0x12106d73, 0x13110d73, + 0x06007474, 0x07011474, 0x12106e74, 0x13110e74, + 0x02007575, 0x03011575, 0x16106f75, 0x17110f75, + 0x02007676, 0x03011676, 0x12107076, 0x13111076, + 0x06007777, 0x07011777, 0x16107177, 0x17111177, + 0x06007878, 0x07011878, 0x16107278, 0x17111278, + 0x02007979, 0x03011979, 0x12107379, 0x13111379, + 0x1600747a, 0x1701147a, 0x1610747a, 0x1711147a, + 0x1200757b, 0x1301157b, 0x1210757b, 0x1311157b, + 0x1200767c, 0x1301167c, 0x1210767c, 0x1311167c, + 0x1600777d, 0x1701177d, 0x1610777d, 0x1711177d, + 0x1600787e, 0x1701187e, 0x1610787e, 0x1711187e, + 0x1200797f, 0x1301197f, 0x1210797f, 0x1311197f, + 0x82008080, 0x03012080, 0x12107a80, 0x13111a80, + 0x86008181, 0x07012181, 0x16107b81, 0x17111b81, + 0x86008282, 0x07012282, 0x12107c82, 0x13111c82, + 0x82008383, 0x03012383, 0x16107d83, 0x17111d83, + 0x86008484, 0x07012484, 0x16107e84, 0x17111e84, + 0x82008585, 0x03012585, 0x12107f85, 0x13111f85, + 0x82008686, 0x03012686, 0x92108086, 0x13112086, + 0x86008787, 0x07012787, 0x96108187, 0x17112187, + 0x86008888, 0x07012888, 0x96108288, 0x17112288, + 0x82008989, 0x03012989, 
0x92108389, 0x13112389, + 0x9600848a, 0x1701248a, 0x9610848a, 0x1711248a, + 0x9200858b, 0x1301258b, 0x9210858b, 0x1311258b, + 0x9200868c, 0x1301268c, 0x9210868c, 0x1311268c, + 0x9600878d, 0x1701278d, 0x9610878d, 0x1711278d, + 0x9600888e, 0x1701288e, 0x9610888e, 0x1711288e, + 0x9200898f, 0x1301298f, 0x9210898f, 0x1311298f, + 0x86009090, 0x07013090, 0x92108a90, 0x13112a90, + 0x82009191, 0x03013191, 0x96108b91, 0x17112b91, + 0x82009292, 0x03013292, 0x92108c92, 0x13112c92, + 0x86009393, 0x07013393, 0x96108d93, 0x17112d93, + 0x82009494, 0x03013494, 0x96108e94, 0x17112e94, + 0x86009595, 0x07013595, 0x92108f95, 0x13112f95, + 0x86009696, 0x07013696, 0x96109096, 0x17113096, + 0x82009797, 0x03013797, 0x92109197, 0x13113197, + 0x82009898, 0x03013898, 0x92109298, 0x13113298, + 0x86009999, 0x07013999, 0x96109399, 0x17113399, + 0x1300349a, 0x1301349a, 0x1310349a, 0x1311349a, + 0x1700359b, 0x1701359b, 0x1710359b, 0x1711359b, + 0x1700369c, 0x1701369c, 0x1710369c, 0x1711369c, + 0x1300379d, 0x1301379d, 0x1310379d, 0x1311379d, + 0x1300389e, 0x1301389e, 0x1310389e, 0x1311389e, + 0x1700399f, 0x1701399f, 0x1710399f, 0x1711399f, + 0x030040a0, 0x030140a0, 0x17103aa0, 0x17113aa0, + 0x070041a1, 0x070141a1, 0x13103ba1, 0x13113ba1, + 0x070042a2, 0x070142a2, 0x17103ca2, 0x17113ca2, + 0x030043a3, 0x030143a3, 0x13103da3, 0x13113da3, + 0x070044a4, 0x070144a4, 0x13103ea4, 0x13113ea4, + 0x030045a5, 0x030145a5, 0x17103fa5, 0x17113fa5, + 0x030046a6, 0x030146a6, 0x131040a6, 0x131140a6, + 0x070047a7, 0x070147a7, 0x171041a7, 0x171141a7, + 0x070048a8, 0x070148a8, 0x171042a8, 0x171142a8, + 0x030049a9, 0x030149a9, 0x131043a9, 0x131143a9, + 0x170044aa, 0x170144aa, 0x171044aa, 0x171144aa, + 0x130045ab, 0x130145ab, 0x131045ab, 0x131145ab, + 0x130046ac, 0x130146ac, 0x131046ac, 0x131146ac, + 0x170047ad, 0x170147ad, 0x171047ad, 0x171147ad, + 0x170048ae, 0x170148ae, 0x171048ae, 0x171148ae, + 0x130049af, 0x130149af, 0x131049af, 0x131149af, + 0x070050b0, 0x070150b0, 0x13104ab0, 0x13114ab0, + 0x030051b1, 0x030151b1, 
0x17104bb1, 0x17114bb1, + 0x030052b2, 0x030152b2, 0x13104cb2, 0x13114cb2, + 0x070053b3, 0x070153b3, 0x17104db3, 0x17114db3, + 0x030054b4, 0x030154b4, 0x17104eb4, 0x17114eb4, + 0x070055b5, 0x070155b5, 0x13104fb5, 0x13114fb5, + 0x070056b6, 0x070156b6, 0x171050b6, 0x171150b6, + 0x030057b7, 0x030157b7, 0x131051b7, 0x131151b7, + 0x030058b8, 0x030158b8, 0x131052b8, 0x131152b8, + 0x070059b9, 0x070159b9, 0x171053b9, 0x171153b9, + 0x130054ba, 0x130154ba, 0x131054ba, 0x131154ba, + 0x170055bb, 0x170155bb, 0x171055bb, 0x171155bb, + 0x170056bc, 0x170156bc, 0x171056bc, 0x171156bc, + 0x130057bd, 0x130157bd, 0x131057bd, 0x131157bd, + 0x130058be, 0x130158be, 0x131058be, 0x131158be, + 0x170059bf, 0x170159bf, 0x171059bf, 0x171159bf, + 0x070060c0, 0x070160c0, 0x17105ac0, 0x17115ac0, + 0x030061c1, 0x030161c1, 0x13105bc1, 0x13115bc1, + 0x030062c2, 0x030162c2, 0x17105cc2, 0x17115cc2, + 0x070063c3, 0x070163c3, 0x13105dc3, 0x13115dc3, + 0x030064c4, 0x030164c4, 0x13105ec4, 0x13115ec4, + 0x070065c5, 0x070165c5, 0x17105fc5, 0x17115fc5, + 0x070066c6, 0x070166c6, 0x171060c6, 0x171160c6, + 0x030067c7, 0x030167c7, 0x131061c7, 0x131161c7, + 0x030068c8, 0x030168c8, 0x131062c8, 0x131162c8, + 0x070069c9, 0x070169c9, 0x171063c9, 0x171163c9, + 0x130064ca, 0x130164ca, 0x131064ca, 0x131164ca, + 0x170065cb, 0x170165cb, 0x171065cb, 0x171165cb, + 0x170066cc, 0x170166cc, 0x171066cc, 0x171166cc, + 0x130067cd, 0x130167cd, 0x131067cd, 0x131167cd, + 0x130068ce, 0x130168ce, 0x131068ce, 0x131168ce, + 0x170069cf, 0x170169cf, 0x171069cf, 0x171169cf, + 0x030070d0, 0x030170d0, 0x17106ad0, 0x17116ad0, + 0x070071d1, 0x070171d1, 0x13106bd1, 0x13116bd1, + 0x070072d2, 0x070172d2, 0x17106cd2, 0x17116cd2, + 0x030073d3, 0x030173d3, 0x13106dd3, 0x13116dd3, + 0x070074d4, 0x070174d4, 0x13106ed4, 0x13116ed4, + 0x030075d5, 0x030175d5, 0x17106fd5, 0x17116fd5, + 0x030076d6, 0x030176d6, 0x131070d6, 0x131170d6, + 0x070077d7, 0x070177d7, 0x171071d7, 0x171171d7, + 0x070078d8, 0x070178d8, 0x171072d8, 0x171172d8, + 0x030079d9, 0x030179d9, 
0x131073d9, 0x131173d9, + 0x170074da, 0x170174da, 0x171074da, 0x171174da, + 0x130075db, 0x130175db, 0x131075db, 0x131175db, + 0x130076dc, 0x130176dc, 0x131076dc, 0x131176dc, + 0x170077dd, 0x170177dd, 0x171077dd, 0x171177dd, + 0x170078de, 0x170178de, 0x171078de, 0x171178de, + 0x130079df, 0x130179df, 0x131079df, 0x131179df, + 0x830080e0, 0x830180e0, 0x13107ae0, 0x13117ae0, + 0x870081e1, 0x870181e1, 0x17107be1, 0x17117be1, + 0x870082e2, 0x870182e2, 0x13107ce2, 0x13117ce2, + 0x830083e3, 0x830183e3, 0x17107de3, 0x17117de3, + 0x870084e4, 0x870184e4, 0x17107ee4, 0x17117ee4, + 0x830085e5, 0x830185e5, 0x13107fe5, 0x13117fe5, + 0x830086e6, 0x830186e6, 0x931080e6, 0x931180e6, + 0x870087e7, 0x870187e7, 0x971081e7, 0x971181e7, + 0x870088e8, 0x870188e8, 0x971082e8, 0x971182e8, + 0x830089e9, 0x830189e9, 0x931083e9, 0x931183e9, + 0x970084ea, 0x970184ea, 0x971084ea, 0x971184ea, + 0x930085eb, 0x930185eb, 0x931085eb, 0x931185eb, + 0x930086ec, 0x930186ec, 0x931086ec, 0x931186ec, + 0x970087ed, 0x970187ed, 0x971087ed, 0x971187ed, + 0x970088ee, 0x970188ee, 0x971088ee, 0x971188ee, + 0x930089ef, 0x930189ef, 0x931089ef, 0x931189ef, + 0x870090f0, 0x870190f0, 0x93108af0, 0x93118af0, + 0x830091f1, 0x830191f1, 0x97108bf1, 0x97118bf1, + 0x830092f2, 0x830192f2, 0x93108cf2, 0x93118cf2, + 0x870093f3, 0x870193f3, 0x97108df3, 0x97118df3, + 0x830094f4, 0x830194f4, 0x97108ef4, 0x97118ef4, + 0x870095f5, 0x870195f5, 0x93108ff5, 0x93118ff5, + 0x870096f6, 0x870196f6, 0x971090f6, 0x971190f6, + 0x830097f7, 0x830197f7, 0x931091f7, 0x931191f7, + 0x830098f8, 0x830198f8, 0x931092f8, 0x931192f8, + 0x870099f9, 0x870199f9, 0x971093f9, 0x971193f9, + 0x930094fa, 0x930194fa, 0x931094fa, 0x931194fa, + 0x970095fb, 0x970195fb, 0x971095fb, 0x971195fb, + 0x970096fc, 0x970196fc, 0x971096fc, 0x971196fc, + 0x930097fd, 0x930197fd, 0x931097fd, 0x931197fd, + 0x930098fe, 0x930198fe, 0x931098fe, 0x931198fe, + 0x970099ff, 0x970199ff, 0x971099ff, 0x971199ff, + }; + + MK_INSN(das, "das"); + + inregs = (struct regs){ 0 }; + + for (i = 
0; i < 1024; ++i) { + unsigned tmp = test_cases[i]; + inregs.eax = tmp & 0xff; + inregs.eflags = (tmp >> 16) & 0xff; + exec_in_big_real_mode(&insn_das); + if (!regs_equal(R_AX) + || outregs.eax != ((tmp >> 8) & 0xff) + || (outregs.eflags & 0xff) != (tmp >> 24)) { + ++nr_fail; + break; + } + } + report("DAS", ~0, nr_fail == 0); +} + +void test_cwd_cdq() +{ + /* Sign-bit set */ + MK_INSN(cwd_1, "mov $0x8000, %ax\n\t" + "cwd\n\t"); + + /* Sign-bit not set */ + MK_INSN(cwd_2, "mov $0x1000, %ax\n\t" + "cwd\n\t"); + + /* Sign-bit set */ + MK_INSN(cdq_1, "mov $0x80000000, %eax\n\t" + "cdq\n\t"); + + /* Sign-bit not set */ + MK_INSN(cdq_2, "mov $0x10000000, %eax\n\t" + "cdq\n\t"); + + inregs = (struct regs){ 0 }; + + exec_in_big_real_mode(&insn_cwd_1); + report("cwd 1", R_AX | R_DX, + outregs.eax == 0x8000 && outregs.edx == 0xffff); + + exec_in_big_real_mode(&insn_cwd_2); + report("cwd 2", R_AX | R_DX, + outregs.eax == 0x1000 && outregs.edx == 0); + + exec_in_big_real_mode(&insn_cdq_1); + report("cdq 1", R_AX | R_DX, + outregs.eax == 0x80000000 && outregs.edx == 0xffffffff); + + exec_in_big_real_mode(&insn_cdq_2); + report("cdq 2", R_AX | R_DX, + outregs.eax == 0x10000000 && outregs.edx == 0); +} + +static struct { + void *address; + unsigned short sel; +} __attribute__((packed)) desc = { + (void *)0x1234, + 0x10, +}; + +void test_lds_lss() +{ + inregs = (struct regs){ .ebx = (unsigned long)&desc }; + + MK_INSN(lds, "push %ds\n\t" + "lds (%ebx), %eax\n\t" + "mov %ds, %ebx\n\t" + "pop %ds\n\t"); + exec_in_big_real_mode(&insn_lds); + report("lds", R_AX | R_BX, + outregs.eax == (unsigned long)desc.address && + outregs.ebx == desc.sel); + + MK_INSN(les, "push %es\n\t" + "les (%ebx), %eax\n\t" + "mov %es, %ebx\n\t" + "pop %es\n\t"); + exec_in_big_real_mode(&insn_les); + report("les", R_AX | R_BX, + outregs.eax == (unsigned long)desc.address && + outregs.ebx == desc.sel); + + MK_INSN(lfs, "push %fs\n\t" + "lfs (%ebx), %eax\n\t" + "mov %fs, %ebx\n\t" + "pop %fs\n\t"); + 
exec_in_big_real_mode(&insn_lfs); + report("lfs", R_AX | R_BX, + outregs.eax == (unsigned long)desc.address && + outregs.ebx == desc.sel); + + MK_INSN(lgs, "push %gs\n\t" + "lgs (%ebx), %eax\n\t" + "mov %gs, %ebx\n\t" + "pop %gs\n\t"); + exec_in_big_real_mode(&insn_lgs); + report("lgs", R_AX | R_BX, + outregs.eax == (unsigned long)desc.address && + outregs.ebx == desc.sel); + + MK_INSN(lss, "push %ss\n\t" + "lss (%ebx), %eax\n\t" + "mov %ss, %ebx\n\t" + "pop %ss\n\t"); + exec_in_big_real_mode(&insn_lss); + report("lss", R_AX | R_BX, + outregs.eax == (unsigned long)desc.address && + outregs.ebx == desc.sel); +} + +void test_jcxz(void) +{ + MK_INSN(jcxz1, "jcxz 1f\n\t" + "mov $0x1234, %eax\n\t" + "1:\n\t"); + MK_INSN(jcxz2, "mov $0x100, %ecx\n\t" + "jcxz 1f\n\t" + "mov $0x1234, %eax\n\t" + "mov $0, %ecx\n\t" + "1:\n\t"); + MK_INSN(jcxz3, "mov $0x10000, %ecx\n\t" + "jcxz 1f\n\t" + "mov $0x1234, %eax\n\t" + "1:\n\t"); + MK_INSN(jecxz1, "jecxz 1f\n\t" + "mov $0x1234, %eax\n\t" + "1:\n\t"); + MK_INSN(jecxz2, "mov $0x10000, %ecx\n\t" + "jecxz 1f\n\t" + "mov $0x1234, %eax\n\t" + "mov $0, %ecx\n\t" + "1:\n\t"); + + inregs = (struct regs){ 0 }; + + exec_in_big_real_mode(&insn_jcxz1); + report("jcxz short 1", 0, 1); + + exec_in_big_real_mode(&insn_jcxz2); + report("jcxz short 2", R_AX, outregs.eax == 0x1234); + + exec_in_big_real_mode(&insn_jcxz3); + report("jcxz short 3", R_CX, outregs.ecx == 0x10000); + + exec_in_big_real_mode(&insn_jecxz1); + report("jecxz short 1", 0, 1); + + exec_in_big_real_mode(&insn_jecxz2); + report("jecxz short 2", R_AX, outregs.eax == 0x1234); +} + +static void test_cpuid(void) +{ + MK_INSN(cpuid, "cpuid"); + unsigned function = 0x1234; + unsigned eax, ebx, ecx, edx; + + inregs.eax = eax = function; + inregs.ecx = ecx = 0; + asm("cpuid" : "+a"(eax), "=b"(ebx), "+c"(ecx), "=d"(edx)); + exec_in_big_real_mode(&insn_cpuid); + report("cpuid", R_AX|R_BX|R_CX|R_DX, + outregs.eax == eax && outregs.ebx == ebx + && outregs.ecx == ecx && outregs.edx == edx); 
+} + +static void test_ss_base_for_esp_ebp(void) +{ + MK_INSN(ssrel1, "mov %ss, %ax; mov %bx, %ss; movl (%ebp), %ebx; mov %ax, %ss"); + MK_INSN(ssrel2, "mov %ss, %ax; mov %bx, %ss; movl (%ebp,%edi,8), %ebx; mov %ax, %ss"); + static unsigned array[] = { 0x12345678, 0, 0, 0, 0x87654321 }; + + inregs.ebx = 1; + inregs.ebp = (unsigned)array; + exec_in_big_real_mode(&insn_ssrel1); + report("ss relative addressing (1)", R_AX | R_BX, outregs.ebx == 0x87654321); + inregs.ebx = 1; + inregs.ebp = (unsigned)array; + inregs.edi = 0; + exec_in_big_real_mode(&insn_ssrel2); + report("ss relative addressing (2)", R_AX | R_BX, outregs.ebx == 0x87654321); +} + +extern unsigned long long r_gdt[]; + +static void test_sgdt_sidt(void) +{ + MK_INSN(sgdt, "sgdtw (%eax)"); + MK_INSN(sidt, "sidtw (%eax)"); + struct table_descr x, y; + + inregs.eax = (unsigned)&y; + asm volatile("sgdtw %0" : "=m"(x)); + exec_in_big_real_mode(&insn_sgdt); + report("sgdt", 0, x.limit == y.limit && x.base == y.base); + + inregs.eax = (unsigned)&y; + asm volatile("sidtw %0" : "=m"(x)); + exec_in_big_real_mode(&insn_sidt); + report("sidt", 0, x.limit == y.limit && x.base == y.base); +} + +static void test_sahf(void) +{ + MK_INSN(sahf, "sahf; pushfw; mov (%esp), %al; popfw"); + + inregs.eax = 0xfd00; + exec_in_big_real_mode(&insn_sahf); + report("sahf", R_AX, outregs.eax == (inregs.eax | 0xd7)); +} + +static void test_lahf(void) +{ + MK_INSN(lahf, "pushfw; mov %al, (%esp); popfw; lahf"); + + inregs.eax = 0xc7; + exec_in_big_real_mode(&insn_lahf); + report("lahf", R_AX, (outregs.eax >> 8) == inregs.eax); +} + +static void test_movzx_movsx(void) +{ + MK_INSN(movsx, "movsx %al, %ebx"); + MK_INSN(movzx, "movzx %al, %ebx"); + MK_INSN(movzsah, "movsx %ah, %ebx"); + MK_INSN(movzxah, "movzx %ah, %ebx"); + + inregs.eax = 0x1234569c; + inregs.esp = 0xffff; + exec_in_big_real_mode(&insn_movsx); + report("movsx", R_BX, outregs.ebx == (signed char)inregs.eax); + exec_in_big_real_mode(&insn_movzx); + report("movzx", R_BX, 
outregs.ebx == (unsigned char)inregs.eax); + exec_in_big_real_mode(&insn_movzsah); + report("movsx ah", R_BX, outregs.ebx == (signed char)(inregs.eax>>8)); + exec_in_big_real_mode(&insn_movzxah); + report("movzx ah", R_BX, outregs.ebx == (unsigned char)(inregs.eax >> 8)); +} + +static void test_bswap(void) +{ + MK_INSN(bswap, "bswap %ecx"); + + inregs.ecx = 0x12345678; + exec_in_big_real_mode(&insn_bswap); + report("bswap", R_CX, outregs.ecx == 0x78563412); +} + +static void test_aad(void) +{ + MK_INSN(aad, "aad"); + + inregs.eax = 0x12345678; + exec_in_big_real_mode(&insn_aad); + report("aad", R_AX, outregs.eax == 0x123400d4); +} + +static void test_aam(void) +{ + MK_INSN(aam, "aam"); + + inregs.eax = 0x76543210; + exec_in_big_real_mode(&insn_aam); + report("aam", R_AX, outregs.eax == 0x76540106); +} + +static void test_xlat(void) +{ + MK_INSN(xlat, "xlat"); + u8 table[256]; + int i; + + for (i = 0; i < 256; i++) { + table[i] = i + 1; + } + + inregs.eax = 0x89abcdef; + inregs.ebx = (u32)table; + exec_in_big_real_mode(&insn_xlat); + report("xlat", R_AX, outregs.eax == 0x89abcdf0); +} + +static void test_salc(void) +{ + MK_INSN(clc_salc, "clc; .byte 0xd6"); + MK_INSN(stc_salc, "stc; .byte 0xd6"); + + inregs.eax = 0x12345678; + exec_in_big_real_mode(&insn_clc_salc); + report("salc (1)", R_AX, outregs.eax == 0x12345600); + exec_in_big_real_mode(&insn_stc_salc); + report("salc (2)", R_AX, outregs.eax == 0x123456ff); +} + +static void test_fninit(void) +{ + u16 fcw = -1, fsw = -1; + MK_INSN(fninit, "fninit ; fnstsw (%eax) ; fnstcw (%ebx)"); + + inregs.eax = (u32)&fsw; + inregs.ebx = (u32)&fcw; + + exec_in_big_real_mode(&insn_fninit); + report("fninit", 0, fsw == 0 && (fcw & 0x103f) == 0x003f); +} + +static void test_nopl(void) +{ + MK_INSN(nopl1, ".byte 0x90\n\r"); // 1 byte nop + MK_INSN(nopl2, ".byte 0x66, 0x90\n\r"); // 2 bytes nop + MK_INSN(nopl3, ".byte 0x0f, 0x1f, 0x00\n\r"); // 3 bytes nop + MK_INSN(nopl4, ".byte 0x0f, 0x1f, 0x40, 0x00\n\r"); // 4 bytes nop + 
exec_in_big_real_mode(&insn_nopl1); + exec_in_big_real_mode(&insn_nopl2); + exec_in_big_real_mode(&insn_nopl3); + exec_in_big_real_mode(&insn_nopl4); + report("nopl", 0, 1); +} + +static u32 perf_baseline; + +#define PERF_COUNT 1000000 + +#define MK_INSN_PERF(name, insn) \ + MK_INSN(name, "rdtsc; mov %eax, %ebx; mov %edx, %esi\n" \ + "1:" insn "\n" \ + ".byte 0x67; loop 1b\n" \ + "rdtsc"); + +static u32 cycles_in_big_real_mode(struct insn_desc *insn) +{ + u64 start, end; + + inregs.ecx = PERF_COUNT; + exec_in_big_real_mode(insn); + start = ((u64)outregs.esi << 32) | outregs.ebx; + end = ((u64)outregs.edx << 32) | outregs.eax; + + return end - start; +} + +static void test_perf_loop(void) +{ + /* + * This test runs simple instructions that should roughly take the + * the same time to emulate: PERF_COUNT iterations of "loop" and 3 + * setup instructions. Other performance tests can run PERF_COUNT + * iterations of the same instruction and subtract the cycle count + * of this test. + */ + MK_INSN_PERF(perf_loop, ""); + perf_baseline = cycles_in_big_real_mode(&insn_perf_loop); + print_serial_u32(perf_baseline / (PERF_COUNT + 3)); + print_serial(" cycles/emulated jump instruction\n"); +} + +static void test_perf_mov(void) +{ + u32 cyc; + + MK_INSN_PERF(perf_move, "mov %esi, %edi"); + cyc = cycles_in_big_real_mode(&insn_perf_move); + print_serial_u32((cyc - perf_baseline) / PERF_COUNT); + print_serial(" cycles/emulated move instruction\n"); +} + +static void test_perf_arith(void) +{ + u32 cyc; + + MK_INSN_PERF(perf_arith, "add $4, %edi"); + cyc = cycles_in_big_real_mode(&insn_perf_arith); + print_serial_u32((cyc - perf_baseline) / PERF_COUNT); + print_serial(" cycles/emulated arithmetic instruction\n"); +} + +static void test_perf_memory_load(void) +{ + u32 cyc, tmp; + + MK_INSN_PERF(perf_memory_load, "cmp $0, (%edi)"); + inregs.edi = (u32)&tmp; + cyc = cycles_in_big_real_mode(&insn_perf_memory_load); + print_serial_u32((cyc - perf_baseline) / PERF_COUNT); + 
print_serial(" cycles/emulated memory load instruction\n"); +} + +static void test_perf_memory_store(void) +{ + u32 cyc, tmp; + + MK_INSN_PERF(perf_memory_store, "mov %ax, (%edi)"); + inregs.edi = (u32)&tmp; + cyc = cycles_in_big_real_mode(&insn_perf_memory_store); + print_serial_u32((cyc - perf_baseline) / PERF_COUNT); + print_serial(" cycles/emulated memory store instruction\n"); +} + +static void test_perf_memory_rmw(void) +{ + u32 cyc, tmp; + + MK_INSN_PERF(perf_memory_rmw, "add $1, (%edi)"); + inregs.edi = (u32)&tmp; + cyc = cycles_in_big_real_mode(&insn_perf_memory_rmw); + print_serial_u32((cyc - perf_baseline) / PERF_COUNT); + print_serial(" cycles/emulated memory RMW instruction\n"); +} + +void test_dr_mod(void) +{ + MK_INSN(drmod, "movl %ebx, %dr0\n\t" + ".byte 0x0f \n\t .byte 0x21 \n\t .byte 0x0\n\t"); + inregs.eax = 0xdead; + inregs.ebx = 0xaced; + exec_in_big_real_mode(&insn_drmod); + report("mov dr with mod bits", R_AX | R_BX, outregs.eax == 0xaced); +} + +void test_smsw(void) +{ + MK_INSN(smsw, "movl %cr0, %ebx\n\t" + "movl %ebx, %ecx\n\t" + "or $0x40000000, %ebx\n\t" + "movl %ebx, %cr0\n\t" + "smswl %eax\n\t" + "movl %ecx, %cr0\n\t"); + inregs.eax = 0x12345678; + exec_in_big_real_mode(&insn_smsw); + report("smsw", R_AX | R_BX | R_CX, outregs.eax == outregs.ebx); +} + +void test_xadd(void) +{ + MK_INSN(xadd, "xaddl %eax, %eax\n\t"); + inregs.eax = 0x12345678; + exec_in_big_real_mode(&insn_xadd); + report("xadd", R_AX, outregs.eax == inregs.eax * 2); +} + + +void realmode_start(void) +{ + test_null(); + + test_shld(); + test_push_pop(); + test_pusha_popa(); + test_mov_imm(); + test_cmp_imm(); + test_add_imm(); + test_sub_imm(); + test_xor_imm(); + test_io(); + test_eflags_insn(); + test_jcc_short(); + test_jcc_near(); + /* test_call() uses short jump so call it after testing jcc */ + test_call(); + /* long jmp test uses call near so test it after testing call */ + test_long_jmp(); + test_xchg(); + test_iret(); + test_int(); + test_imul(); + test_mul(); 
+ test_div(); + test_idiv(); + test_loopcc(); + test_cbw(); + test_cwd_cdq(); + test_das(); + test_lds_lss(); + test_jcxz(); + test_cpuid(); + test_ss_base_for_esp_ebp(); + test_sgdt_sidt(); + test_lahf(); + test_sahf(); + test_movzx_movsx(); + test_bswap(); + test_aad(); + test_aam(); + test_xlat(); + test_salc(); + test_fninit(); + test_dr_mod(); + test_smsw(); + test_nopl(); + test_xadd(); + test_perf_loop(); + test_perf_mov(); + test_perf_arith(); + test_perf_memory_load(); + test_perf_memory_store(); + test_perf_memory_rmw(); + + exit(failed); +} + +unsigned long long r_gdt[] = { 0, 0x9b000000ffff, 0x93000000ffff }; + +struct table_descr r_gdt_descr = { sizeof(r_gdt) - 1, &r_gdt }; + +asm( + ".section .init \n\t" + + ".code32 \n\t" + + "mb_magic = 0x1BADB002 \n\t" + "mb_flags = 0x0 \n\t" + + "# multiboot header \n\t" + ".long mb_magic, mb_flags, 0 - (mb_magic + mb_flags) \n\t" + + ".globl start \n\t" + ".data \n\t" + ". = . + 4096 \n\t" + "stacktop: \n\t" + + ".text \n\t" + "start: \n\t" + "lgdt r_gdt_descr \n\t" + "ljmp $8, $1f; 1: \n\t" + ".code16gcc \n\t" + "mov $16, %eax \n\t" + "mov %ax, %ds \n\t" + "mov %ax, %es \n\t" + "mov %ax, %fs \n\t" + "mov %ax, %gs \n\t" + "mov %ax, %ss \n\t" + "mov %cr0, %eax \n\t" + "btc $0, %eax \n\t" + "mov %eax, %cr0 \n\t" + "ljmp $0, $realmode_entry \n\t" + + "realmode_entry: \n\t" + + "xor %ax, %ax \n\t" + "mov %ax, %ds \n\t" + "mov %ax, %es \n\t" + "mov %ax, %ss \n\t" + "mov %ax, %fs \n\t" + "mov %ax, %gs \n\t" + "mov $stacktop, %esp\n\t" + "ljmp $0, $realmode_start \n\t" + + ".code16gcc \n\t" + ); diff --git a/tests/kvm-unit-tests/x86/realmode.lds b/tests/kvm-unit-tests/x86/realmode.lds new file mode 100644 index 00000000..0ed3063b --- /dev/null +++ b/tests/kvm-unit-tests/x86/realmode.lds @@ -0,0 +1,12 @@ +SECTIONS +{ + . = 16K; + stext = .; + .text : { *(.init) *(.text) } + . = ALIGN(4K); + .data : { *(.data) *(.rodata*) } + . 
= ALIGN(16); + .bss : { *(.bss) } + edata = .; +} +ENTRY(start) diff --git a/tests/kvm-unit-tests/x86/rmap_chain.c b/tests/kvm-unit-tests/x86/rmap_chain.c new file mode 100644 index 00000000..7bf6275c --- /dev/null +++ b/tests/kvm-unit-tests/x86/rmap_chain.c @@ -0,0 +1,45 @@ +/* test long rmap chains */ + +#include "libcflat.h" +#include "fwcfg.h" +#include "vm.h" +#include "smp.h" + +int main (void) +{ + int i; + int nr_pages; + void *target_page, *virt_addr; + + setup_vm(); + + nr_pages = fwcfg_get_u64(FW_CFG_RAM_SIZE) / PAGE_SIZE; + nr_pages -= 1000; + target_page = alloc_page(); + + virt_addr = (void *) 0xfffffa000; + for (i = 0; i < nr_pages; i++) { + install_page(phys_to_virt(read_cr3()), virt_to_phys(target_page), + virt_addr); + virt_addr += PAGE_SIZE; + } + printf("created %d mappings\n", nr_pages); + + virt_addr = (void *) 0xfffffa000; + for (i = 0; i < nr_pages; i++) { + unsigned long *touch = virt_addr; + + *touch = 0; + virt_addr += PAGE_SIZE; + } + printf("instantiated mappings\n"); + + virt_addr += PAGE_SIZE; + install_pte(phys_to_virt(read_cr3()), 1, virt_addr, + 0 | PT_PRESENT_MASK | PT_WRITABLE_MASK, target_page); + + *(unsigned long *)virt_addr = 0; + printf("PASS\n"); + + return 0; +} diff --git a/tests/kvm-unit-tests/x86/run b/tests/kvm-unit-tests/x86/run new file mode 100755 index 00000000..867a1ccc --- /dev/null +++ b/tests/kvm-unit-tests/x86/run @@ -0,0 +1,51 @@ +#!/bin/bash + +[ -z "$STANDALONE" ] && source scripts/arch-run.bash + +qemubinarysearch="${QEMU:-qemu-kvm qemu-system-x86_64}" + +for qemucmd in ${qemubinarysearch} +do + unset QEMUFOUND + unset qemu + if ! [ -z "${QEMUFOUND=$(${qemucmd} --help 2>/dev/null | grep "QEMU")}" ] && + ${qemucmd} -device '?' 
2>&1 | grep -F -e \"testdev\" -e \"pc-testdev\" > /dev/null; + then + qemu="${qemucmd}" + break + fi +done + +if [ -z "${QEMUFOUND}" ] +then + echo "A QEMU binary was not found, You can set a custom location by using the QEMU= environment variable " + exit 2 +elif [ -z "${qemu}" ] +then + echo "No Qemu test device support found" + exit 2 +fi + +if + ${qemu} -device '?' 2>&1 | grep -F "pci-testdev" > /dev/null; +then + pci_testdev="-device pci-testdev" +else + pci_testdev="" +fi + +if + ${qemu} -device '?' 2>&1 | grep -F "pc-testdev" > /dev/null; +then + pc_testdev="-device pc-testdev -device isa-debug-exit,iobase=0xf4,iosize=0x4" +else + pc_testdev="-device testdev,chardev=testlog -chardev file,id=testlog,path=msr.out" +fi + +command="${qemu} -nodefaults -enable-kvm $pc_testdev -vnc none -serial stdio $pci_testdev $hyperv_testdev" +[ -f "$ENV" ] && command+=" -initrd $ENV" +command+=" -kernel" +command="$(timeout_cmd) $command" +echo ${command} "$@" + +run_qemu ${command} "$@" diff --git a/tests/kvm-unit-tests/x86/s3.c b/tests/kvm-unit-tests/x86/s3.c new file mode 100644 index 00000000..cef956e0 --- /dev/null +++ b/tests/kvm-unit-tests/x86/s3.c @@ -0,0 +1,89 @@ +#include "libcflat.h" +#include "x86/acpi.h" +#include "asm/io.h" + +u32* find_resume_vector_addr(void) +{ + struct facs_descriptor_rev1 *facs = find_acpi_table_addr(FACS_SIGNATURE); + if (!facs) + return 0; + printf("FACS is at %p\n", facs); + return &facs->firmware_waking_vector; +} + +#define RTC_SECONDS_ALARM 1 +#define RTC_MINUTES_ALARM 3 +#define RTC_HOURS_ALARM 5 +#define RTC_ALARM_DONT_CARE 0xC0 + +#define RTC_REG_A 10 +#define RTC_REG_B 11 +#define RTC_REG_C 12 + +#define REG_A_UIP 0x80 +#define REG_B_AIE 0x20 + +static inline int rtc_in(u8 reg) +{ + outb(reg, 0x70); + return inb(0x71); +} + +static inline void rtc_out(u8 reg, u8 val) +{ + outb(reg, 0x70); + outb(val, 0x71); +} + +extern char resume_start, resume_end; + +int main(int argc, char **argv) +{ + struct fadt_descriptor_rev1 *fadt = 
find_acpi_table_addr(FACP_SIGNATURE); + volatile u32 *resume_vector_ptr = find_resume_vector_addr(); + char *addr, *resume_vec = (void*)0x1000; + + *resume_vector_ptr = (u32)(ulong)resume_vec; + + printf("resume vector addr is %p\n", resume_vector_ptr); + for (addr = &resume_start; addr < &resume_end; addr++) + *resume_vec++ = *addr; + printf("copy resume code from %p\n", &resume_start); + + printf("PM1a event registers at %x\n", fadt->pm1a_evt_blk); + outw(0x400, fadt->pm1a_evt_blk + 2); + + /* Setup RTC alarm to wake up on the next second. */ + while ((rtc_in(RTC_REG_A) & REG_A_UIP) == 0); + while ((rtc_in(RTC_REG_A) & REG_A_UIP) != 0); + rtc_in(RTC_REG_C); + rtc_out(RTC_SECONDS_ALARM, RTC_ALARM_DONT_CARE); + rtc_out(RTC_MINUTES_ALARM, RTC_ALARM_DONT_CARE); + rtc_out(RTC_HOURS_ALARM, RTC_ALARM_DONT_CARE); + rtc_out(RTC_REG_B, rtc_in(RTC_REG_B) | REG_B_AIE); + + *(volatile int*)0 = 0; + asm volatile("outw %0, %1" :: "a"((short)0x2400), "d"((short)fadt->pm1a_cnt_blk):"memory"); + while(1) + *(volatile int*)0 = 1; + + return 0; +} + +asm ( + ".global resume_start\n" + ".global resume_end\n" + ".code16\n" + "resume_start:\n" + "mov 0x0, %eax\n" + "mov $0xf4, %dx\n" + "out %eax, %dx\n" + "1: hlt\n" + "jmp 1b\n" + "resume_end:\n" +#ifdef __i386__ + ".code32\n" +#else + ".code64\n" +#endif + ); diff --git a/tests/kvm-unit-tests/x86/setjmp.c b/tests/kvm-unit-tests/x86/setjmp.c new file mode 100644 index 00000000..fa331792 --- /dev/null +++ b/tests/kvm-unit-tests/x86/setjmp.c @@ -0,0 +1,19 @@ +#include "libcflat.h" +#include "setjmp.h" + +int main() +{ + volatile int i; + jmp_buf j; + + if (setjmp(j) == 0) { + i = 0; + } + printf("%d\n", i); + if (++i < 10) { + longjmp(j, 1); + } + + printf("done\n"); + return 0; +} diff --git a/tests/kvm-unit-tests/x86/sieve.c b/tests/kvm-unit-tests/x86/sieve.c new file mode 100644 index 00000000..5f13c6cd --- /dev/null +++ b/tests/kvm-unit-tests/x86/sieve.c @@ -0,0 +1,51 @@ +#include "vm.h" +#include "libcflat.h" + +int sieve(char* 
data, int size) +{ + int i, j, r = 0; + + for (i = 0; i < size; ++i) + data[i] = 1; + + data[0] = data[1] = 0; + + for (i = 2; i < size; ++i) + if (data[i]) { + ++r; + for (j = i*2; j < size; j += i) + data[j] = 0; + } + return r; +} + +void test_sieve(const char *msg, char *data, int size) +{ + int r; + + printf("%s:", msg); + r = sieve(data, size); + printf("%d out of %d\n", r, size); +} + +#define STATIC_SIZE 1000000 +#define VSIZE 2000000 +char static_data[STATIC_SIZE]; + +int main() +{ + void *v; + int i; + + printf("starting sieve\n"); + test_sieve("static", static_data, STATIC_SIZE); + setup_vm(); + test_sieve("mapped", static_data, STATIC_SIZE); + for (i = 0; i < 3; ++i) { + v = vmalloc(VSIZE); + test_sieve("virtual", v, VSIZE); + vfree(v); + } + + return 0; +} diff --git a/tests/kvm-unit-tests/x86/smap.c b/tests/kvm-unit-tests/x86/smap.c new file mode 100644 index 00000000..f316c146 --- /dev/null +++ b/tests/kvm-unit-tests/x86/smap.c @@ -0,0 +1,187 @@ +#include "libcflat.h" +#include "x86/desc.h" +#include "x86/processor.h" +#include "x86/vm.h" + +#define X86_FEATURE_SMAP 20 + +volatile int pf_count = 0; +volatile int save; +volatile unsigned test; + + +// When doing ring 3 tests, page fault handlers will always run on a +// separate stack (the ring 0 stack). Seems easier to use the alt_stack +// mechanism for both ring 0 and ring 3. 
+ +void do_pf_tss(unsigned long error_code) +{ + pf_count++; + save = test; + +#ifndef __x86_64__ + tss.eflags |= X86_EFLAGS_AC; +#endif +} + +extern void pf_tss(void); +asm ("pf_tss:\n" +#ifdef __x86_64__ + // no task on x86_64, save/restore caller-save regs + "push %rax; push %rcx; push %rdx; push %rsi; push %rdi\n" + "push %r8; push %r9; push %r10; push %r11\n" + "mov 9*8(%rsp),%rsi\n" +#endif + "call do_pf_tss\n" +#ifdef __x86_64__ + "pop %r11; pop %r10; pop %r9; pop %r8\n" + "pop %rdi; pop %rsi; pop %rdx; pop %rcx; pop %rax\n" +#endif + "add $"S", %"R "sp\n" +#ifdef __x86_64__ + "orl $" xstr(X86_EFLAGS_AC) ", 2*"S"(%"R "sp)\n" // set EFLAGS.AC and retry +#endif + "iret"W" \n\t" + "jmp pf_tss\n\t"); + + +#define USER_BASE (1 << 24) +#define USER_VAR(v) (*((__typeof__(&(v))) (((unsigned long)&v) + USER_BASE))) +#define USER_ADDR(v) ((void *)((unsigned long)(&v) + USER_BASE)) + +static void init_test(int i) +{ + pf_count = 0; + if (i) { + invlpg(&test); + invlpg(&USER_VAR(test)); + } +} + +static void check_smap_nowp(void) +{ + test = 0x99; + + *get_pte(phys_to_virt(read_cr3()), USER_ADDR(test)) &= ~PT_WRITABLE_MASK; + + write_cr4(read_cr4() & ~X86_CR4_SMAP); + write_cr0(read_cr0() & ~X86_CR0_WP); + clac(); + write_cr3(read_cr3()); + + init_test(0); + USER_VAR(test) = 0x99; + report("write from user page with SMAP=0, AC=0, WP=0, PTE.U=1 && PTE.W=0", pf_count == 0); + + write_cr4(read_cr4() | X86_CR4_SMAP); + write_cr3(read_cr3()); + + init_test(0); + (void)USER_VAR(test); + report("read from user page with SMAP=1, AC=0, WP=0, PTE.U=1 && PTE.W=0", pf_count == 1 && save == 0x99); + + /* Undo changes */ + *get_pte(phys_to_virt(read_cr3()), USER_ADDR(test)) |= PT_WRITABLE_MASK; + + write_cr0(read_cr0() | X86_CR0_WP); + write_cr3(read_cr3()); +} + +int main(int ac, char **av) +{ + unsigned long i; + + if (!(cpuid_indexed(7, 0).b & (1 << X86_FEATURE_SMAP))) { + printf("SMAP not enabled\n"); + return report_summary(); + } + + setup_vm(); + setup_alt_stack(); + 
set_intr_alt_stack(14, pf_tss); + + // Map first 16MB as supervisor pages + for (i = 0; i < USER_BASE; i += PAGE_SIZE) { + *get_pte(phys_to_virt(read_cr3()), phys_to_virt(i)) &= ~PT_USER_MASK; + invlpg((void *)i); + } + + // Present the same 16MB as user pages in the 16MB-32MB range + for (i = USER_BASE; i < 2 * USER_BASE; i += PAGE_SIZE) { + *get_pte(phys_to_virt(read_cr3()), phys_to_virt(i)) &= ~USER_BASE; + invlpg((void *)i); + } + + clac(); + write_cr4(read_cr4() | X86_CR4_SMAP); + write_cr3(read_cr3()); + + for (i = 0; i < 2; i++) { + if (i) + printf("testing with INVLPG\n"); + else + printf("testing without INVLPG\n"); + + init_test(i); + clac(); + test = 42; + report("write to supervisor page", pf_count == 0 && test == 42); + + init_test(i); + stac(); + (void)USER_VAR(test); + report("read from user page with AC=1", pf_count == 0); + + init_test(i); + clac(); + (void)USER_VAR(test); + report("read from user page with AC=0", pf_count == 1 && save == 42); + + init_test(i); + stac(); + save = 0; + USER_VAR(test) = 43; + report("write to user page with AC=1", pf_count == 0 && test == 43); + + init_test(i); + clac(); + USER_VAR(test) = 44; + report("read from user page with AC=0", pf_count == 1 && test == 44 && save == 43); + + init_test(i); + stac(); + test = -1; + asm("or $(" xstr(USER_BASE) "), %"R "sp \n" + "push $44 \n " + "decl test\n" + "and $~(" xstr(USER_BASE) "), %"R "sp \n" + "pop %"R "ax\n" + "movl %eax, test"); + report("write to user stack with AC=1", pf_count == 0 && test == 44); + + init_test(i); + clac(); + test = -1; + asm("or $(" xstr(USER_BASE) "), %"R "sp \n" + "push $45 \n " + "decl test\n" + "and $~(" xstr(USER_BASE) "), %"R "sp \n" + "pop %"R "ax\n" + "movl %eax, test"); + report("write to user stack with AC=0", pf_count == 1 && test == 45 && save == -1); + + /* This would be trapped by SMEP */ + init_test(i); + clac(); + asm("jmp 1f + "xstr(USER_BASE)" \n" + "1: jmp 2f - "xstr(USER_BASE)" \n" + "2:"); + report("executing on user page with 
AC=0", pf_count == 0); + } + + check_smap_nowp(); + + // TODO: implicit kernel access from ring 3 (e.g. int) + + return report_summary(); +} diff --git a/tests/kvm-unit-tests/x86/smptest.c b/tests/kvm-unit-tests/x86/smptest.c new file mode 100644 index 00000000..68f35eee --- /dev/null +++ b/tests/kvm-unit-tests/x86/smptest.c @@ -0,0 +1,31 @@ +#include "libcflat.h" +#include "smp.h" + +unsigned nipis; + +static void ipi_test(void *data) +{ + int n = (long)data; + + printf("ipi called, cpu %d\n", n); + if (n != smp_id()) + printf("but wrong cpu %d\n", smp_id()); + else + nipis++; +} + +int main() +{ + int ncpus; + int i; + + smp_init(); + + ncpus = cpu_count(); + printf("found %d cpus\n", ncpus); + for (i = 0; i < ncpus; ++i) + on_cpu(i, ipi_test, (void *)(long)i); + + report("IPI to each CPU", nipis == ncpus); + return report_summary(); +} diff --git a/tests/kvm-unit-tests/x86/svm.c b/tests/kvm-unit-tests/x86/svm.c new file mode 100644 index 00000000..25ac0ce4 --- /dev/null +++ b/tests/kvm-unit-tests/x86/svm.c @@ -0,0 +1,1081 @@ +#include "svm.h" +#include "libcflat.h" +#include "processor.h" +#include "desc.h" +#include "msr.h" +#include "vm.h" +#include "smp.h" +#include "types.h" + +/* for the nested page table*/ +u64 *pml4e; +u64 *pdpe; +u64 *pde[4]; +u64 *pte[2048]; +void *scratch_page; + +#define LATENCY_RUNS 1000000 + +u64 tsc_start; +u64 tsc_end; + +u64 vmrun_sum, vmexit_sum; +u64 vmsave_sum, vmload_sum; +u64 stgi_sum, clgi_sum; +u64 latvmrun_max; +u64 latvmrun_min; +u64 latvmexit_max; +u64 latvmexit_min; +u64 latvmload_max; +u64 latvmload_min; +u64 latvmsave_max; +u64 latvmsave_min; +u64 latstgi_max; +u64 latstgi_min; +u64 latclgi_max; +u64 latclgi_min; +u64 runs; + +u8 *io_bitmap; +u8 io_bitmap_area[16384]; + +static bool npt_supported(void) +{ + return cpuid(0x8000000A).d & 1; +} + +static void setup_svm(void) +{ + void *hsave = alloc_page(); + u64 *page, address; + int i,j; + + wrmsr(MSR_VM_HSAVE_PA, virt_to_phys(hsave)); + wrmsr(MSR_EFER, 
rdmsr(MSR_EFER) | EFER_SVME); + wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NX); + + scratch_page = alloc_page(); + + io_bitmap = (void *) (((ulong)io_bitmap_area + 4095) & ~4095); + + if (!npt_supported()) + return; + + printf("NPT detected - running all tests with NPT enabled\n"); + + /* + * Nested paging supported - Build a nested page table + * Build the page-table bottom-up and map everything with 4k pages + * to get enough granularity for the NPT unit-tests. + */ + + address = 0; + + /* PTE level */ + for (i = 0; i < 2048; ++i) { + page = alloc_page(); + + for (j = 0; j < 512; ++j, address += 4096) + page[j] = address | 0x067ULL; + + pte[i] = page; + } + + /* PDE level */ + for (i = 0; i < 4; ++i) { + page = alloc_page(); + + for (j = 0; j < 512; ++j) + page[j] = (u64)pte[(i * 512) + j] | 0x027ULL; + + pde[i] = page; + } + + /* PDPe level */ + pdpe = alloc_page(); + for (i = 0; i < 4; ++i) + pdpe[i] = ((u64)(pde[i])) | 0x27; + + /* PML4e level */ + pml4e = alloc_page(); + pml4e[0] = ((u64)pdpe) | 0x27; +} + +static u64 *npt_get_pde(u64 address) +{ + int i1, i2; + + address >>= 21; + i1 = (address >> 9) & 0x3; + i2 = address & 0x1ff; + + return &pde[i1][i2]; +} + +static u64 *npt_get_pte(u64 address) +{ + int i1, i2; + + address >>= 12; + i1 = (address >> 9) & 0x7ff; + i2 = address & 0x1ff; + + return &pte[i1][i2]; +} + +static void vmcb_set_seg(struct vmcb_seg *seg, u16 selector, + u64 base, u32 limit, u32 attr) +{ + seg->selector = selector; + seg->attrib = attr; + seg->limit = limit; + seg->base = base; +} + +static void vmcb_ident(struct vmcb *vmcb) +{ + u64 vmcb_phys = virt_to_phys(vmcb); + struct vmcb_save_area *save = &vmcb->save; + struct vmcb_control_area *ctrl = &vmcb->control; + u32 data_seg_attr = 3 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK + | SVM_SELECTOR_DB_MASK | SVM_SELECTOR_G_MASK; + u32 code_seg_attr = 9 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK + | SVM_SELECTOR_L_MASK | SVM_SELECTOR_G_MASK; + struct descriptor_table_ptr desc_table_ptr; + + 
memset(vmcb, 0, sizeof(*vmcb)); + asm volatile ("vmsave" : : "a"(vmcb_phys) : "memory"); + vmcb_set_seg(&save->es, read_es(), 0, -1U, data_seg_attr); + vmcb_set_seg(&save->cs, read_cs(), 0, -1U, code_seg_attr); + vmcb_set_seg(&save->ss, read_ss(), 0, -1U, data_seg_attr); + vmcb_set_seg(&save->ds, read_ds(), 0, -1U, data_seg_attr); + sgdt(&desc_table_ptr); + vmcb_set_seg(&save->gdtr, 0, desc_table_ptr.base, desc_table_ptr.limit, 0); + sidt(&desc_table_ptr); + vmcb_set_seg(&save->idtr, 0, desc_table_ptr.base, desc_table_ptr.limit, 0); + ctrl->asid = 1; + save->cpl = 0; + save->efer = rdmsr(MSR_EFER); + save->cr4 = read_cr4(); + save->cr3 = read_cr3(); + save->cr0 = read_cr0(); + save->dr7 = read_dr7(); + save->dr6 = read_dr6(); + save->cr2 = read_cr2(); + save->g_pat = rdmsr(MSR_IA32_CR_PAT); + save->dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR); + ctrl->intercept = (1ULL << INTERCEPT_VMRUN) | (1ULL << INTERCEPT_VMMCALL); + ctrl->iopm_base_pa = virt_to_phys(io_bitmap); + + if (npt_supported()) { + ctrl->nested_ctl = 1; + ctrl->nested_cr3 = (u64)pml4e; + } +} + +struct test { + const char *name; + bool (*supported)(void); + void (*prepare)(struct test *test); + void (*guest_func)(struct test *test); + bool (*finished)(struct test *test); + bool (*succeeded)(struct test *test); + struct vmcb *vmcb; + int exits; + ulong scratch; +}; + +static inline void vmmcall(void) +{ + asm volatile ("vmmcall" : : : "memory"); +} + +static void test_thunk(struct test *test) +{ + test->guest_func(test); + vmmcall(); +} + +struct regs { + u64 rax; + u64 rcx; + u64 rdx; + u64 rbx; + u64 cr2; + u64 rbp; + u64 rsi; + u64 rdi; + u64 r8; + u64 r9; + u64 r10; + u64 r11; + u64 r12; + u64 r13; + u64 r14; + u64 r15; + u64 rflags; +}; + +struct regs regs; + +// rax handled specially below + +#define SAVE_GPR_C \ + "xchg %%rbx, regs+0x8\n\t" \ + "xchg %%rcx, regs+0x10\n\t" \ + "xchg %%rdx, regs+0x18\n\t" \ + "xchg %%rbp, regs+0x28\n\t" \ + "xchg %%rsi, regs+0x30\n\t" \ + "xchg %%rdi, regs+0x38\n\t" \ + 
"xchg %%r8, regs+0x40\n\t" \ + "xchg %%r9, regs+0x48\n\t" \ + "xchg %%r10, regs+0x50\n\t" \ + "xchg %%r11, regs+0x58\n\t" \ + "xchg %%r12, regs+0x60\n\t" \ + "xchg %%r13, regs+0x68\n\t" \ + "xchg %%r14, regs+0x70\n\t" \ + "xchg %%r15, regs+0x78\n\t" + +#define LOAD_GPR_C SAVE_GPR_C + +static void test_run(struct test *test, struct vmcb *vmcb) +{ + u64 vmcb_phys = virt_to_phys(vmcb); + u64 guest_stack[10000]; + + test->vmcb = vmcb; + test->prepare(test); + vmcb->save.rip = (ulong)test_thunk; + vmcb->save.rsp = (ulong)(guest_stack + ARRAY_SIZE(guest_stack)); + regs.rdi = (ulong)test; + do { + tsc_start = rdtsc(); + asm volatile ( + "clgi \n\t" + "vmload \n\t" + "mov regs+0x80, %%r15\n\t" // rflags + "mov %%r15, 0x170(%0)\n\t" + "mov regs, %%r15\n\t" // rax + "mov %%r15, 0x1f8(%0)\n\t" + LOAD_GPR_C + "vmrun \n\t" + SAVE_GPR_C + "mov 0x170(%0), %%r15\n\t" // rflags + "mov %%r15, regs+0x80\n\t" + "mov 0x1f8(%0), %%r15\n\t" // rax + "mov %%r15, regs\n\t" + "vmsave \n\t" + "stgi" + : : "a"(vmcb_phys) + : "rbx", "rcx", "rdx", "rsi", + "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15", + "memory"); + tsc_end = rdtsc(); + ++test->exits; + } while (!test->finished(test)); + + report("%s", test->succeeded(test), test->name); +} + +static bool smp_supported(void) +{ + return cpu_count() > 1; +} + +static bool default_supported(void) +{ + return true; +} + +static void default_prepare(struct test *test) +{ + vmcb_ident(test->vmcb); + cli(); +} + +static bool default_finished(struct test *test) +{ + return true; /* one vmexit */ +} + +static void null_test(struct test *test) +{ +} + +static bool null_check(struct test *test) +{ + return test->vmcb->control.exit_code == SVM_EXIT_VMMCALL; +} + +static void prepare_no_vmrun_int(struct test *test) +{ + test->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMRUN); +} + +static bool check_no_vmrun_int(struct test *test) +{ + return test->vmcb->control.exit_code == SVM_EXIT_ERR; +} + +static void test_vmrun(struct test *test) +{ + 
asm volatile ("vmrun" : : "a"(virt_to_phys(test->vmcb))); +} + +static bool check_vmrun(struct test *test) +{ + return test->vmcb->control.exit_code == SVM_EXIT_VMRUN; +} + +static void prepare_cr3_intercept(struct test *test) +{ + default_prepare(test); + test->vmcb->control.intercept_cr_read |= 1 << 3; +} + +static void test_cr3_intercept(struct test *test) +{ + asm volatile ("mov %%cr3, %0" : "=r"(test->scratch) : : "memory"); +} + +static bool check_cr3_intercept(struct test *test) +{ + return test->vmcb->control.exit_code == SVM_EXIT_READ_CR3; +} + +static bool check_cr3_nointercept(struct test *test) +{ + return null_check(test) && test->scratch == read_cr3(); +} + +static void corrupt_cr3_intercept_bypass(void *_test) +{ + struct test *test = _test; + extern volatile u32 mmio_insn; + + while (!__sync_bool_compare_and_swap(&test->scratch, 1, 2)) + pause(); + pause(); + pause(); + pause(); + mmio_insn = 0x90d8200f; // mov %cr3, %rax; nop +} + +static void prepare_cr3_intercept_bypass(struct test *test) +{ + default_prepare(test); + test->vmcb->control.intercept_cr_read |= 1 << 3; + on_cpu_async(1, corrupt_cr3_intercept_bypass, test); +} + +static void test_cr3_intercept_bypass(struct test *test) +{ + ulong a = 0xa0000; + + test->scratch = 1; + while (test->scratch != 2) + barrier(); + + asm volatile ("mmio_insn: mov %0, (%0); nop" + : "+a"(a) : : "memory"); + test->scratch = a; +} + +static bool next_rip_supported(void) +{ + return (cpuid(SVM_CPUID_FUNC).d & 8); +} + +static void prepare_next_rip(struct test *test) +{ + test->vmcb->control.intercept |= (1ULL << INTERCEPT_RDTSC); +} + + +static void test_next_rip(struct test *test) +{ + asm volatile ("rdtsc\n\t" + ".globl exp_next_rip\n\t" + "exp_next_rip:\n\t" ::: "eax", "edx"); +} + +static bool check_next_rip(struct test *test) +{ + extern char exp_next_rip; + unsigned long address = (unsigned long)&exp_next_rip; + + return address == test->vmcb->control.next_rip; +} + +static void prepare_mode_switch(struct 
test *test) +{ + test->vmcb->control.intercept_exceptions |= (1ULL << GP_VECTOR) + | (1ULL << UD_VECTOR) + | (1ULL << DF_VECTOR) + | (1ULL << PF_VECTOR); + test->scratch = 0; +} + +static void test_mode_switch(struct test *test) +{ + asm volatile(" cli\n" + " ljmp *1f\n" /* jump to 32-bit code segment */ + "1:\n" + " .long 2f\n" + " .long " xstr(KERNEL_CS32) "\n" + ".code32\n" + "2:\n" + " movl %%cr0, %%eax\n" + " btcl $31, %%eax\n" /* clear PG */ + " movl %%eax, %%cr0\n" + " movl $0xc0000080, %%ecx\n" /* EFER */ + " rdmsr\n" + " btcl $8, %%eax\n" /* clear LME */ + " wrmsr\n" + " movl %%cr4, %%eax\n" + " btcl $5, %%eax\n" /* clear PAE */ + " movl %%eax, %%cr4\n" + " movw %[ds16], %%ax\n" + " movw %%ax, %%ds\n" + " ljmpl %[cs16], $3f\n" /* jump to 16 bit protected-mode */ + ".code16\n" + "3:\n" + " movl %%cr0, %%eax\n" + " btcl $0, %%eax\n" /* clear PE */ + " movl %%eax, %%cr0\n" + " ljmpl $0, $4f\n" /* jump to real-mode */ + "4:\n" + " vmmcall\n" + " movl %%cr0, %%eax\n" + " btsl $0, %%eax\n" /* set PE */ + " movl %%eax, %%cr0\n" + " ljmpl %[cs32], $5f\n" /* back to protected mode */ + ".code32\n" + "5:\n" + " movl %%cr4, %%eax\n" + " btsl $5, %%eax\n" /* set PAE */ + " movl %%eax, %%cr4\n" + " movl $0xc0000080, %%ecx\n" /* EFER */ + " rdmsr\n" + " btsl $8, %%eax\n" /* set LME */ + " wrmsr\n" + " movl %%cr0, %%eax\n" + " btsl $31, %%eax\n" /* set PG */ + " movl %%eax, %%cr0\n" + " ljmpl %[cs64], $6f\n" /* back to long mode */ + ".code64\n\t" + "6:\n" + " vmmcall\n" + :: [cs16] "i"(KERNEL_CS16), [ds16] "i"(KERNEL_DS16), + [cs32] "i"(KERNEL_CS32), [cs64] "i"(KERNEL_CS64) + : "rax", "rbx", "rcx", "rdx", "memory"); +} + +static bool mode_switch_finished(struct test *test) +{ + u64 cr0, cr4, efer; + + cr0 = test->vmcb->save.cr0; + cr4 = test->vmcb->save.cr4; + efer = test->vmcb->save.efer; + + /* Only expect VMMCALL intercepts */ + if (test->vmcb->control.exit_code != SVM_EXIT_VMMCALL) + return true; + + /* Jump over VMMCALL instruction */ + test->vmcb->save.rip += 3; + 
+ /* Do sanity checks */ + switch (test->scratch) { + case 0: + /* Test should be in real mode now - check for this */ + if ((cr0 & 0x80000001) || /* CR0.PG, CR0.PE */ + (cr4 & 0x00000020) || /* CR4.PAE */ + (efer & 0x00000500)) /* EFER.LMA, EFER.LME */ + return true; + break; + case 2: + /* Test should be back in long-mode now - check for this */ + if (((cr0 & 0x80000001) != 0x80000001) || /* CR0.PG, CR0.PE */ + ((cr4 & 0x00000020) != 0x00000020) || /* CR4.PAE */ + ((efer & 0x00000500) != 0x00000500)) /* EFER.LMA, EFER.LME */ + return true; + break; + } + + /* one step forward */ + test->scratch += 1; + + return test->scratch == 2; +} + +static bool check_mode_switch(struct test *test) +{ + return test->scratch == 2; +} + +static void prepare_ioio(struct test *test) +{ + test->vmcb->control.intercept |= (1ULL << INTERCEPT_IOIO_PROT); + test->scratch = 0; + memset(io_bitmap, 0, 8192); + io_bitmap[8192] = 0xFF; +} + +int get_test_stage(struct test *test) +{ + barrier(); + return test->scratch; +} + +void inc_test_stage(struct test *test) +{ + barrier(); + test->scratch++; + barrier(); +} + +static void test_ioio(struct test *test) +{ + // stage 0, test IO pass + inb(0x5000); + outb(0x0, 0x5000); + if (get_test_stage(test) != 0) + goto fail; + + // test IO width, in/out + io_bitmap[0] = 0xFF; + inc_test_stage(test); + inb(0x0); + if (get_test_stage(test) != 2) + goto fail; + + outw(0x0, 0x0); + if (get_test_stage(test) != 3) + goto fail; + + inl(0x0); + if (get_test_stage(test) != 4) + goto fail; + + // test low/high IO port + io_bitmap[0x5000 / 8] = (1 << (0x5000 % 8)); + inb(0x5000); + if (get_test_stage(test) != 5) + goto fail; + + io_bitmap[0x9000 / 8] = (1 << (0x9000 % 8)); + inw(0x9000); + if (get_test_stage(test) != 6) + goto fail; + + // test partial pass + io_bitmap[0x5000 / 8] = (1 << (0x5000 % 8)); + inl(0x4FFF); + if (get_test_stage(test) != 7) + goto fail; + + // test across pages + inc_test_stage(test); + inl(0x7FFF); + if (get_test_stage(test) != 8) + 
goto fail; + + inc_test_stage(test); + io_bitmap[0x8000 / 8] = 1 << (0x8000 % 8); + inl(0x7FFF); + if (get_test_stage(test) != 10) + goto fail; + + io_bitmap[0] = 0; + inl(0xFFFF); + if (get_test_stage(test) != 11) + goto fail; + + io_bitmap[0] = 0xFF; + io_bitmap[8192] = 0; + inl(0xFFFF); + inc_test_stage(test); + if (get_test_stage(test) != 12) + goto fail; + + return; + +fail: + report("stage %d", false, get_test_stage(test)); + test->scratch = -1; +} + +static bool ioio_finished(struct test *test) +{ + unsigned port, size; + + /* Only expect IOIO intercepts */ + if (test->vmcb->control.exit_code == SVM_EXIT_VMMCALL) + return true; + + if (test->vmcb->control.exit_code != SVM_EXIT_IOIO) + return true; + + /* one step forward */ + test->scratch += 1; + + port = test->vmcb->control.exit_info_1 >> 16; + size = (test->vmcb->control.exit_info_1 >> SVM_IOIO_SIZE_SHIFT) & 7; + + while (size--) { + io_bitmap[port / 8] &= ~(1 << (port & 7)); + port++; + } + + return false; +} + +static bool check_ioio(struct test *test) +{ + memset(io_bitmap, 0, 8193); + return test->scratch != -1; +} + +static void prepare_asid_zero(struct test *test) +{ + test->vmcb->control.asid = 0; +} + +static void test_asid_zero(struct test *test) +{ + asm volatile ("vmmcall\n\t"); +} + +static bool check_asid_zero(struct test *test) +{ + return test->vmcb->control.exit_code == SVM_EXIT_ERR; +} + +static void sel_cr0_bug_prepare(struct test *test) +{ + vmcb_ident(test->vmcb); + test->vmcb->control.intercept |= (1ULL << INTERCEPT_SELECTIVE_CR0); +} + +static bool sel_cr0_bug_finished(struct test *test) +{ + return true; +} + +static void sel_cr0_bug_test(struct test *test) +{ + unsigned long cr0; + + /* read cr0, clear CD, and write back */ + cr0 = read_cr0(); + cr0 |= (1UL << 30); + write_cr0(cr0); + + /* + * If we are here the test failed, not sure what to do now because we + * are not in guest-mode anymore so we can't trigger an intercept. + * Trigger a tripple-fault for now. 
+ */ + report("sel_cr0 test. Can not recover from this - exiting", false); + exit(report_summary()); +} + +static bool sel_cr0_bug_check(struct test *test) +{ + return test->vmcb->control.exit_code == SVM_EXIT_CR0_SEL_WRITE; +} + +static void npt_nx_prepare(struct test *test) +{ + + u64 *pte; + + vmcb_ident(test->vmcb); + pte = npt_get_pte((u64)null_test); + + *pte |= (1ULL << 63); +} + +static bool npt_nx_check(struct test *test) +{ + u64 *pte = npt_get_pte((u64)null_test); + + *pte &= ~(1ULL << 63); + + test->vmcb->save.efer |= (1 << 11); + + return (test->vmcb->control.exit_code == SVM_EXIT_NPF) + && (test->vmcb->control.exit_info_1 == 0x100000015ULL); +} + +static void npt_us_prepare(struct test *test) +{ + u64 *pte; + + vmcb_ident(test->vmcb); + pte = npt_get_pte((u64)scratch_page); + + *pte &= ~(1ULL << 2); +} + +static void npt_us_test(struct test *test) +{ + (void) *(volatile u64 *)scratch_page; +} + +static bool npt_us_check(struct test *test) +{ + u64 *pte = npt_get_pte((u64)scratch_page); + + *pte |= (1ULL << 2); + + return (test->vmcb->control.exit_code == SVM_EXIT_NPF) + && (test->vmcb->control.exit_info_1 == 0x100000005ULL); +} + +u64 save_pde; + +static void npt_rsvd_prepare(struct test *test) +{ + u64 *pde; + + vmcb_ident(test->vmcb); + pde = npt_get_pde((u64) null_test); + + save_pde = *pde; + *pde = (1ULL << 19) | (1ULL << 7) | 0x27; +} + +static bool npt_rsvd_check(struct test *test) +{ + u64 *pde = npt_get_pde((u64) null_test); + + *pde = save_pde; + + return (test->vmcb->control.exit_code == SVM_EXIT_NPF) + && (test->vmcb->control.exit_info_1 == 0x10000001dULL); +} + +static void npt_rw_prepare(struct test *test) +{ + + u64 *pte; + + vmcb_ident(test->vmcb); + pte = npt_get_pte(0x80000); + + *pte &= ~(1ULL << 1); +} + +static void npt_rw_test(struct test *test) +{ + u64 *data = (void*)(0x80000); + + *data = 0; +} + +static bool npt_rw_check(struct test *test) +{ + u64 *pte = npt_get_pte(0x80000); + + *pte |= (1ULL << 1); + + return 
(test->vmcb->control.exit_code == SVM_EXIT_NPF) + && (test->vmcb->control.exit_info_1 == 0x100000007ULL); +} + +static void npt_rw_pfwalk_prepare(struct test *test) +{ + + u64 *pte; + + vmcb_ident(test->vmcb); + pte = npt_get_pte(read_cr3()); + + *pte &= ~(1ULL << 1); +} + +static bool npt_rw_pfwalk_check(struct test *test) +{ + u64 *pte = npt_get_pte(read_cr3()); + + *pte |= (1ULL << 1); + + return (test->vmcb->control.exit_code == SVM_EXIT_NPF) + && (test->vmcb->control.exit_info_1 == 0x200000006ULL) + && (test->vmcb->control.exit_info_2 == read_cr3()); +} + +static void npt_rsvd_pfwalk_prepare(struct test *test) +{ + + vmcb_ident(test->vmcb); + + pdpe[0] |= (1ULL << 8); +} + +static bool npt_rsvd_pfwalk_check(struct test *test) +{ + pdpe[0] &= ~(1ULL << 8); + + return (test->vmcb->control.exit_code == SVM_EXIT_NPF) + && (test->vmcb->control.exit_info_1 == 0x200000006ULL); +} + +static void npt_l1mmio_prepare(struct test *test) +{ + vmcb_ident(test->vmcb); +} + +u32 nested_apic_version1; +u32 nested_apic_version2; + +static void npt_l1mmio_test(struct test *test) +{ + volatile u32 *data = (volatile void*)(0xfee00030UL); + + nested_apic_version1 = *data; + nested_apic_version2 = *data; +} + +static bool npt_l1mmio_check(struct test *test) +{ + volatile u32 *data = (volatile void*)(0xfee00030); + u32 lvr = *data; + + return nested_apic_version1 == lvr && nested_apic_version2 == lvr; +} + +static void npt_rw_l1mmio_prepare(struct test *test) +{ + + u64 *pte; + + vmcb_ident(test->vmcb); + pte = npt_get_pte(0xfee00080); + + *pte &= ~(1ULL << 1); +} + +static void npt_rw_l1mmio_test(struct test *test) +{ + volatile u32 *data = (volatile void*)(0xfee00080); + + *data = *data; +} + +static bool npt_rw_l1mmio_check(struct test *test) +{ + u64 *pte = npt_get_pte(0xfee00080); + + *pte |= (1ULL << 1); + + return (test->vmcb->control.exit_code == SVM_EXIT_NPF) + && (test->vmcb->control.exit_info_1 == 0x100000007ULL); +} + +static void latency_prepare(struct test *test) +{ + 
default_prepare(test); + runs = LATENCY_RUNS; + latvmrun_min = latvmexit_min = -1ULL; + latvmrun_max = latvmexit_max = 0; + vmrun_sum = vmexit_sum = 0; +} + +static void latency_test(struct test *test) +{ + u64 cycles; + +start: + tsc_end = rdtsc(); + + cycles = tsc_end - tsc_start; + + if (cycles > latvmrun_max) + latvmrun_max = cycles; + + if (cycles < latvmrun_min) + latvmrun_min = cycles; + + vmrun_sum += cycles; + + tsc_start = rdtsc(); + + asm volatile ("vmmcall" : : : "memory"); + goto start; +} + +static bool latency_finished(struct test *test) +{ + u64 cycles; + + tsc_end = rdtsc(); + + cycles = tsc_end - tsc_start; + + if (cycles > latvmexit_max) + latvmexit_max = cycles; + + if (cycles < latvmexit_min) + latvmexit_min = cycles; + + vmexit_sum += cycles; + + test->vmcb->save.rip += 3; + + runs -= 1; + + return runs == 0; +} + +static bool latency_check(struct test *test) +{ + printf(" Latency VMRUN : max: %ld min: %ld avg: %ld\n", latvmrun_max, + latvmrun_min, vmrun_sum / LATENCY_RUNS); + printf(" Latency VMEXIT: max: %ld min: %ld avg: %ld\n", latvmexit_max, + latvmexit_min, vmexit_sum / LATENCY_RUNS); + return true; +} + +static void lat_svm_insn_prepare(struct test *test) +{ + default_prepare(test); + runs = LATENCY_RUNS; + latvmload_min = latvmsave_min = latstgi_min = latclgi_min = -1ULL; + latvmload_max = latvmsave_max = latstgi_max = latclgi_max = 0; + vmload_sum = vmsave_sum = stgi_sum = clgi_sum = 0; +} + +static bool lat_svm_insn_finished(struct test *test) +{ + u64 vmcb_phys = virt_to_phys(test->vmcb); + u64 cycles; + + for ( ; runs != 0; runs--) { + tsc_start = rdtsc(); + asm volatile("vmload\n\t" : : "a"(vmcb_phys) : "memory"); + cycles = rdtsc() - tsc_start; + if (cycles > latvmload_max) + latvmload_max = cycles; + if (cycles < latvmload_min) + latvmload_min = cycles; + vmload_sum += cycles; + + tsc_start = rdtsc(); + asm volatile("vmsave\n\t" : : "a"(vmcb_phys) : "memory"); + cycles = rdtsc() - tsc_start; + if (cycles > latvmsave_max) + 
latvmsave_max = cycles; + if (cycles < latvmsave_min) + latvmsave_min = cycles; + vmsave_sum += cycles; + + tsc_start = rdtsc(); + asm volatile("stgi\n\t"); + cycles = rdtsc() - tsc_start; + if (cycles > latstgi_max) + latstgi_max = cycles; + if (cycles < latstgi_min) + latstgi_min = cycles; + stgi_sum += cycles; + + tsc_start = rdtsc(); + asm volatile("clgi\n\t"); + cycles = rdtsc() - tsc_start; + if (cycles > latclgi_max) + latclgi_max = cycles; + if (cycles < latclgi_min) + latclgi_min = cycles; + clgi_sum += cycles; + } + + return true; +} + +static bool lat_svm_insn_check(struct test *test) +{ + printf(" Latency VMLOAD: max: %ld min: %ld avg: %ld\n", latvmload_max, + latvmload_min, vmload_sum / LATENCY_RUNS); + printf(" Latency VMSAVE: max: %ld min: %ld avg: %ld\n", latvmsave_max, + latvmsave_min, vmsave_sum / LATENCY_RUNS); + printf(" Latency STGI: max: %ld min: %ld avg: %ld\n", latstgi_max, + latstgi_min, stgi_sum / LATENCY_RUNS); + printf(" Latency CLGI: max: %ld min: %ld avg: %ld\n", latclgi_max, + latclgi_min, clgi_sum / LATENCY_RUNS); + return true; +} +static struct test tests[] = { + { "null", default_supported, default_prepare, null_test, + default_finished, null_check }, + { "vmrun", default_supported, default_prepare, test_vmrun, + default_finished, check_vmrun }, + { "ioio", default_supported, prepare_ioio, test_ioio, + ioio_finished, check_ioio }, + { "vmrun intercept check", default_supported, prepare_no_vmrun_int, + null_test, default_finished, check_no_vmrun_int }, + { "cr3 read intercept", default_supported, prepare_cr3_intercept, + test_cr3_intercept, default_finished, check_cr3_intercept }, + { "cr3 read nointercept", default_supported, default_prepare, + test_cr3_intercept, default_finished, check_cr3_nointercept }, + { "cr3 read intercept emulate", smp_supported, + prepare_cr3_intercept_bypass, test_cr3_intercept_bypass, + default_finished, check_cr3_intercept }, + { "next_rip", next_rip_supported, prepare_next_rip, test_next_rip, + 
default_finished, check_next_rip }, + { "mode_switch", default_supported, prepare_mode_switch, test_mode_switch, + mode_switch_finished, check_mode_switch }, + { "asid_zero", default_supported, prepare_asid_zero, test_asid_zero, + default_finished, check_asid_zero }, + { "sel_cr0_bug", default_supported, sel_cr0_bug_prepare, sel_cr0_bug_test, + sel_cr0_bug_finished, sel_cr0_bug_check }, + { "npt_nx", npt_supported, npt_nx_prepare, null_test, + default_finished, npt_nx_check }, + { "npt_us", npt_supported, npt_us_prepare, npt_us_test, + default_finished, npt_us_check }, + { "npt_rsvd", npt_supported, npt_rsvd_prepare, null_test, + default_finished, npt_rsvd_check }, + { "npt_rw", npt_supported, npt_rw_prepare, npt_rw_test, + default_finished, npt_rw_check }, + { "npt_rsvd_pfwalk", npt_supported, npt_rsvd_pfwalk_prepare, null_test, + default_finished, npt_rsvd_pfwalk_check }, + { "npt_rw_pfwalk", npt_supported, npt_rw_pfwalk_prepare, null_test, + default_finished, npt_rw_pfwalk_check }, + { "npt_l1mmio", npt_supported, npt_l1mmio_prepare, npt_l1mmio_test, + default_finished, npt_l1mmio_check }, + { "npt_rw_l1mmio", npt_supported, npt_rw_l1mmio_prepare, npt_rw_l1mmio_test, + default_finished, npt_rw_l1mmio_check }, + { "latency_run_exit", default_supported, latency_prepare, latency_test, + latency_finished, latency_check }, + { "latency_svm_insn", default_supported, lat_svm_insn_prepare, null_test, + lat_svm_insn_finished, lat_svm_insn_check }, +}; + +int main(int ac, char **av) +{ + int i, nr; + struct vmcb *vmcb; + + setup_vm(); + smp_init(); + + if (!(cpuid(0x80000001).c & 4)) { + printf("SVM not available\n"); + return report_summary(); + } + + setup_svm(); + + vmcb = alloc_page(); + + nr = ARRAY_SIZE(tests); + for (i = 0; i < nr; ++i) { + if (!tests[i].supported()) + continue; + test_run(&tests[i], vmcb); + } + + return report_summary(); +} diff --git a/tests/kvm-unit-tests/x86/svm.h b/tests/kvm-unit-tests/x86/svm.h new file mode 100644 index 00000000..3fdc0d33
--- /dev/null +++ b/tests/kvm-unit-tests/x86/svm.h @@ -0,0 +1,328 @@ +#ifndef __SVM_H +#define __SVM_H + +#include "libcflat.h" + +enum { + INTERCEPT_INTR, + INTERCEPT_NMI, + INTERCEPT_SMI, + INTERCEPT_INIT, + INTERCEPT_VINTR, + INTERCEPT_SELECTIVE_CR0, + INTERCEPT_STORE_IDTR, + INTERCEPT_STORE_GDTR, + INTERCEPT_STORE_LDTR, + INTERCEPT_STORE_TR, + INTERCEPT_LOAD_IDTR, + INTERCEPT_LOAD_GDTR, + INTERCEPT_LOAD_LDTR, + INTERCEPT_LOAD_TR, + INTERCEPT_RDTSC, + INTERCEPT_RDPMC, + INTERCEPT_PUSHF, + INTERCEPT_POPF, + INTERCEPT_CPUID, + INTERCEPT_RSM, + INTERCEPT_IRET, + INTERCEPT_INTn, + INTERCEPT_INVD, + INTERCEPT_PAUSE, + INTERCEPT_HLT, + INTERCEPT_INVLPG, + INTERCEPT_INVLPGA, + INTERCEPT_IOIO_PROT, + INTERCEPT_MSR_PROT, + INTERCEPT_TASK_SWITCH, + INTERCEPT_FERR_FREEZE, + INTERCEPT_SHUTDOWN, + INTERCEPT_VMRUN, + INTERCEPT_VMMCALL, + INTERCEPT_VMLOAD, + INTERCEPT_VMSAVE, + INTERCEPT_STGI, + INTERCEPT_CLGI, + INTERCEPT_SKINIT, + INTERCEPT_RDTSCP, + INTERCEPT_ICEBP, + INTERCEPT_WBINVD, + INTERCEPT_MONITOR, + INTERCEPT_MWAIT, + INTERCEPT_MWAIT_COND, +}; + + +struct __attribute__ ((__packed__)) vmcb_control_area { + u16 intercept_cr_read; + u16 intercept_cr_write; + u16 intercept_dr_read; + u16 intercept_dr_write; + u32 intercept_exceptions; + u64 intercept; + u8 reserved_1[42]; + u16 pause_filter_count; + u64 iopm_base_pa; + u64 msrpm_base_pa; + u64 tsc_offset; + u32 asid; + u8 tlb_ctl; + u8 reserved_2[3]; + u32 int_ctl; + u32 int_vector; + u32 int_state; + u8 reserved_3[4]; + u32 exit_code; + u32 exit_code_hi; + u64 exit_info_1; + u64 exit_info_2; + u32 exit_int_info; + u32 exit_int_info_err; + u64 nested_ctl; + u8 reserved_4[16]; + u32 event_inj; + u32 event_inj_err; + u64 nested_cr3; + u64 lbr_ctl; + u64 reserved_5; + u64 next_rip; + u8 reserved_6[816]; +}; + + +#define TLB_CONTROL_DO_NOTHING 0 +#define TLB_CONTROL_FLUSH_ALL_ASID 1 + +#define V_TPR_MASK 0x0f + +#define V_IRQ_SHIFT 8 +#define V_IRQ_MASK (1 << V_IRQ_SHIFT) + +#define V_INTR_PRIO_SHIFT 16 +#define 
V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT) + +#define V_IGN_TPR_SHIFT 20 +#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT) + +#define V_INTR_MASKING_SHIFT 24 +#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT) + +#define SVM_INTERRUPT_SHADOW_MASK 1 + +#define SVM_IOIO_STR_SHIFT 2 +#define SVM_IOIO_REP_SHIFT 3 +#define SVM_IOIO_SIZE_SHIFT 4 +#define SVM_IOIO_ASIZE_SHIFT 7 + +#define SVM_IOIO_TYPE_MASK 1 +#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT) +#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT) +#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT) +#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT) + +#define SVM_VM_CR_VALID_MASK 0x001fULL +#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL +#define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL + +struct __attribute__ ((__packed__)) vmcb_seg { + u16 selector; + u16 attrib; + u32 limit; + u64 base; +}; + +struct __attribute__ ((__packed__)) vmcb_save_area { + struct vmcb_seg es; + struct vmcb_seg cs; + struct vmcb_seg ss; + struct vmcb_seg ds; + struct vmcb_seg fs; + struct vmcb_seg gs; + struct vmcb_seg gdtr; + struct vmcb_seg ldtr; + struct vmcb_seg idtr; + struct vmcb_seg tr; + u8 reserved_1[43]; + u8 cpl; + u8 reserved_2[4]; + u64 efer; + u8 reserved_3[112]; + u64 cr4; + u64 cr3; + u64 cr0; + u64 dr7; + u64 dr6; + u64 rflags; + u64 rip; + u8 reserved_4[88]; + u64 rsp; + u8 reserved_5[24]; + u64 rax; + u64 star; + u64 lstar; + u64 cstar; + u64 sfmask; + u64 kernel_gs_base; + u64 sysenter_cs; + u64 sysenter_esp; + u64 sysenter_eip; + u64 cr2; + u8 reserved_6[32]; + u64 g_pat; + u64 dbgctl; + u64 br_from; + u64 br_to; + u64 last_excp_from; + u64 last_excp_to; +}; + +struct __attribute__ ((__packed__)) vmcb { + struct vmcb_control_area control; + struct vmcb_save_area save; +}; + +#define SVM_CPUID_FEATURE_SHIFT 2 +#define SVM_CPUID_FUNC 0x8000000a + +#define SVM_VM_CR_SVM_DISABLE 4 + +#define SVM_SELECTOR_S_SHIFT 4 +#define SVM_SELECTOR_DPL_SHIFT 5 +#define SVM_SELECTOR_P_SHIFT 7 +#define SVM_SELECTOR_AVL_SHIFT 8 
+#define SVM_SELECTOR_L_SHIFT 9 +#define SVM_SELECTOR_DB_SHIFT 10 +#define SVM_SELECTOR_G_SHIFT 11 + +#define SVM_SELECTOR_TYPE_MASK (0xf) +#define SVM_SELECTOR_S_MASK (1 << SVM_SELECTOR_S_SHIFT) +#define SVM_SELECTOR_DPL_MASK (3 << SVM_SELECTOR_DPL_SHIFT) +#define SVM_SELECTOR_P_MASK (1 << SVM_SELECTOR_P_SHIFT) +#define SVM_SELECTOR_AVL_MASK (1 << SVM_SELECTOR_AVL_SHIFT) +#define SVM_SELECTOR_L_MASK (1 << SVM_SELECTOR_L_SHIFT) +#define SVM_SELECTOR_DB_MASK (1 << SVM_SELECTOR_DB_SHIFT) +#define SVM_SELECTOR_G_MASK (1 << SVM_SELECTOR_G_SHIFT) + +#define SVM_SELECTOR_WRITE_MASK (1 << 1) +#define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK +#define SVM_SELECTOR_CODE_MASK (1 << 3) + +#define INTERCEPT_CR0_MASK 1 +#define INTERCEPT_CR3_MASK (1 << 3) +#define INTERCEPT_CR4_MASK (1 << 4) +#define INTERCEPT_CR8_MASK (1 << 8) + +#define INTERCEPT_DR0_MASK 1 +#define INTERCEPT_DR1_MASK (1 << 1) +#define INTERCEPT_DR2_MASK (1 << 2) +#define INTERCEPT_DR3_MASK (1 << 3) +#define INTERCEPT_DR4_MASK (1 << 4) +#define INTERCEPT_DR5_MASK (1 << 5) +#define INTERCEPT_DR6_MASK (1 << 6) +#define INTERCEPT_DR7_MASK (1 << 7) + +#define SVM_EVTINJ_VEC_MASK 0xff + +#define SVM_EVTINJ_TYPE_SHIFT 8 +#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT) + +#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT) +#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT) +#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT) +#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT) + +#define SVM_EVTINJ_VALID (1 << 31) +#define SVM_EVTINJ_VALID_ERR (1 << 11) + +#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK +#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK + +#define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR +#define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI +#define SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT +#define SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT + +#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID +#define SVM_EXITINTINFO_VALID_ERR 
SVM_EVTINJ_VALID_ERR + +#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36 +#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38 +#define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44 + +#define SVM_EXIT_READ_CR0 0x000 +#define SVM_EXIT_READ_CR3 0x003 +#define SVM_EXIT_READ_CR4 0x004 +#define SVM_EXIT_READ_CR8 0x008 +#define SVM_EXIT_WRITE_CR0 0x010 +#define SVM_EXIT_WRITE_CR3 0x013 +#define SVM_EXIT_WRITE_CR4 0x014 +#define SVM_EXIT_WRITE_CR8 0x018 +#define SVM_EXIT_READ_DR0 0x020 +#define SVM_EXIT_READ_DR1 0x021 +#define SVM_EXIT_READ_DR2 0x022 +#define SVM_EXIT_READ_DR3 0x023 +#define SVM_EXIT_READ_DR4 0x024 +#define SVM_EXIT_READ_DR5 0x025 +#define SVM_EXIT_READ_DR6 0x026 +#define SVM_EXIT_READ_DR7 0x027 +#define SVM_EXIT_WRITE_DR0 0x030 +#define SVM_EXIT_WRITE_DR1 0x031 +#define SVM_EXIT_WRITE_DR2 0x032 +#define SVM_EXIT_WRITE_DR3 0x033 +#define SVM_EXIT_WRITE_DR4 0x034 +#define SVM_EXIT_WRITE_DR5 0x035 +#define SVM_EXIT_WRITE_DR6 0x036 +#define SVM_EXIT_WRITE_DR7 0x037 +#define SVM_EXIT_EXCP_BASE 0x040 +#define SVM_EXIT_INTR 0x060 +#define SVM_EXIT_NMI 0x061 +#define SVM_EXIT_SMI 0x062 +#define SVM_EXIT_INIT 0x063 +#define SVM_EXIT_VINTR 0x064 +#define SVM_EXIT_CR0_SEL_WRITE 0x065 +#define SVM_EXIT_IDTR_READ 0x066 +#define SVM_EXIT_GDTR_READ 0x067 +#define SVM_EXIT_LDTR_READ 0x068 +#define SVM_EXIT_TR_READ 0x069 +#define SVM_EXIT_IDTR_WRITE 0x06a +#define SVM_EXIT_GDTR_WRITE 0x06b +#define SVM_EXIT_LDTR_WRITE 0x06c +#define SVM_EXIT_TR_WRITE 0x06d +#define SVM_EXIT_RDTSC 0x06e +#define SVM_EXIT_RDPMC 0x06f +#define SVM_EXIT_PUSHF 0x070 +#define SVM_EXIT_POPF 0x071 +#define SVM_EXIT_CPUID 0x072 +#define SVM_EXIT_RSM 0x073 +#define SVM_EXIT_IRET 0x074 +#define SVM_EXIT_SWINT 0x075 +#define SVM_EXIT_INVD 0x076 +#define SVM_EXIT_PAUSE 0x077 +#define SVM_EXIT_HLT 0x078 +#define SVM_EXIT_INVLPG 0x079 +#define SVM_EXIT_INVLPGA 0x07a +#define SVM_EXIT_IOIO 0x07b +#define SVM_EXIT_MSR 0x07c +#define SVM_EXIT_TASK_SWITCH 0x07d +#define SVM_EXIT_FERR_FREEZE 0x07e +#define SVM_EXIT_SHUTDOWN 
0x07f +#define SVM_EXIT_VMRUN 0x080 +#define SVM_EXIT_VMMCALL 0x081 +#define SVM_EXIT_VMLOAD 0x082 +#define SVM_EXIT_VMSAVE 0x083 +#define SVM_EXIT_STGI 0x084 +#define SVM_EXIT_CLGI 0x085 +#define SVM_EXIT_SKINIT 0x086 +#define SVM_EXIT_RDTSCP 0x087 +#define SVM_EXIT_ICEBP 0x088 +#define SVM_EXIT_WBINVD 0x089 +#define SVM_EXIT_MONITOR 0x08a +#define SVM_EXIT_MWAIT 0x08b +#define SVM_EXIT_MWAIT_COND 0x08c +#define SVM_EXIT_NPF 0x400 + +#define SVM_EXIT_ERR -1 + +#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP) + +#endif + diff --git a/tests/kvm-unit-tests/x86/taskswitch.c b/tests/kvm-unit-tests/x86/taskswitch.c new file mode 100644 index 00000000..01483a16 --- /dev/null +++ b/tests/kvm-unit-tests/x86/taskswitch.c @@ -0,0 +1,50 @@ +/* + * Copyright 2010 Siemens AG + * Author: Jan Kiszka + * + * Released under GPLv2. + */ + +#include "libcflat.h" +#include "x86/desc.h" + +#define TSS_RETURN (FIRST_SPARE_SEL) + +void fault_entry(void); + +static __attribute__((used, regparm(1))) void +fault_handler(unsigned long error_code) +{ + print_current_tss_info(); + printf("error code %lx\n", error_code); + + tss.eip += 2; + + gdt32[TSS_MAIN / 8].access &= ~2; + + set_gdt_task_gate(TSS_RETURN, tss_intr.prev); +} + +asm ( + "fault_entry:\n" + " mov (%esp),%eax\n" + " call fault_handler\n" + " jmp $" xstr(TSS_RETURN) ", $0\n" +); + +int main(int ac, char **av) +{ + const long invalid_segment = 0x1234; + + setup_tss32(); + set_intr_task_gate(13, fault_entry); + + asm ( + "mov %0,%%es\n" + : : "r" (invalid_segment) : "edi" + ); + + printf("post fault\n"); + + return 0; +} diff --git a/tests/kvm-unit-tests/x86/taskswitch2.c b/tests/kvm-unit-tests/x86/taskswitch2.c new file mode 100644 index 00000000..bb7345b2 --- /dev/null +++ b/tests/kvm-unit-tests/x86/taskswitch2.c @@ -0,0 +1,294 @@ +#include "libcflat.h" +#include "desc.h" +#include "apic-defs.h" +#include "apic.h" +#include "processor.h" +#include "vm.h" + +#define MAIN_TSS_SEL (FIRST_SPARE_SEL + 0) +#define VM86_TSS_SEL 
(FIRST_SPARE_SEL + 8) +#define CONFORM_CS_SEL (FIRST_SPARE_SEL + 16) + +static volatile int test_count; +static volatile unsigned int test_divider; + +static char *fault_addr; +static ulong fault_phys; + +static inline void io_delay(void) +{ +} + +static void nmi_tss(void) +{ +start: + printf("NMI task is running\n"); + print_current_tss_info(); + test_count++; + asm volatile ("iret"); + goto start; +} + +static void de_tss(void) +{ +start: + printf("DE task is running\n"); + print_current_tss_info(); + test_divider = 10; + test_count++; + asm volatile ("iret"); + goto start; +} + +static void of_tss(void) +{ +start: + printf("OF task is running\n"); + print_current_tss_info(); + test_count++; + asm volatile ("iret"); + goto start; +} + +static void bp_tss(void) +{ +start: + printf("BP task is running\n"); + print_current_tss_info(); + test_count++; + asm volatile ("iret"); + goto start; +} + +void do_pf_tss(ulong *error_code) +{ + printf("PF task is running %p %lx\n", error_code, *error_code); + print_current_tss_info(); + if (*error_code == 0x2) /* write access, not present */ + test_count++; + install_pte(phys_to_virt(read_cr3()), 1, fault_addr, + fault_phys | PT_PRESENT_MASK | PT_WRITABLE_MASK, 0); +} + +extern void pf_tss(void); + +asm ( + "pf_tss: \n\t" + "push %esp \n\t" + "call do_pf_tss \n\t" + "add $4, %esp \n\t" + "iret\n\t" + "jmp pf_tss\n\t" + ); + +static void jmp_tss(void) +{ +start: + printf("JMP to task succeeded\n"); + print_current_tss_info(); + test_count++; + asm volatile ("ljmp $" xstr(TSS_MAIN) ", $0"); + goto start; +} + +static void irq_tss(void) +{ +start: + printf("IRQ task is running\n"); + print_current_tss_info(); + test_count++; + asm volatile ("iret"); + test_count++; + printf("IRQ task restarts after iret.\n"); + goto start; +} + +static void user_tss(void) +{ +start: + printf("Conforming task is running\n"); + print_current_tss_info(); + test_count++; + asm volatile ("iret"); + goto start; +} + +void test_kernel_mode_int() +{ + 
unsigned int res; + + /* test that int $2 triggers task gate */ + test_count = 0; + set_intr_task_gate(2, nmi_tss); + printf("Triggering nmi 2\n"); + asm volatile ("int $2"); + printf("Return from nmi %d\n", test_count); + report("NMI int $2", test_count == 1); + + /* test that external NMI triggers task gate */ + test_count = 0; + set_intr_task_gate(2, nmi_tss); + printf("Triggering nmi through APIC\n"); + apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_NMI | APIC_INT_ASSERT, 0); + io_delay(); + printf("Return from APIC nmi\n"); + report("NMI external", test_count == 1); + + /* test that external interrupt triggesr task gate */ + test_count = 0; + printf("Trigger IRQ from APIC\n"); + set_intr_task_gate(0xf0, irq_tss); + irq_enable(); + apic_icr_write(APIC_DEST_SELF | APIC_DEST_PHYSICAL | APIC_DM_FIXED | APIC_INT_ASSERT | 0xf0, 0); + io_delay(); + irq_disable(); + printf("Return from APIC IRQ\n"); + report("IRQ external", test_count == 1); + + /* test that HW exception triggesr task gate */ + set_intr_task_gate(0, de_tss); + printf("Try to devide by 0\n"); + asm volatile ("divl %3": "=a"(res) + : "d"(0), "a"(1500), "m"(test_divider)); + printf("Result is %d\n", res); + report("DE exeption", res == 150); + + /* test if call HW exeption DE by int $0 triggers task gate */ + test_count = 0; + set_intr_task_gate(0, de_tss); + printf("Call int 0\n"); + asm volatile ("int $0"); + printf("Return from int 0\n"); + report("int $0", test_count == 1); + + /* test if HW exception OF triggers task gate */ + test_count = 0; + set_intr_task_gate(4, of_tss); + printf("Call into\n"); + asm volatile ("addb $127, %b0\ninto"::"a"(127)); + printf("Return from into\n"); + report("OF exeption", test_count); + + /* test if HW exception BP triggers task gate */ + test_count = 0; + set_intr_task_gate(3, bp_tss); + printf("Call int 3\n"); + asm volatile ("int $3"); + printf("Return from int 3\n"); + report("BP exeption", test_count == 1); + + /* + * test that PF triggers task gate and error code 
is placed on + * exception task's stack + */ + fault_addr = alloc_vpage(); + fault_phys = (ulong)virt_to_phys(alloc_page()); + test_count = 0; + set_intr_task_gate(14, pf_tss); + printf("Access unmapped page\n"); + *fault_addr = 0; + printf("Return from pf tss\n"); + report("PF exeption", test_count == 1); +} + +void test_gdt_task_gate(void) +{ + /* test that calling a task by lcall works */ + test_count = 0; + tss_intr.eip = (u32)irq_tss; + printf("Calling task by lcall\n"); + /* hlt opcode is 0xf4 I use destination IP 0xf4f4f4f4 to catch + incorrect instruction length calculation */ + asm volatile("lcall $" xstr(TSS_INTR) ", $0xf4f4f4f4"); + printf("Return from call\n"); + report("lcall", test_count == 1); + + /* call the same task again and check that it restarted after iret */ + test_count = 0; + asm volatile("lcall $" xstr(TSS_INTR) ", $0xf4f4f4f4"); + report("lcall2", test_count == 2); + + /* test that calling a task by ljmp works */ + test_count = 0; + tss_intr.eip = (u32)jmp_tss; + printf("Jumping to a task by ljmp\n"); + asm volatile ("ljmp $" xstr(TSS_INTR) ", $0xf4f4f4f4"); + printf("Jump back succeeded\n"); + report("ljmp", test_count == 1); +} + +void test_vm86_switch(void) +{ + static tss32_t main_tss; + static tss32_t vm86_tss; + + u8 *vm86_start; + + /* Write a 'ud2' instruction somewhere below 1 MB */ + vm86_start = (void*) 0x42000; + vm86_start[0] = 0x0f; + vm86_start[1] = 0x0b; + + /* Main TSS */ + set_gdt_entry(MAIN_TSS_SEL, (u32)&main_tss, sizeof(tss32_t) - 1, 0x89, 0); + ltr(MAIN_TSS_SEL); + main_tss = (tss32_t) { + .prev = VM86_TSS_SEL, + .cr3 = read_cr3(), + }; + + /* VM86 TSS (marked as busy, so we can iret to it) */ + set_gdt_entry(VM86_TSS_SEL, (u32)&vm86_tss, sizeof(tss32_t) - 1, 0x8b, 0); + vm86_tss = (tss32_t) { + .eflags = 0x20002, + .cr3 = read_cr3(), + .eip = (u32) vm86_start & 0x0f, + .cs = (u32) vm86_start >> 4, + .ds = 0x1234, + .es = 0x2345, + }; + + /* Setup task gate to main TSS for #UD */ + set_idt_task_gate(6, MAIN_TSS_SEL); 
+ + /* Jump into VM86 task with iret, #UD lets it come back immediately */ + printf("Switch to VM86 task and back\n"); + asm volatile( + "pushf\n" + "orw $0x4000, (%esp)\n" + "popf\n" + "iret\n" + ); + report("VM86", 1); +} + +#define IOPL_SHIFT 12 + +void test_conforming_switch(void) +{ + /* test lcall with conforming segment, cs.dpl != cs.rpl */ + test_count = 0; + + tss_intr.cs = CONFORM_CS_SEL | 3; + tss_intr.eip = (u32)user_tss; + tss_intr.ss = USER_DS; + tss_intr.ds = tss_intr.gs = tss_intr.es = tss_intr.fs = tss_intr.ss; + tss_intr.eflags |= 3 << IOPL_SHIFT; + set_gdt_entry(CONFORM_CS_SEL, 0, 0xffffffff, 0x9f, 0xc0); + asm volatile("lcall $" xstr(TSS_INTR) ", $0xf4f4f4f4"); + report("lcall with cs.rpl != cs.dpl", test_count == 1); +} + +int main() +{ + setup_vm(); + setup_idt(); + setup_tss32(); + + test_gdt_task_gate(); + test_kernel_mode_int(); + test_vm86_switch(); + test_conforming_switch(); + + return report_summary(); +} diff --git a/tests/kvm-unit-tests/x86/tsc.c b/tests/kvm-unit-tests/x86/tsc.c new file mode 100644 index 00000000..62450e71 --- /dev/null +++ b/tests/kvm-unit-tests/x86/tsc.c @@ -0,0 +1,47 @@ +#include "libcflat.h" +#include "processor.h" + +#define CPUID_80000001_EDX_RDTSCP (1 << 27) +int check_cpuid_80000001_edx(unsigned int bit) +{ + return (cpuid(0x80000001).d & bit) != 0; +} + + +void test_wrtsc(u64 t1) +{ + u64 t2; + + wrtsc(t1); + t2 = rdtsc(); + printf("rdtsc after wrtsc(%" PRId64 "): %" PRId64 "\n", t1, t2); +} + +void test_rdtscp(u64 aux) +{ + u32 ecx; + + wrmsr(MSR_TSC_AUX, aux); + rdtscp(&ecx); + report("Test RDTSCP %d", ecx == aux, aux); +} + +int main() +{ + u64 t1, t2; + + t1 = rdtsc(); + t2 = rdtsc(); + printf("rdtsc latency %u\n", (unsigned)(t2 - t1)); + + test_wrtsc(0); + test_wrtsc(100000000000ull); + + if (check_cpuid_80000001_edx(CPUID_80000001_EDX_RDTSCP)) { + test_rdtscp(0); + test_rdtscp(10); + test_rdtscp(0x100); + } else + printf("rdtscp not supported\n"); + return report_summary(); +} diff --git 
a/tests/kvm-unit-tests/x86/tsc_adjust.c b/tests/kvm-unit-tests/x86/tsc_adjust.c new file mode 100644 index 00000000..725e4ff7 --- /dev/null +++ b/tests/kvm-unit-tests/x86/tsc_adjust.c @@ -0,0 +1,42 @@ +#include "libcflat.h" +#include "processor.h" + +#define IA32_TSC_ADJUST 0x3b + +int main() +{ + u64 t1, t2, t3, t4, t5; + u64 est_delta_time; + + if (cpuid(7).b & (1 << 1)) { // IA32_TSC_ADJUST Feature is enabled? + report("IA32_TSC_ADJUST msr initialization", + rdmsr(IA32_TSC_ADJUST) == 0x0); + t3 = 100000000000ull; + t1 = rdtsc(); + wrmsr(IA32_TSC_ADJUST, t3); + t2 = rdtsc(); + report("IA32_TSC_ADJUST msr read / write", + rdmsr(IA32_TSC_ADJUST) == t3); + report("TSC adjustment for IA32_TSC_ADJUST value", + (t2 - t1) >= t3); + t3 = 0x0; + wrmsr(IA32_TSC_ADJUST, t3); + report("IA32_TSC_ADJUST msr read / write", + rdmsr(IA32_TSC_ADJUST) == t3); + t4 = 100000000000ull; + t1 = rdtsc(); + wrtsc(t4); + t2 = rdtsc(); + t5 = rdmsr(IA32_TSC_ADJUST); + // est of time between reading tsc and writing tsc, + // (based on IA32_TSC_ADJUST msr value) should be small + est_delta_time = t4 - t5 - t1; + // arbitray 2x latency (wrtsc->rdtsc) threshold + report("IA32_TSC_ADJUST msr adjustment on tsc write", + est_delta_time <= (2 * (t2 - t4))); + } + else { + report("IA32_TSC_ADJUST feature not enabled", true); + } + return report_summary(); +} diff --git a/tests/kvm-unit-tests/x86/tscdeadline_latency.c b/tests/kvm-unit-tests/x86/tscdeadline_latency.c new file mode 100644 index 00000000..0617a1b3 --- /dev/null +++ b/tests/kvm-unit-tests/x86/tscdeadline_latency.c @@ -0,0 +1,132 @@ +/* + * qemu command line | grep latency | cut -f 2 -d ":" > latency + * + * In octave: + * load latency + * min(latency) + * max(latency) + * mean(latency) + * hist(latency, 50) + */ + +/* + * for host tracing of breakmax option: + * + * # cd /sys/kernel/debug/tracing/ + * # echo x86-tsc > trace_clock + * # echo "kvm_exit kvm_entry kvm_msr" > set_event + * # echo "sched_switch $extratracepoints" >> set_event 
+ * # echo apic_timer_fn > set_ftrace_filter + * # echo "function" > current_tracer + */ + +#include "libcflat.h" +#include "apic.h" +#include "vm.h" +#include "smp.h" +#include "desc.h" +#include "isr.h" +#include "msr.h" + +static void test_lapic_existence(void) +{ + u32 lvr; + + lvr = apic_read(APIC_LVR); + printf("apic version: %x\n", lvr); + report("apic existence", (u16)lvr == 0x14); +} + +#define TSC_DEADLINE_TIMER_VECTOR 0xef + +static int tdt_count; +u64 exptime; +int delta; +#define TABLE_SIZE 10000 +u64 table[TABLE_SIZE]; +volatile int table_idx; +volatile int hitmax = 0; +int breakmax = 0; + +static void tsc_deadline_timer_isr(isr_regs_t *regs) +{ + u64 now = rdtsc(); + ++tdt_count; + + if (table_idx < TABLE_SIZE && tdt_count > 1) + table[table_idx++] = now - exptime; + + if (breakmax && tdt_count > 1 && (now - exptime) > breakmax) { + hitmax = 1; + apic_write(APIC_EOI, 0); + return; + } + + exptime = now+delta; + wrmsr(MSR_IA32_TSCDEADLINE, now+delta); + apic_write(APIC_EOI, 0); +} + +static void start_tsc_deadline_timer(void) +{ + handle_irq(TSC_DEADLINE_TIMER_VECTOR, tsc_deadline_timer_isr); + irq_enable(); + + wrmsr(MSR_IA32_TSCDEADLINE, rdmsr(MSR_IA32_TSC)+delta); + asm volatile ("nop"); +} + +static int enable_tsc_deadline_timer(void) +{ + uint32_t lvtt; + + if (cpuid(1).c & (1 << 24)) { + lvtt = APIC_LVT_TIMER_TSCDEADLINE | TSC_DEADLINE_TIMER_VECTOR; + apic_write(APIC_LVTT, lvtt); + start_tsc_deadline_timer(); + return 1; + } else { + return 0; + } +} + +static void test_tsc_deadline_timer(void) +{ + if(enable_tsc_deadline_timer()) { + printf("tsc deadline timer enabled\n"); + } else { + printf("tsc deadline timer not detected, aborting\n"); + abort(); + } +} + +int main(int argc, char **argv) +{ + int i, size; + + setup_vm(); + smp_init(); + + test_lapic_existence(); + + mask_pic_interrupts(); + + delta = argc <= 1 ? 200000 : atol(argv[1]); + size = argc <= 2 ? TABLE_SIZE : atol(argv[2]); + breakmax = argc <= 3 ? 
0 : atol(argv[3]); + printf("breakmax=%d\n", breakmax); + test_tsc_deadline_timer(); + irq_enable(); + + do { + asm volatile("hlt"); + } while (!hitmax && table_idx < size); + + for (i = 0; i < table_idx; i++) { + if (hitmax && i == table_idx-1) + printf("hit max: %d < ", breakmax); + printf("latency: %" PRId64 "\n", table[i]); + } + + return report_summary(); +} diff --git a/tests/kvm-unit-tests/x86/types.h b/tests/kvm-unit-tests/x86/types.h new file mode 100644 index 00000000..fd227439 --- /dev/null +++ b/tests/kvm-unit-tests/x86/types.h @@ -0,0 +1,20 @@ +#ifndef __TYPES_H +#define __TYPES_H + +#define DE_VECTOR 0 +#define DB_VECTOR 1 +#define BP_VECTOR 3 +#define OF_VECTOR 4 +#define BR_VECTOR 5 +#define UD_VECTOR 6 +#define NM_VECTOR 7 +#define DF_VECTOR 8 +#define TS_VECTOR 10 +#define NP_VECTOR 11 +#define SS_VECTOR 12 +#define GP_VECTOR 13 +#define PF_VECTOR 14 +#define MF_VECTOR 16 +#define MC_VECTOR 18 + +#endif diff --git a/tests/kvm-unit-tests/x86/unittests.cfg b/tests/kvm-unit-tests/x86/unittests.cfg new file mode 100644 index 00000000..54138388 --- /dev/null +++ b/tests/kvm-unit-tests/x86/unittests.cfg @@ -0,0 +1,226 @@ +############################################################################## +# unittest configuration +# +# [unittest_name] +# file = .flat # Name of the flat file to be used. +# smp = # Number of processors the VM will use +# # during this test. Use $MAX_SMP to use +# # the maximum the host supports. Defaults +# # to one. +# extra_params = -append # Additional parameters used. +# arch = i386|x86_64 # Select one if the test case is +# # specific to only one. +# groups = ... # Used to identify test cases +# # with run_tests -g ... +# # Specify group_name=nodefault +# # to have test not run by +# # default +# accel = kvm|tcg # Optionally specify if test must run with +# # kvm or tcg. If not specified, then kvm will +# # be used when available. +# timeout = # Optionally specify a timeout. 
+# check = = # check a file for a particular value before running +# # a test. The check line can contain multiple files +# # to check separated by a space but each check +# # parameter needs to be of the form = +############################################################################## + +[apic-split] +file = apic.flat +smp = 2 +extra_params = -cpu qemu64,+x2apic,+tsc-deadline -machine kernel_irqchip=split +arch = x86_64 + +[ioapic-split] +file = ioapic.flat +extra_params = -cpu qemu64 -machine kernel_irqchip=split +arch = x86_64 + +[apic] +file = apic.flat +smp = 2 +extra_params = -cpu qemu64,+x2apic,+tsc-deadline +arch = x86_64 +timeout = 30 + +[ioapic] +file = ioapic.flat +extra_params = -cpu qemu64 +arch = x86_64 + +[smptest] +file = smptest.flat +smp = 2 + +[smptest3] +file = smptest.flat +smp = 3 + +[vmexit_cpuid] +file = vmexit.flat +extra_params = -append 'cpuid' +groups = vmexit + +[vmexit_vmcall] +file = vmexit.flat +extra_params = -append 'vmcall' +groups = vmexit + +[vmexit_mov_from_cr8] +file = vmexit.flat +extra_params = -append 'mov_from_cr8' +groups = vmexit + +[vmexit_mov_to_cr8] +file = vmexit.flat +extra_params = -append 'mov_to_cr8' +groups = vmexit + +[vmexit_inl_pmtimer] +file = vmexit.flat +extra_params = -append 'inl_from_pmtimer' +groups = vmexit + +[vmexit_ipi] +file = vmexit.flat +smp = 2 +extra_params = -append 'ipi' +groups = vmexit + +[vmexit_ipi_halt] +file = vmexit.flat +smp = 2 +extra_params = -append 'ipi_halt' +groups = vmexit + +[vmexit_ple_round_robin] +file = vmexit.flat +extra_params = -append 'ple_round_robin' +groups = vmexit + +[access] +file = access.flat +arch = x86_64 + +[smap] +file = smap.flat +extra_params = -cpu host + +[pku] +file = pku.flat +arch = x86_64 +extra_params = -cpu host + +#[asyncpf] +#file = asyncpf.flat + +[emulator] +file = emulator.flat +arch = x86_64 + +[eventinj] +file = eventinj.flat + +[hypercall] +file = hypercall.flat + +[idt_test] +file = idt_test.flat +arch = x86_64 + +#[init] +#file = 
init.flat + +[msr] +file = msr.flat + +[pmu] +file = pmu.flat +extra_params = -cpu host +check = /proc/sys/kernel/nmi_watchdog=0 + +[port80] +file = port80.flat + +[realmode] +file = realmode.flat + +[s3] +file = s3.flat + +[sieve] +file = sieve.flat + +[tsc] +file = tsc.flat +extra_params = -cpu kvm64,+rdtscp + +[tsc_adjust] +file = tsc_adjust.flat +extra_params = -cpu host + +[xsave] +file = xsave.flat +arch = x86_64 +extra_params = -cpu host + +[rmap_chain] +file = rmap_chain.flat +arch = x86_64 + +[svm] +file = svm.flat +smp = 2 +extra_params = -cpu qemu64,+svm +arch = x86_64 + +[taskswitch] +file = taskswitch.flat +arch = i386 +groups = tasks + +[taskswitch2] +file = taskswitch2.flat +arch = i386 +groups = tasks + +[kvmclock_test] +file = kvmclock_test.flat +smp = 2 +extra_params = --append "10000000 `date +%s`" + +[pcid] +file = pcid.flat +extra_params = -cpu qemu64,+pcid +arch = x86_64 + +[vmx] +file = vmx.flat +extra_params = -cpu host,+vmx +arch = x86_64 + +[debug] +file = debug.flat +arch = x86_64 + +[hyperv_synic] +file = hyperv_synic.flat +smp = 2 +extra_params = -cpu kvm64,hv_synic -device hyperv-testdev + +[hyperv_stimer] +file = hyperv_stimer.flat +smp = 2 +extra_params = -cpu kvm64,hv_time,hv_synic,hv_stimer -device hyperv-testdev + +[hyperv_clock] +file = hyperv_clock.flat +smp = 2 +extra_params = -cpu kvm64,hv_time + +[intel_iommu] +file = intel-iommu.flat +arch = x86_64 +timeout = 30 +smp = 4 +extra_params = -M q35,kernel-irqchip=split -device intel-iommu,intremap=on,eim=off -device edu diff --git a/tests/kvm-unit-tests/x86/vmexit.c b/tests/kvm-unit-tests/x86/vmexit.c new file mode 100644 index 00000000..5b821b5e --- /dev/null +++ b/tests/kvm-unit-tests/x86/vmexit.c @@ -0,0 +1,536 @@ +#include "libcflat.h" +#include "smp.h" +#include "processor.h" +#include "atomic.h" +#include "pci.h" +#include "x86/vm.h" +#include "x86/desc.h" +#include "x86/acpi.h" +#include "x86/apic.h" +#include "x86/isr.h" + +#define IPI_TEST_VECTOR 0xb0 + +struct test { + 
void (*func)(void); + const char *name; + int (*valid)(void); + int parallel; + bool (*next)(struct test *); +}; + +#define GOAL (1ull << 30) + +static int nr_cpus; + +static void cpuid_test(void) +{ + asm volatile ("push %%"R "bx; cpuid; pop %%"R "bx" + : : : "eax", "ecx", "edx"); +} + +static void vmcall(void) +{ + unsigned long a = 0, b, c, d; + + asm volatile ("vmcall" : "+a"(a), "=b"(b), "=c"(c), "=d"(d)); +} + +#define MSR_TSC_ADJUST 0x3b +#define MSR_EFER 0xc0000080 +#define EFER_NX_MASK (1ull << 11) + +#ifdef __x86_64__ +static void mov_from_cr8(void) +{ + unsigned long cr8; + + asm volatile ("mov %%cr8, %0" : "=r"(cr8)); +} + +static void mov_to_cr8(void) +{ + unsigned long cr8 = 0; + + asm volatile ("mov %0, %%cr8" : : "r"(cr8)); +} +#endif + +static int is_smp(void) +{ + return cpu_count() > 1; +} + +static void nop(void *junk) +{ +} + +volatile int x = 0; + +static void self_ipi_isr(isr_regs_t *regs) +{ + x++; + eoi(); +} + +static void x2apic_self_ipi(int vec) +{ + wrmsr(0x83f, vec); +} + +static void apic_self_ipi(int vec) +{ + apic_icr_write(APIC_INT_ASSERT | APIC_DEST_SELF | APIC_DEST_PHYSICAL | + APIC_DM_FIXED | IPI_TEST_VECTOR, vec); +} + +static void self_ipi_sti_nop(void) +{ + x = 0; + irq_disable(); + apic_self_ipi(IPI_TEST_VECTOR); + asm volatile("sti; nop"); + if (x != 1) printf("%d", x); +} + +static void self_ipi_sti_hlt(void) +{ + x = 0; + irq_disable(); + apic_self_ipi(IPI_TEST_VECTOR); + asm volatile("sti; hlt"); + if (x != 1) printf("%d", x); +} + +static void self_ipi_tpr(void) +{ + x = 0; + apic_set_tpr(0x0f); + apic_self_ipi(IPI_TEST_VECTOR); + apic_set_tpr(0x00); + asm volatile("nop"); + if (x != 1) printf("%d", x); +} + +static void self_ipi_tpr_sti_nop(void) +{ + x = 0; + irq_disable(); + apic_set_tpr(0x0f); + apic_self_ipi(IPI_TEST_VECTOR); + apic_set_tpr(0x00); + asm volatile("sti; nop"); + if (x != 1) printf("%d", x); +} + +static void self_ipi_tpr_sti_hlt(void) +{ + x = 0; + irq_disable(); + apic_set_tpr(0x0f); + 
apic_self_ipi(IPI_TEST_VECTOR); + apic_set_tpr(0x00); + asm volatile("sti; hlt"); + if (x != 1) printf("%d", x); +} + +static int is_x2apic(void) +{ + return rdmsr(MSR_IA32_APICBASE) & APIC_EXTD; +} + +static void x2apic_self_ipi_sti_nop(void) +{ + irq_disable(); + x2apic_self_ipi(IPI_TEST_VECTOR); + asm volatile("sti; nop"); +} + +static void x2apic_self_ipi_sti_hlt(void) +{ + irq_disable(); + x2apic_self_ipi(IPI_TEST_VECTOR); + asm volatile("sti; hlt"); +} + +static void x2apic_self_ipi_tpr(void) +{ + apic_set_tpr(0x0f); + x2apic_self_ipi(IPI_TEST_VECTOR); + apic_set_tpr(0x00); + asm volatile("nop"); +} + +static void x2apic_self_ipi_tpr_sti_nop(void) +{ + irq_disable(); + apic_set_tpr(0x0f); + x2apic_self_ipi(IPI_TEST_VECTOR); + apic_set_tpr(0x00); + asm volatile("sti; nop"); +} + +static void x2apic_self_ipi_tpr_sti_hlt(void) +{ + irq_disable(); + apic_set_tpr(0x0f); + x2apic_self_ipi(IPI_TEST_VECTOR); + apic_set_tpr(0x00); + asm volatile("sti; hlt"); +} + +static void ipi(void) +{ + on_cpu(1, nop, 0); +} + +static void ipi_halt(void) +{ + unsigned long long t; + + on_cpu(1, nop, 0); + t = rdtsc() + 2000; + while (rdtsc() < t) + ; +} + +int pm_tmr_blk; +static void inl_pmtimer(void) +{ + inl(pm_tmr_blk); +} + +static void inl_nop_qemu(void) +{ + inl(0x1234); +} + +static void inl_nop_kernel(void) +{ + inb(0x4d0); +} + +static void outl_elcr_kernel(void) +{ + outb(0, 0x4d0); +} + +static void mov_dr(void) +{ + asm volatile("mov %0, %%dr7" : : "r" (0x400L)); +} + +static void ple_round_robin(void) +{ + struct counter { + volatile int n1; + int n2; + } __attribute__((aligned(64))); + static struct counter counters[64] = { { -1, 0 } }; + int me = smp_id(); + int you; + volatile struct counter *p = &counters[me]; + + while (p->n1 == p->n2) + asm volatile ("pause"); + + p->n2 = p->n1; + you = me + 1; + if (you == nr_cpus) + you = 0; + ++counters[you].n1; +} + +static void rd_tsc_adjust_msr(void) +{ + rdmsr(MSR_TSC_ADJUST); +} + +static void wr_tsc_adjust_msr(void) +{ 
+ wrmsr(MSR_TSC_ADJUST, 0x0); +} + +static struct pci_test { + unsigned iobar; + unsigned ioport; + volatile void *memaddr; + volatile void *mem; + int test_idx; + uint32_t data; + uint32_t offset; +} pci_test = { + .test_idx = -1 +}; + +static void pci_mem_testb(void) +{ + *(volatile uint8_t *)pci_test.mem = pci_test.data; +} + +static void pci_mem_testw(void) +{ + *(volatile uint16_t *)pci_test.mem = pci_test.data; +} + +static void pci_mem_testl(void) +{ + *(volatile uint32_t *)pci_test.mem = pci_test.data; +} + +static void pci_io_testb(void) +{ + outb(pci_test.data, pci_test.ioport); +} + +static void pci_io_testw(void) +{ + outw(pci_test.data, pci_test.ioport); +} + +static void pci_io_testl(void) +{ + outl(pci_test.data, pci_test.ioport); +} + +static uint8_t ioreadb(unsigned long addr, bool io) +{ + if (io) { + return inb(addr); + } else { + return *(volatile uint8_t *)addr; + } +} + +static uint32_t ioreadl(unsigned long addr, bool io) +{ + /* Note: assumes little endian */ + if (io) { + return inl(addr); + } else { + return *(volatile uint32_t *)addr; + } +} + +static void iowriteb(unsigned long addr, uint8_t data, bool io) +{ + if (io) { + outb(data, addr); + } else { + *(volatile uint8_t *)addr = data; + } +} + +static bool pci_next(struct test *test, unsigned long addr, bool io) +{ + int i; + uint8_t width; + + if (!pci_test.memaddr) { + test->func = NULL; + return true; + } + pci_test.test_idx++; + iowriteb(addr + offsetof(struct pci_test_dev_hdr, test), + pci_test.test_idx, io); + width = ioreadb(addr + offsetof(struct pci_test_dev_hdr, width), + io); + switch (width) { + case 1: + test->func = io ? pci_io_testb : pci_mem_testb; + break; + case 2: + test->func = io ? pci_io_testw : pci_mem_testw; + break; + case 4: + test->func = io ? 
pci_io_testl : pci_mem_testl; + break; + default: + /* Reset index for purposes of the next test */ + pci_test.test_idx = -1; + test->func = NULL; + return false; + } + pci_test.data = ioreadl(addr + offsetof(struct pci_test_dev_hdr, data), + io); + pci_test.offset = ioreadl(addr + offsetof(struct pci_test_dev_hdr, + offset), io); + for (i = 0; i < pci_test.offset; ++i) { + char c = ioreadb(addr + offsetof(struct pci_test_dev_hdr, + name) + i, io); + if (!c) { + break; + } + printf("%c",c); + } + printf(":"); + return true; +} + +static bool pci_mem_next(struct test *test) +{ + bool ret; + ret = pci_next(test, ((unsigned long)pci_test.memaddr), false); + if (ret) { + pci_test.mem = pci_test.memaddr + pci_test.offset; + } + return ret; +} + +static bool pci_io_next(struct test *test) +{ + bool ret; + ret = pci_next(test, ((unsigned long)pci_test.iobar), true); + if (ret) { + pci_test.ioport = pci_test.iobar + pci_test.offset; + } + return ret; +} + +static struct test tests[] = { + { cpuid_test, "cpuid", .parallel = 1, }, + { vmcall, "vmcall", .parallel = 1, }, +#ifdef __x86_64__ + { mov_from_cr8, "mov_from_cr8", .parallel = 1, }, + { mov_to_cr8, "mov_to_cr8" , .parallel = 1, }, +#endif + { inl_pmtimer, "inl_from_pmtimer", .parallel = 1, }, + { inl_nop_qemu, "inl_from_qemu", .parallel = 1 }, + { inl_nop_kernel, "inl_from_kernel", .parallel = 1 }, + { outl_elcr_kernel, "outl_to_kernel", .parallel = 1 }, + { mov_dr, "mov_dr", .parallel = 1 }, + { self_ipi_sti_nop, "self_ipi_sti_nop", .parallel = 0, }, + { self_ipi_sti_hlt, "self_ipi_sti_hlt", .parallel = 0, }, + { self_ipi_tpr, "self_ipi_tpr", .parallel = 0, }, + { self_ipi_tpr_sti_nop, "self_ipi_tpr_sti_nop", .parallel = 0, }, + { self_ipi_tpr_sti_hlt, "self_ipi_tpr_sti_hlt", .parallel = 0, }, + { x2apic_self_ipi_sti_nop, "x2apic_self_ipi_sti_nop", is_x2apic, .parallel = 0, }, + { x2apic_self_ipi_sti_hlt, "x2apic_self_ipi_sti_hlt", is_x2apic, .parallel = 0, }, + { x2apic_self_ipi_tpr, "x2apic_self_ipi_tpr", 
is_x2apic, .parallel = 0, }, + { x2apic_self_ipi_tpr_sti_nop, "x2apic_self_ipi_tpr_sti_nop", is_x2apic, .parallel = 0, }, + { x2apic_self_ipi_tpr_sti_hlt, "x2apic_self_ipi_tpr_sti_hlt", is_x2apic, .parallel = 0, }, + { ipi, "ipi", is_smp, .parallel = 0, }, + { ipi_halt, "ipi+halt", is_smp, .parallel = 0, }, + { ple_round_robin, "ple-round-robin", .parallel = 1 }, + { wr_tsc_adjust_msr, "wr_tsc_adjust_msr", .parallel = 1 }, + { rd_tsc_adjust_msr, "rd_tsc_adjust_msr", .parallel = 1 }, + { NULL, "pci-mem", .parallel = 0, .next = pci_mem_next }, + { NULL, "pci-io", .parallel = 0, .next = pci_io_next }, +}; + +unsigned iterations; +static atomic_t nr_cpus_done; + +static void run_test(void *_func) +{ + int i; + void (*func)(void) = _func; + + for (i = 0; i < iterations; ++i) + func(); + + atomic_inc(&nr_cpus_done); +} + +static bool do_test(struct test *test) +{ + int i; + unsigned long long t1, t2; + void (*func)(void); + + iterations = 32; + + if (test->valid && !test->valid()) { + printf("%s (skipped)\n", test->name); + return false; + } + + if (test->next && !test->next(test)) { + return false; + } + + func = test->func; + if (!func) { + printf("%s (skipped)\n", test->name); + return false; + } + + do { + iterations *= 2; + t1 = rdtsc(); + + if (!test->parallel) { + for (i = 0; i < iterations; ++i) + func(); + } else { + atomic_set(&nr_cpus_done, 0); + for (i = cpu_count(); i > 0; i--) + on_cpu_async(i-1, run_test, func); + while (atomic_read(&nr_cpus_done) < cpu_count()) + ; + } + t2 = rdtsc(); + } while ((t2 - t1) < GOAL); + printf("%s %d\n", test->name, (int)((t2 - t1) / iterations)); + return test->next; +} + +static void enable_nx(void *junk) +{ + if (cpuid(0x80000001).d & (1 << 20)) + wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NX_MASK); +} + +bool test_wanted(struct test *test, char *wanted[], int nwanted) +{ + int i; + + if (!nwanted) + return true; + + for (i = 0; i < nwanted; ++i) + if (strcmp(wanted[i], test->name) == 0) + return true; + + return false; +} + 
+int main(int ac, char **av) +{ + struct fadt_descriptor_rev1 *fadt; + int i; + unsigned long membar = 0; + struct pci_dev pcidev; + int ret; + + smp_init(); + setup_vm(); + handle_irq(IPI_TEST_VECTOR, self_ipi_isr); + nr_cpus = cpu_count(); + + irq_enable(); + for (i = cpu_count(); i > 0; i--) + on_cpu(i-1, enable_nx, 0); + + fadt = find_acpi_table_addr(FACP_SIGNATURE); + pm_tmr_blk = fadt->pm_tmr_blk; + printf("PM timer port is %x\n", pm_tmr_blk); + + ret = pci_find_dev(PCI_VENDOR_ID_REDHAT, PCI_DEVICE_ID_REDHAT_TEST); + if (ret != PCIDEVADDR_INVALID) { + pci_dev_init(&pcidev, ret); + assert(pci_bar_is_memory(&pcidev, PCI_TESTDEV_BAR_MEM)); + assert(!pci_bar_is_memory(&pcidev, PCI_TESTDEV_BAR_IO)); + membar = pcidev.resource[PCI_TESTDEV_BAR_MEM]; + pci_test.memaddr = ioremap(membar, PAGE_SIZE); + pci_test.iobar = pcidev.resource[PCI_TESTDEV_BAR_IO]; + printf("pci-testdev at 0x%x membar %lx iobar %x\n", + pcidev.bdf, membar, pci_test.iobar); + } + + for (i = 0; i < ARRAY_SIZE(tests); ++i) + if (test_wanted(&tests[i], av + 1, ac - 1)) + while (do_test(&tests[i])) {} + + return 0; +} diff --git a/tests/kvm-unit-tests/x86/vmx.c b/tests/kvm-unit-tests/x86/vmx.c new file mode 100644 index 00000000..da5daa86 --- /dev/null +++ b/tests/kvm-unit-tests/x86/vmx.c @@ -0,0 +1,1098 @@ +/* + * x86/vmx.c : Framework for testing nested virtualization + * This is a framework to test nested VMX for KVM, which + * started as a project of GSoC 2013. All test cases should + * be located in x86/vmx_tests.c and framework related + * functions should be in this file. + * + * How to write test cases? + * Add callbacks of test suite in variant "vmx_tests". You can + * write: + * 1. init function used for initializing test suite + * 2. main function for codes running in L2 guest, + * 3. exit_handler to handle vmexit of L2 to L1 + * 4. syscall handler to handle L2 syscall vmexit + * 5. vmenter fail handler to handle direct failure of vmenter + * 6. 
guest_regs is loaded when vmenter and saved when + * vmexit, you can read and set it in exit_handler + * If no special function is needed for a test suite, use + * coressponding basic_* functions as callback. More handlers + * can be added to "vmx_tests", see details of "struct vmx_test" + * and function test_run(). + * + * Currently, vmx test framework only set up one VCPU and one + * concurrent guest test environment with same paging for L2 and + * L1. For usage of EPT, only 1:1 mapped paging is used from VFN + * to PFN. + * + * Author : Arthur Chunqi Li + */ + +#include "libcflat.h" +#include "processor.h" +#include "vm.h" +#include "desc.h" +#include "vmx.h" +#include "msr.h" +#include "smp.h" + +u64 *vmxon_region; +struct vmcs *vmcs_root; +u32 vpid_cnt; +void *guest_stack, *guest_syscall_stack; +u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2]; +struct regs regs; +struct vmx_test *current; +u64 hypercall_field; +bool launched; + +union vmx_basic basic; +union vmx_ctrl_msr ctrl_pin_rev; +union vmx_ctrl_msr ctrl_cpu_rev[2]; +union vmx_ctrl_msr ctrl_exit_rev; +union vmx_ctrl_msr ctrl_enter_rev; +union vmx_ept_vpid ept_vpid; + +extern struct descriptor_table_ptr gdt64_desc; +extern struct descriptor_table_ptr idt_descr; +extern struct descriptor_table_ptr tss_descr; +extern void *vmx_return; +extern void *entry_sysenter; +extern void *guest_entry; + +static volatile u32 stage; + +void vmx_set_test_stage(u32 s) +{ + barrier(); + stage = s; + barrier(); +} + +u32 vmx_get_test_stage(void) +{ + u32 s; + + barrier(); + s = stage; + barrier(); + return s; +} + +void vmx_inc_test_stage(void) +{ + barrier(); + stage++; + barrier(); +} + +static int make_vmcs_current(struct vmcs *vmcs) +{ + bool ret; + u64 rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; + + asm volatile ("push %1; popf; vmptrld %2; setbe %0" + : "=q" (ret) : "q" (rflags), "m" (vmcs) : "cc"); + return ret; +} + +/* entry_sysenter */ +asm( + ".align 4, 0x90\n\t" + ".globl entry_sysenter\n\t" + 
"entry_sysenter:\n\t" + SAVE_GPR + " and $0xf, %rax\n\t" + " mov %rax, %rdi\n\t" + " call syscall_handler\n\t" + LOAD_GPR + " vmresume\n\t" +); + +static void __attribute__((__used__)) syscall_handler(u64 syscall_no) +{ + if (current->syscall_handler) + current->syscall_handler(syscall_no); +} + +static inline int vmx_on() +{ + bool ret; + u64 rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; + asm volatile ("push %1; popf; vmxon %2; setbe %0\n\t" + : "=q" (ret) : "q" (rflags), "m" (vmxon_region) : "cc"); + return ret; +} + +static inline int vmx_off() +{ + bool ret; + u64 rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; + + asm volatile("push %1; popf; vmxoff; setbe %0\n\t" + : "=q"(ret) : "q" (rflags) : "cc"); + return ret; +} + +void print_vmexit_info() +{ + u64 guest_rip, guest_rsp; + ulong reason = vmcs_read(EXI_REASON) & 0xff; + ulong exit_qual = vmcs_read(EXI_QUALIFICATION); + guest_rip = vmcs_read(GUEST_RIP); + guest_rsp = vmcs_read(GUEST_RSP); + printf("VMEXIT info:\n"); + printf("\tvmexit reason = %ld\n", reason); + printf("\texit qualification = 0x%lx\n", exit_qual); + printf("\tBit 31 of reason = %lx\n", (vmcs_read(EXI_REASON) >> 31) & 1); + printf("\tguest_rip = 0x%lx\n", guest_rip); + printf("\tRAX=0x%lx RBX=0x%lx RCX=0x%lx RDX=0x%lx\n", + regs.rax, regs.rbx, regs.rcx, regs.rdx); + printf("\tRSP=0x%lx RBP=0x%lx RSI=0x%lx RDI=0x%lx\n", + guest_rsp, regs.rbp, regs.rsi, regs.rdi); + printf("\tR8 =0x%lx R9 =0x%lx R10=0x%lx R11=0x%lx\n", + regs.r8, regs.r9, regs.r10, regs.r11); + printf("\tR12=0x%lx R13=0x%lx R14=0x%lx R15=0x%lx\n", + regs.r12, regs.r13, regs.r14, regs.r15); +} + +void +print_vmentry_failure_info(struct vmentry_failure *failure) { + if (failure->early) { + printf("Early %s failure: ", failure->instr); + switch (failure->flags & VMX_ENTRY_FLAGS) { + case X86_EFLAGS_CF: + printf("current-VMCS pointer is not valid.\n"); + break; + case X86_EFLAGS_ZF: + printf("error number is %ld. 
See Intel 30.4.\n", + vmcs_read(VMX_INST_ERROR)); + break; + default: + printf("unexpected flags %lx!\n", failure->flags); + } + } else { + u64 reason = vmcs_read(EXI_REASON); + u64 qual = vmcs_read(EXI_QUALIFICATION); + + printf("Non-early %s failure (reason=0x%lx, qual=0x%lx): ", + failure->instr, reason, qual); + + switch (reason & 0xff) { + case VMX_FAIL_STATE: + printf("invalid guest state\n"); + break; + case VMX_FAIL_MSR: + printf("MSR loading\n"); + break; + case VMX_FAIL_MCHECK: + printf("machine-check event\n"); + break; + default: + printf("unexpected basic exit reason %ld\n", + reason & 0xff); + } + + if (!(reason & VMX_ENTRY_FAILURE)) + printf("\tVMX_ENTRY_FAILURE BIT NOT SET!\n"); + + if (reason & 0x7fff0000) + printf("\tRESERVED BITS SET!\n"); + } +} + + +static void test_vmclear(void) +{ + struct vmcs *tmp_root; + int width = cpuid_maxphyaddr(); + + /* + * Note- The tests below do not necessarily have a + * valid VMCS, but that's ok since the invalid vmcs + * is only used for a specific test and is discarded + * without touching its contents + */ + + /* Unaligned page access */ + tmp_root = (struct vmcs *)((intptr_t)vmcs_root + 1); + report("test vmclear with unaligned vmcs", + vmcs_clear(tmp_root) == 1); + + /* gpa bits beyond physical address width are set*/ + tmp_root = (struct vmcs *)((intptr_t)vmcs_root | + ((u64)1 << (width+1))); + report("test vmclear with vmcs address bits set beyond physical address width", + vmcs_clear(tmp_root) == 1); + + /* Pass VMXON region */ + tmp_root = (struct vmcs *)vmxon_region; + report("test vmclear with vmxon region", + vmcs_clear(tmp_root) == 1); + + /* Valid VMCS */ + report("test vmclear with valid vmcs region", vmcs_clear(vmcs_root) == 0); + +} + +static void test_vmxoff(void) +{ + int ret; + + ret = vmx_off(); + report("test vmxoff", !ret); +} + +static void __attribute__((__used__)) guest_main(void) +{ + current->guest_main(); +} + +/* guest_entry */ +asm( + ".align 4, 0x90\n\t" + ".globl entry_guest\n\t" 
+ "guest_entry:\n\t" + " call guest_main\n\t" + " mov $1, %edi\n\t" + " call hypercall\n\t" +); + +/* EPT paging structure related functions */ +/* split_large_ept_entry: Split a 2M/1G large page into 512 smaller PTEs. + @ptep : large page table entry to split + @level : level of ptep (2 or 3) + */ +static void split_large_ept_entry(unsigned long *ptep, int level) +{ + unsigned long *new_pt; + unsigned long gpa; + unsigned long pte; + unsigned long prototype; + int i; + + pte = *ptep; + assert(pte & EPT_PRESENT); + assert(pte & EPT_LARGE_PAGE); + assert(level == 2 || level == 3); + + new_pt = alloc_page(); + assert(new_pt); + memset(new_pt, 0, PAGE_SIZE); + + prototype = pte & ~EPT_ADDR_MASK; + if (level == 2) + prototype &= ~EPT_LARGE_PAGE; + + gpa = pte & EPT_ADDR_MASK; + for (i = 0; i < EPT_PGDIR_ENTRIES; i++) { + new_pt[i] = prototype | gpa; + gpa += 1ul << EPT_LEVEL_SHIFT(level - 1); + } + + pte &= ~EPT_LARGE_PAGE; + pte &= ~EPT_ADDR_MASK; + pte |= virt_to_phys(new_pt); + + *ptep = pte; +} + +/* install_ept_entry : Install a page to a given level in EPT + @pml4 : addr of pml4 table + @pte_level : level of PTE to set + @guest_addr : physical address of guest + @pte : pte value to set + @pt_page : address of page table, NULL for a new page + */ +void install_ept_entry(unsigned long *pml4, + int pte_level, + unsigned long guest_addr, + unsigned long pte, + unsigned long *pt_page) +{ + int level; + unsigned long *pt = pml4; + unsigned offset; + + for (level = EPT_PAGE_LEVEL; level > pte_level; --level) { + offset = (guest_addr >> EPT_LEVEL_SHIFT(level)) + & EPT_PGDIR_MASK; + if (!(pt[offset] & (EPT_PRESENT))) { + unsigned long *new_pt = pt_page; + if (!new_pt) + new_pt = alloc_page(); + else + pt_page = 0; + memset(new_pt, 0, PAGE_SIZE); + pt[offset] = virt_to_phys(new_pt) + | EPT_RA | EPT_WA | EPT_EA; + } else if (pt[offset] & EPT_LARGE_PAGE) + split_large_ept_entry(&pt[offset], level); + pt = phys_to_virt(pt[offset] & EPT_ADDR_MASK); + } + offset = (guest_addr 
>> EPT_LEVEL_SHIFT(level)) & EPT_PGDIR_MASK; + pt[offset] = pte; +} + +/* Map a page, @perm is the permission of the page */ +void install_ept(unsigned long *pml4, + unsigned long phys, + unsigned long guest_addr, + u64 perm) +{ + install_ept_entry(pml4, 1, guest_addr, (phys & PAGE_MASK) | perm, 0); +} + +/* Map a 1G-size page */ +void install_1g_ept(unsigned long *pml4, + unsigned long phys, + unsigned long guest_addr, + u64 perm) +{ + install_ept_entry(pml4, 3, guest_addr, + (phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0); +} + +/* Map a 2M-size page */ +void install_2m_ept(unsigned long *pml4, + unsigned long phys, + unsigned long guest_addr, + u64 perm) +{ + install_ept_entry(pml4, 2, guest_addr, + (phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0); +} + +/* setup_ept_range : Setup a range of 1:1 mapped page to EPT paging structure. + @start : start address of guest page + @len : length of address to be mapped + @map_1g : whether 1G page map is used + @map_2m : whether 2M page map is used + @perm : permission for every page + */ +void setup_ept_range(unsigned long *pml4, unsigned long start, + unsigned long len, int map_1g, int map_2m, u64 perm) +{ + u64 phys = start; + u64 max = (u64)len + (u64)start; + + if (map_1g) { + while (phys + PAGE_SIZE_1G <= max) { + install_1g_ept(pml4, phys, phys, perm); + phys += PAGE_SIZE_1G; + } + } + if (map_2m) { + while (phys + PAGE_SIZE_2M <= max) { + install_2m_ept(pml4, phys, phys, perm); + phys += PAGE_SIZE_2M; + } + } + while (phys + PAGE_SIZE <= max) { + install_ept(pml4, phys, phys, perm); + phys += PAGE_SIZE; + } +} + +/* get_ept_pte : Get the PTE of a given level in EPT, + @level == 1 means get the latest level*/ +unsigned long get_ept_pte(unsigned long *pml4, + unsigned long guest_addr, int level) +{ + int l; + unsigned long *pt = pml4, pte; + unsigned offset; + + if (level < 1 || level > 3) + return -1; + for (l = EPT_PAGE_LEVEL; ; --l) { + offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; + pte = pt[offset]; 
+ if (!(pte & (EPT_PRESENT))) + return 0; + if (l == level) + break; + if (l < 4 && (pte & EPT_LARGE_PAGE)) + return pte; + pt = (unsigned long *)(pte & EPT_ADDR_MASK); + } + offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; + pte = pt[offset]; + return pte; +} + +void ept_sync(int type, u64 eptp) +{ + switch (type) { + case INVEPT_SINGLE: + if (ept_vpid.val & EPT_CAP_INVEPT_SINGLE) { + invept(INVEPT_SINGLE, eptp); + break; + } + /* else fall through */ + case INVEPT_GLOBAL: + if (ept_vpid.val & EPT_CAP_INVEPT_ALL) { + invept(INVEPT_GLOBAL, eptp); + break; + } + /* else fall through */ + default: + printf("WARNING: invept is not supported!\n"); + } +} + +int set_ept_pte(unsigned long *pml4, unsigned long guest_addr, + int level, u64 pte_val) +{ + int l; + unsigned long *pt = pml4; + unsigned offset; + + if (level < 1 || level > 3) + return -1; + for (l = EPT_PAGE_LEVEL; ; --l) { + offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; + if (l == level) + break; + if (!(pt[offset] & (EPT_PRESENT))) + return -1; + pt = (unsigned long *)(pt[offset] & EPT_ADDR_MASK); + } + offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; + pt[offset] = pte_val; + return 0; +} + +void vpid_sync(int type, u16 vpid) +{ + switch(type) { + case INVVPID_SINGLE: + if (ept_vpid.val & VPID_CAP_INVVPID_SINGLE) { + invvpid(INVVPID_SINGLE, vpid, 0); + break; + } + case INVVPID_ALL: + if (ept_vpid.val & VPID_CAP_INVVPID_ALL) { + invvpid(INVVPID_ALL, vpid, 0); + break; + } + default: + printf("WARNING: invvpid is not supported\n"); + } +} + +static void init_vmcs_ctrl(void) +{ + /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */ + /* 26.2.1.1 */ + vmcs_write(PIN_CONTROLS, ctrl_pin); + /* Disable VMEXIT of IO instruction */ + vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]); + if (ctrl_cpu_rev[0].set & CPU_SECONDARY) { + ctrl_cpu[1] = (ctrl_cpu[1] | ctrl_cpu_rev[1].set) & + ctrl_cpu_rev[1].clr; + vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]); + } + vmcs_write(CR3_TARGET_COUNT, 0); 
+ vmcs_write(VPID, ++vpid_cnt); +} + +static void init_vmcs_host(void) +{ + /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */ + /* 26.2.1.2 */ + vmcs_write(HOST_EFER, rdmsr(MSR_EFER)); + + /* 26.2.1.3 */ + vmcs_write(ENT_CONTROLS, ctrl_enter); + vmcs_write(EXI_CONTROLS, ctrl_exit); + + /* 26.2.2 */ + vmcs_write(HOST_CR0, read_cr0()); + vmcs_write(HOST_CR3, read_cr3()); + vmcs_write(HOST_CR4, read_cr4()); + vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter)); + vmcs_write(HOST_SYSENTER_CS, KERNEL_CS); + + /* 26.2.3 */ + vmcs_write(HOST_SEL_CS, KERNEL_CS); + vmcs_write(HOST_SEL_SS, KERNEL_DS); + vmcs_write(HOST_SEL_DS, KERNEL_DS); + vmcs_write(HOST_SEL_ES, KERNEL_DS); + vmcs_write(HOST_SEL_FS, KERNEL_DS); + vmcs_write(HOST_SEL_GS, KERNEL_DS); + vmcs_write(HOST_SEL_TR, TSS_MAIN); + vmcs_write(HOST_BASE_TR, tss_descr.base); + vmcs_write(HOST_BASE_GDTR, gdt64_desc.base); + vmcs_write(HOST_BASE_IDTR, idt_descr.base); + vmcs_write(HOST_BASE_FS, 0); + vmcs_write(HOST_BASE_GS, 0); + + /* Set other vmcs area */ + vmcs_write(PF_ERROR_MASK, 0); + vmcs_write(PF_ERROR_MATCH, 0); + vmcs_write(VMCS_LINK_PTR, ~0ul); + vmcs_write(VMCS_LINK_PTR_HI, ~0ul); + vmcs_write(HOST_RIP, (u64)(&vmx_return)); +} + +static void init_vmcs_guest(void) +{ + /* 26.3 CHECKING AND LOADING GUEST STATE */ + ulong guest_cr0, guest_cr4, guest_cr3; + /* 26.3.1.1 */ + guest_cr0 = read_cr0(); + guest_cr4 = read_cr4(); + guest_cr3 = read_cr3(); + if (ctrl_enter & ENT_GUEST_64) { + guest_cr0 |= X86_CR0_PG; + guest_cr4 |= X86_CR4_PAE; + } + if ((ctrl_enter & ENT_GUEST_64) == 0) + guest_cr4 &= (~X86_CR4_PCIDE); + if (guest_cr0 & X86_CR0_PG) + guest_cr0 |= X86_CR0_PE; + vmcs_write(GUEST_CR0, guest_cr0); + vmcs_write(GUEST_CR3, guest_cr3); + vmcs_write(GUEST_CR4, guest_cr4); + vmcs_write(GUEST_SYSENTER_CS, KERNEL_CS); + vmcs_write(GUEST_SYSENTER_ESP, + (u64)(guest_syscall_stack + PAGE_SIZE - 1)); + vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter)); + vmcs_write(GUEST_DR7, 0); + vmcs_write(GUEST_EFER, 
rdmsr(MSR_EFER)); + + /* 26.3.1.2 */ + vmcs_write(GUEST_SEL_CS, KERNEL_CS); + vmcs_write(GUEST_SEL_SS, KERNEL_DS); + vmcs_write(GUEST_SEL_DS, KERNEL_DS); + vmcs_write(GUEST_SEL_ES, KERNEL_DS); + vmcs_write(GUEST_SEL_FS, KERNEL_DS); + vmcs_write(GUEST_SEL_GS, KERNEL_DS); + vmcs_write(GUEST_SEL_TR, TSS_MAIN); + vmcs_write(GUEST_SEL_LDTR, 0); + + vmcs_write(GUEST_BASE_CS, 0); + vmcs_write(GUEST_BASE_ES, 0); + vmcs_write(GUEST_BASE_SS, 0); + vmcs_write(GUEST_BASE_DS, 0); + vmcs_write(GUEST_BASE_FS, 0); + vmcs_write(GUEST_BASE_GS, 0); + vmcs_write(GUEST_BASE_TR, tss_descr.base); + vmcs_write(GUEST_BASE_LDTR, 0); + + vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF); + vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF); + vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF); + vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF); + vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF); + vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF); + vmcs_write(GUEST_LIMIT_LDTR, 0xffff); + vmcs_write(GUEST_LIMIT_TR, tss_descr.limit); + + vmcs_write(GUEST_AR_CS, 0xa09b); + vmcs_write(GUEST_AR_DS, 0xc093); + vmcs_write(GUEST_AR_ES, 0xc093); + vmcs_write(GUEST_AR_FS, 0xc093); + vmcs_write(GUEST_AR_GS, 0xc093); + vmcs_write(GUEST_AR_SS, 0xc093); + vmcs_write(GUEST_AR_LDTR, 0x82); + vmcs_write(GUEST_AR_TR, 0x8b); + + /* 26.3.1.3 */ + vmcs_write(GUEST_BASE_GDTR, gdt64_desc.base); + vmcs_write(GUEST_BASE_IDTR, idt_descr.base); + vmcs_write(GUEST_LIMIT_GDTR, gdt64_desc.limit); + vmcs_write(GUEST_LIMIT_IDTR, idt_descr.limit); + + /* 26.3.1.4 */ + vmcs_write(GUEST_RIP, (u64)(&guest_entry)); + vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1)); + vmcs_write(GUEST_RFLAGS, 0x2); + + /* 26.3.1.5 */ + vmcs_write(GUEST_ACTV_STATE, ACTV_ACTIVE); + vmcs_write(GUEST_INTR_STATE, 0); +} + +static int init_vmcs(struct vmcs **vmcs) +{ + *vmcs = alloc_page(); + memset(*vmcs, 0, PAGE_SIZE); + (*vmcs)->revision_id = basic.revision; + /* vmclear first to init vmcs */ + if (vmcs_clear(*vmcs)) { + printf("%s : vmcs_clear error\n", __func__); + return 1; + } + + if 
(make_vmcs_current(*vmcs)) { + printf("%s : make_vmcs_current error\n", __func__); + return 1; + } + + /* All settings to pin/exit/enter/cpu + control fields should be placed here */ + ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI; + ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64; + ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64); + /* DIsable IO instruction VMEXIT now */ + ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP)); + ctrl_cpu[1] = 0; + + ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr; + ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr; + ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr; + ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr; + + init_vmcs_ctrl(); + init_vmcs_host(); + init_vmcs_guest(); + return 0; +} + +static void init_vmx(void) +{ + ulong fix_cr0_set, fix_cr0_clr; + ulong fix_cr4_set, fix_cr4_clr; + + vmxon_region = alloc_page(); + memset(vmxon_region, 0, PAGE_SIZE); + + fix_cr0_set = rdmsr(MSR_IA32_VMX_CR0_FIXED0); + fix_cr0_clr = rdmsr(MSR_IA32_VMX_CR0_FIXED1); + fix_cr4_set = rdmsr(MSR_IA32_VMX_CR4_FIXED0); + fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1); + basic.val = rdmsr(MSR_IA32_VMX_BASIC); + ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN + : MSR_IA32_VMX_PINBASED_CTLS); + ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT + : MSR_IA32_VMX_EXIT_CTLS); + ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY + : MSR_IA32_VMX_ENTRY_CTLS); + ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? 
MSR_IA32_VMX_TRUE_PROC + : MSR_IA32_VMX_PROCBASED_CTLS); + if ((ctrl_cpu_rev[0].clr & CPU_SECONDARY) != 0) + ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2); + else + ctrl_cpu_rev[1].val = 0; + if ((ctrl_cpu_rev[1].clr & (CPU_EPT | CPU_VPID)) != 0) + ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); + else + ept_vpid.val = 0; + + write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set); + write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE); + + *vmxon_region = basic.revision; + + guest_stack = alloc_page(); + memset(guest_stack, 0, PAGE_SIZE); + guest_syscall_stack = alloc_page(); + memset(guest_syscall_stack, 0, PAGE_SIZE); +} + +static void do_vmxon_off(void *data) +{ + vmx_on(); + vmx_off(); +} + +static void do_write_feature_control(void *data) +{ + wrmsr(MSR_IA32_FEATURE_CONTROL, 0); +} + +static int test_vmx_feature_control(void) +{ + u64 ia32_feature_control; + bool vmx_enabled; + + ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); + vmx_enabled = ((ia32_feature_control & 0x5) == 0x5); + if ((ia32_feature_control & 0x5) == 0x5) { + printf("VMX enabled and locked by BIOS\n"); + return 0; + } else if (ia32_feature_control & 0x1) { + printf("ERROR: VMX locked out by BIOS!?\n"); + return 1; + } + + wrmsr(MSR_IA32_FEATURE_CONTROL, 0); + report("test vmxon with FEATURE_CONTROL cleared", + test_for_exception(GP_VECTOR, &do_vmxon_off, NULL)); + + wrmsr(MSR_IA32_FEATURE_CONTROL, 0x4); + report("test vmxon without FEATURE_CONTROL lock", + test_for_exception(GP_VECTOR, &do_vmxon_off, NULL)); + + wrmsr(MSR_IA32_FEATURE_CONTROL, 0x5); + vmx_enabled = ((rdmsr(MSR_IA32_FEATURE_CONTROL) & 0x5) == 0x5); + report("test enable VMX in FEATURE_CONTROL", vmx_enabled); + + report("test FEATURE_CONTROL lock bit", + test_for_exception(GP_VECTOR, &do_write_feature_control, NULL)); + + return !vmx_enabled; +} + +static int test_vmxon(void) +{ + int ret, ret1; + u64 *tmp_region = vmxon_region; + int width = cpuid_maxphyaddr(); + + /* Unaligned page access */ + 
vmxon_region = (u64 *)((intptr_t)vmxon_region + 1); + ret1 = vmx_on(); + report("test vmxon with unaligned vmxon region", ret1); + if (!ret1) { + ret = 1; + goto out; + } + + /* gpa bits beyond physical address width are set*/ + vmxon_region = (u64 *)((intptr_t)tmp_region | ((u64)1 << (width+1))); + ret1 = vmx_on(); + report("test vmxon with bits set beyond physical address width", ret1); + if (!ret1) { + ret = 1; + goto out; + } + + /* invalid revision indentifier */ + vmxon_region = tmp_region; + *vmxon_region = 0xba9da9; + ret1 = vmx_on(); + report("test vmxon with invalid revision identifier", ret1); + if (!ret1) { + ret = 1; + goto out; + } + + /* and finally a valid region */ + *vmxon_region = basic.revision; + ret = vmx_on(); + report("test vmxon with valid vmxon region", !ret); + +out: + return ret; +} + +static void test_vmptrld(void) +{ + struct vmcs *vmcs, *tmp_root; + int width = cpuid_maxphyaddr(); + + vmcs = alloc_page(); + vmcs->revision_id = basic.revision; + + /* Unaligned page access */ + tmp_root = (struct vmcs *)((intptr_t)vmcs + 1); + report("test vmptrld with unaligned vmcs", + make_vmcs_current(tmp_root) == 1); + + /* gpa bits beyond physical address width are set*/ + tmp_root = (struct vmcs *)((intptr_t)vmcs | + ((u64)1 << (width+1))); + report("test vmptrld with vmcs address bits set beyond physical address width", + make_vmcs_current(tmp_root) == 1); + + /* Pass VMXON region */ + tmp_root = (struct vmcs *)vmxon_region; + report("test vmptrld with vmxon region", + make_vmcs_current(tmp_root) == 1); + + report("test vmptrld with valid vmcs region", make_vmcs_current(vmcs) == 0); +} + +static void test_vmptrst(void) +{ + int ret; + struct vmcs *vmcs1, *vmcs2; + + vmcs1 = alloc_page(); + memset(vmcs1, 0, PAGE_SIZE); + init_vmcs(&vmcs1); + ret = vmcs_save(&vmcs2); + report("test vmptrst", (!ret) && (vmcs1 == vmcs2)); +} + +struct vmx_ctl_msr { + const char *name; + u32 index, true_index; + u32 default1; +} vmx_ctl_msr[] = { + { 
"MSR_IA32_VMX_PINBASED_CTLS", MSR_IA32_VMX_PINBASED_CTLS, + MSR_IA32_VMX_TRUE_PIN, 0x16 }, + { "MSR_IA32_VMX_PROCBASED_CTLS", MSR_IA32_VMX_PROCBASED_CTLS, + MSR_IA32_VMX_TRUE_PROC, 0x401e172 }, + { "MSR_IA32_VMX_PROCBASED_CTLS2", MSR_IA32_VMX_PROCBASED_CTLS2, + MSR_IA32_VMX_PROCBASED_CTLS2, 0 }, + { "MSR_IA32_VMX_EXIT_CTLS", MSR_IA32_VMX_EXIT_CTLS, + MSR_IA32_VMX_TRUE_EXIT, 0x36dff }, + { "MSR_IA32_VMX_ENTRY_CTLS", MSR_IA32_VMX_ENTRY_CTLS, + MSR_IA32_VMX_TRUE_ENTRY, 0x11ff }, +}; + +static void test_vmx_caps(void) +{ + u64 val, default1, fixed0, fixed1; + union vmx_ctrl_msr ctrl, true_ctrl; + unsigned int n; + bool ok; + + printf("\nTest suite: VMX capability reporting\n"); + + report("MSR_IA32_VMX_BASIC", + (basic.revision & (1ul << 31)) == 0 && + basic.size > 0 && basic.size <= 4096 && + (basic.type == 0 || basic.type == 6) && + basic.reserved1 == 0 && basic.reserved2 == 0); + + val = rdmsr(MSR_IA32_VMX_MISC); + report("MSR_IA32_VMX_MISC", + (!(ctrl_cpu_rev[1].clr & CPU_URG) || val & (1ul << 5)) && + ((val >> 16) & 0x1ff) <= 256 && + (val & 0xc0007e00) == 0); + + for (n = 0; n < ARRAY_SIZE(vmx_ctl_msr); n++) { + ctrl.val = rdmsr(vmx_ctl_msr[n].index); + default1 = vmx_ctl_msr[n].default1; + ok = (ctrl.set & default1) == default1; + ok = ok && (ctrl.set & ~ctrl.clr) == 0; + if (ok && basic.ctrl) { + true_ctrl.val = rdmsr(vmx_ctl_msr[n].true_index); + ok = ctrl.clr == true_ctrl.clr; + ok = ok && ctrl.set == (true_ctrl.set | default1); + } + report(vmx_ctl_msr[n].name, ok); + } + + fixed0 = rdmsr(MSR_IA32_VMX_CR0_FIXED0); + fixed1 = rdmsr(MSR_IA32_VMX_CR0_FIXED1); + report("MSR_IA32_VMX_IA32_VMX_CR0_FIXED0/1", + ((fixed0 ^ fixed1) & ~fixed1) == 0); + + fixed0 = rdmsr(MSR_IA32_VMX_CR4_FIXED0); + fixed1 = rdmsr(MSR_IA32_VMX_CR4_FIXED1); + report("MSR_IA32_VMX_IA32_VMX_CR4_FIXED0/1", + ((fixed0 ^ fixed1) & ~fixed1) == 0); + + val = rdmsr(MSR_IA32_VMX_VMCS_ENUM); + report("MSR_IA32_VMX_VMCS_ENUM", + (val & 0x3e) >= 0x2a && + (val & 0xfffffffffffffc01Ull) == 0); + + val 
= rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); + report("MSR_IA32_VMX_EPT_VPID_CAP", + (val & 0xfffff07ef9eebebeUll) == 0); +} + +/* This function can only be called in guest */ +static void __attribute__((__used__)) hypercall(u32 hypercall_no) +{ + u64 val = 0; + val = (hypercall_no & HYPERCALL_MASK) | HYPERCALL_BIT; + hypercall_field = val; + asm volatile("vmcall\n\t"); +} + +static bool is_hypercall() +{ + ulong reason, hyper_bit; + + reason = vmcs_read(EXI_REASON) & 0xff; + hyper_bit = hypercall_field & HYPERCALL_BIT; + if (reason == VMX_VMCALL && hyper_bit) + return true; + return false; +} + +static int handle_hypercall() +{ + ulong hypercall_no; + + hypercall_no = hypercall_field & HYPERCALL_MASK; + hypercall_field = 0; + switch (hypercall_no) { + case HYPERCALL_VMEXIT: + return VMX_TEST_VMEXIT; + default: + printf("ERROR : Invalid hypercall number : %ld\n", hypercall_no); + } + return VMX_TEST_EXIT; +} + +static int exit_handler() +{ + int ret; + + current->exits++; + regs.rflags = vmcs_read(GUEST_RFLAGS); + if (is_hypercall()) + ret = handle_hypercall(); + else + ret = current->exit_handler(); + vmcs_write(GUEST_RFLAGS, regs.rflags); + + return ret; +} + +/* + * Called if vmlaunch or vmresume fails. 
+ * @early - failure due to "VMX controls and host-state area" (26.2) + * @vmlaunch - was this a vmlaunch or vmresume + * @rflags - host rflags + */ +static int +entry_failure_handler(struct vmentry_failure *failure) +{ + if (current->entry_failure_handler) + return current->entry_failure_handler(failure); + else + return VMX_TEST_EXIT; +} + +static int vmx_run() +{ + unsigned long host_rflags; + + while (1) { + u32 ret; + u32 fail = 0; + bool entered; + struct vmentry_failure failure; + + asm volatile ( + "mov %[HOST_RSP], %%rdi\n\t" + "vmwrite %%rsp, %%rdi\n\t" + LOAD_GPR_C + "cmpb $0, %[launched]\n\t" + "jne 1f\n\t" + "vmlaunch\n\t" + "jmp 2f\n\t" + "1: " + "vmresume\n\t" + "2: " + SAVE_GPR_C + "pushf\n\t" + "pop %%rdi\n\t" + "mov %%rdi, %[host_rflags]\n\t" + "movl $1, %[fail]\n\t" + "jmp 3f\n\t" + "vmx_return:\n\t" + SAVE_GPR_C + "3: \n\t" + : [fail]"+m"(fail), [host_rflags]"=m"(host_rflags) + : [launched]"m"(launched), [HOST_RSP]"i"(HOST_RSP) + : "rdi", "memory", "cc" + + ); + + entered = !fail && !(vmcs_read(EXI_REASON) & VMX_ENTRY_FAILURE); + + if (entered) { + /* + * VMCS isn't in "launched" state if there's been any + * entry failure (early or otherwise). + */ + launched = 1; + ret = exit_handler(); + } else { + failure.flags = host_rflags; + failure.vmlaunch = !launched; + failure.instr = launched ? "vmresume" : "vmlaunch"; + failure.early = fail; + ret = entry_failure_handler(&failure); + } + + switch (ret) { + case VMX_TEST_RESUME: + continue; + case VMX_TEST_VMEXIT: + return 0; + case VMX_TEST_EXIT: + break; + default: + printf("ERROR : Invalid %s_handler return val %d.\n", + entered ? 
"exit" : "entry_failure", + ret); + break; + } + + if (entered) + print_vmexit_info(); + else + print_vmentry_failure_info(&failure); + abort(); + } +} + +static int test_run(struct vmx_test *test) +{ + if (test->name == NULL) + test->name = "(no name)"; + if (vmx_on()) { + printf("%s : vmxon failed.\n", __func__); + return 1; + } + init_vmcs(&(test->vmcs)); + /* Directly call test->init is ok here, init_vmcs has done + vmcs init, vmclear and vmptrld*/ + if (test->init && test->init(test->vmcs) != VMX_TEST_START) + goto out; + test->exits = 0; + current = test; + regs = test->guest_regs; + vmcs_write(GUEST_RFLAGS, regs.rflags | 0x2); + launched = 0; + printf("\nTest suite: %s\n", test->name); + vmx_run(); +out: + if (vmx_off()) { + printf("%s : vmxoff failed.\n", __func__); + return 1; + } + return 0; +} + +extern struct vmx_test vmx_tests[]; + +int main(void) +{ + int i = 0; + + setup_vm(); + setup_idt(); + hypercall_field = 0; + + if (!(cpuid(1).c & (1 << 5))) { + printf("WARNING: vmx not supported, add '-cpu host'\n"); + goto exit; + } + init_vmx(); + if (test_vmx_feature_control() != 0) + goto exit; + /* Set basic test ctxt the same as "null" */ + current = &vmx_tests[0]; + if (test_vmxon() != 0) + goto exit; + test_vmptrld(); + test_vmclear(); + test_vmptrst(); + init_vmcs(&vmcs_root); + if (vmx_run()) { + report("test vmlaunch", 0); + goto exit; + } + test_vmxoff(); + test_vmx_caps(); + + while (vmx_tests[++i].name != NULL) + if (test_run(&vmx_tests[i])) + goto exit; + +exit: + return report_summary(); +} diff --git a/tests/kvm-unit-tests/x86/vmx.h b/tests/kvm-unit-tests/x86/vmx.h new file mode 100644 index 00000000..a2bacd34 --- /dev/null +++ b/tests/kvm-unit-tests/x86/vmx.h @@ -0,0 +1,622 @@ +#ifndef __VMX_H +#define __VMX_H + +#include "libcflat.h" +#include "processor.h" +#include "bitops.h" +#include "asm/page.h" + +struct vmcs { + u32 revision_id; /* vmcs revision identifier */ + u32 abort; /* VMX-abort indicator */ + /* VMCS data */ + char data[0]; +}; 
+ +struct regs { + u64 rax; + u64 rcx; + u64 rdx; + u64 rbx; + u64 cr2; + u64 rbp; + u64 rsi; + u64 rdi; + u64 r8; + u64 r9; + u64 r10; + u64 r11; + u64 r12; + u64 r13; + u64 r14; + u64 r15; + u64 rflags; +}; + +struct vmentry_failure { + /* Did a vmlaunch or vmresume fail? */ + bool vmlaunch; + /* Instruction mnemonic (for convenience). */ + const char *instr; + /* Did the instruction return right away, or did we jump to HOST_RIP? */ + bool early; + /* Contents of [re]flags after failed entry. */ + unsigned long flags; +}; + +struct vmx_test { + const char *name; + int (*init)(struct vmcs *vmcs); + void (*guest_main)(); + int (*exit_handler)(); + void (*syscall_handler)(u64 syscall_no); + struct regs guest_regs; + int (*entry_failure_handler)(struct vmentry_failure *failure); + struct vmcs *vmcs; + int exits; +}; + +union vmx_basic { + u64 val; + struct { + u32 revision; + u32 size:13, + reserved1: 3, + width:1, + dual:1, + type:4, + insouts:1, + ctrl:1, + reserved2:8; + }; +}; + +union vmx_ctrl_msr { + u64 val; + struct { + u32 set, clr; + }; +}; + +union vmx_ept_vpid { + u64 val; + struct { + u32:16, + super:2, + : 2, + invept:1, + : 11; + u32 invvpid:1; + }; +}; + +enum Encoding { + /* 16-Bit Control Fields */ + VPID = 0x0000ul, + /* Posted-interrupt notification vector */ + PINV = 0x0002ul, + /* EPTP index */ + EPTP_IDX = 0x0004ul, + + /* 16-Bit Guest State Fields */ + GUEST_SEL_ES = 0x0800ul, + GUEST_SEL_CS = 0x0802ul, + GUEST_SEL_SS = 0x0804ul, + GUEST_SEL_DS = 0x0806ul, + GUEST_SEL_FS = 0x0808ul, + GUEST_SEL_GS = 0x080aul, + GUEST_SEL_LDTR = 0x080cul, + GUEST_SEL_TR = 0x080eul, + GUEST_INT_STATUS = 0x0810ul, + + /* 16-Bit Host State Fields */ + HOST_SEL_ES = 0x0c00ul, + HOST_SEL_CS = 0x0c02ul, + HOST_SEL_SS = 0x0c04ul, + HOST_SEL_DS = 0x0c06ul, + HOST_SEL_FS = 0x0c08ul, + HOST_SEL_GS = 0x0c0aul, + HOST_SEL_TR = 0x0c0cul, + + /* 64-Bit Control Fields */ + IO_BITMAP_A = 0x2000ul, + IO_BITMAP_B = 0x2002ul, + MSR_BITMAP = 0x2004ul, + EXIT_MSR_ST_ADDR = 
0x2006ul, + EXIT_MSR_LD_ADDR = 0x2008ul, + ENTER_MSR_LD_ADDR = 0x200aul, + VMCS_EXEC_PTR = 0x200cul, + TSC_OFFSET = 0x2010ul, + TSC_OFFSET_HI = 0x2011ul, + APIC_VIRT_ADDR = 0x2012ul, + APIC_ACCS_ADDR = 0x2014ul, + EPTP = 0x201aul, + EPTP_HI = 0x201bul, + + /* 64-Bit Readonly Data Field */ + INFO_PHYS_ADDR = 0x2400ul, + + /* 64-Bit Guest State */ + VMCS_LINK_PTR = 0x2800ul, + VMCS_LINK_PTR_HI = 0x2801ul, + GUEST_DEBUGCTL = 0x2802ul, + GUEST_DEBUGCTL_HI = 0x2803ul, + GUEST_EFER = 0x2806ul, + GUEST_PAT = 0x2804ul, + GUEST_PERF_GLOBAL_CTRL = 0x2808ul, + GUEST_PDPTE = 0x280aul, + + /* 64-Bit Host State */ + HOST_PAT = 0x2c00ul, + HOST_EFER = 0x2c02ul, + HOST_PERF_GLOBAL_CTRL = 0x2c04ul, + + /* 32-Bit Control Fields */ + PIN_CONTROLS = 0x4000ul, + CPU_EXEC_CTRL0 = 0x4002ul, + EXC_BITMAP = 0x4004ul, + PF_ERROR_MASK = 0x4006ul, + PF_ERROR_MATCH = 0x4008ul, + CR3_TARGET_COUNT = 0x400aul, + EXI_CONTROLS = 0x400cul, + EXI_MSR_ST_CNT = 0x400eul, + EXI_MSR_LD_CNT = 0x4010ul, + ENT_CONTROLS = 0x4012ul, + ENT_MSR_LD_CNT = 0x4014ul, + ENT_INTR_INFO = 0x4016ul, + ENT_INTR_ERROR = 0x4018ul, + ENT_INST_LEN = 0x401aul, + TPR_THRESHOLD = 0x401cul, + CPU_EXEC_CTRL1 = 0x401eul, + + /* 32-Bit R/O Data Fields */ + VMX_INST_ERROR = 0x4400ul, + EXI_REASON = 0x4402ul, + EXI_INTR_INFO = 0x4404ul, + EXI_INTR_ERROR = 0x4406ul, + IDT_VECT_INFO = 0x4408ul, + IDT_VECT_ERROR = 0x440aul, + EXI_INST_LEN = 0x440cul, + EXI_INST_INFO = 0x440eul, + + /* 32-Bit Guest State Fields */ + GUEST_LIMIT_ES = 0x4800ul, + GUEST_LIMIT_CS = 0x4802ul, + GUEST_LIMIT_SS = 0x4804ul, + GUEST_LIMIT_DS = 0x4806ul, + GUEST_LIMIT_FS = 0x4808ul, + GUEST_LIMIT_GS = 0x480aul, + GUEST_LIMIT_LDTR = 0x480cul, + GUEST_LIMIT_TR = 0x480eul, + GUEST_LIMIT_GDTR = 0x4810ul, + GUEST_LIMIT_IDTR = 0x4812ul, + GUEST_AR_ES = 0x4814ul, + GUEST_AR_CS = 0x4816ul, + GUEST_AR_SS = 0x4818ul, + GUEST_AR_DS = 0x481aul, + GUEST_AR_FS = 0x481cul, + GUEST_AR_GS = 0x481eul, + GUEST_AR_LDTR = 0x4820ul, + GUEST_AR_TR = 0x4822ul, + GUEST_INTR_STATE = 
0x4824ul, + GUEST_ACTV_STATE = 0x4826ul, + GUEST_SMBASE = 0x4828ul, + GUEST_SYSENTER_CS = 0x482aul, + PREEMPT_TIMER_VALUE = 0x482eul, + + /* 32-Bit Host State Fields */ + HOST_SYSENTER_CS = 0x4c00ul, + + /* Natural-Width Control Fields */ + CR0_MASK = 0x6000ul, + CR4_MASK = 0x6002ul, + CR0_READ_SHADOW = 0x6004ul, + CR4_READ_SHADOW = 0x6006ul, + CR3_TARGET_0 = 0x6008ul, + CR3_TARGET_1 = 0x600aul, + CR3_TARGET_2 = 0x600cul, + CR3_TARGET_3 = 0x600eul, + + /* Natural-Width R/O Data Fields */ + EXI_QUALIFICATION = 0x6400ul, + IO_RCX = 0x6402ul, + IO_RSI = 0x6404ul, + IO_RDI = 0x6406ul, + IO_RIP = 0x6408ul, + GUEST_LINEAR_ADDRESS = 0x640aul, + + /* Natural-Width Guest State Fields */ + GUEST_CR0 = 0x6800ul, + GUEST_CR3 = 0x6802ul, + GUEST_CR4 = 0x6804ul, + GUEST_BASE_ES = 0x6806ul, + GUEST_BASE_CS = 0x6808ul, + GUEST_BASE_SS = 0x680aul, + GUEST_BASE_DS = 0x680cul, + GUEST_BASE_FS = 0x680eul, + GUEST_BASE_GS = 0x6810ul, + GUEST_BASE_LDTR = 0x6812ul, + GUEST_BASE_TR = 0x6814ul, + GUEST_BASE_GDTR = 0x6816ul, + GUEST_BASE_IDTR = 0x6818ul, + GUEST_DR7 = 0x681aul, + GUEST_RSP = 0x681cul, + GUEST_RIP = 0x681eul, + GUEST_RFLAGS = 0x6820ul, + GUEST_PENDING_DEBUG = 0x6822ul, + GUEST_SYSENTER_ESP = 0x6824ul, + GUEST_SYSENTER_EIP = 0x6826ul, + + /* Natural-Width Host State Fields */ + HOST_CR0 = 0x6c00ul, + HOST_CR3 = 0x6c02ul, + HOST_CR4 = 0x6c04ul, + HOST_BASE_FS = 0x6c06ul, + HOST_BASE_GS = 0x6c08ul, + HOST_BASE_TR = 0x6c0aul, + HOST_BASE_GDTR = 0x6c0cul, + HOST_BASE_IDTR = 0x6c0eul, + HOST_SYSENTER_ESP = 0x6c10ul, + HOST_SYSENTER_EIP = 0x6c12ul, + HOST_RSP = 0x6c14ul, + HOST_RIP = 0x6c16ul +}; + +#define VMX_ENTRY_FAILURE (1ul << 31) +#define VMX_ENTRY_FLAGS (X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | \ + X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF) + +enum Reason { + VMX_EXC_NMI = 0, + VMX_EXTINT = 1, + VMX_TRIPLE_FAULT = 2, + VMX_INIT = 3, + VMX_SIPI = 4, + VMX_SMI_IO = 5, + VMX_SMI_OTHER = 6, + VMX_INTR_WINDOW = 7, + VMX_NMI_WINDOW = 8, + VMX_TASK_SWITCH = 9, + 
VMX_CPUID = 10, + VMX_GETSEC = 11, + VMX_HLT = 12, + VMX_INVD = 13, + VMX_INVLPG = 14, + VMX_RDPMC = 15, + VMX_RDTSC = 16, + VMX_RSM = 17, + VMX_VMCALL = 18, + VMX_VMCLEAR = 19, + VMX_VMLAUNCH = 20, + VMX_VMPTRLD = 21, + VMX_VMPTRST = 22, + VMX_VMREAD = 23, + VMX_VMRESUME = 24, + VMX_VMWRITE = 25, + VMX_VMXOFF = 26, + VMX_VMXON = 27, + VMX_CR = 28, + VMX_DR = 29, + VMX_IO = 30, + VMX_RDMSR = 31, + VMX_WRMSR = 32, + VMX_FAIL_STATE = 33, + VMX_FAIL_MSR = 34, + VMX_MWAIT = 36, + VMX_MTF = 37, + VMX_MONITOR = 39, + VMX_PAUSE = 40, + VMX_FAIL_MCHECK = 41, + VMX_TPR_THRESHOLD = 43, + VMX_APIC_ACCESS = 44, + VMX_GDTR_IDTR = 46, + VMX_LDTR_TR = 47, + VMX_EPT_VIOLATION = 48, + VMX_EPT_MISCONFIG = 49, + VMX_INVEPT = 50, + VMX_PREEMPT = 52, + VMX_INVVPID = 53, + VMX_WBINVD = 54, + VMX_XSETBV = 55 +}; + +enum Ctrl_exi { + EXI_SAVE_DBGCTLS = 1UL << 2, + EXI_HOST_64 = 1UL << 9, + EXI_LOAD_PERF = 1UL << 12, + EXI_INTA = 1UL << 15, + EXI_SAVE_PAT = 1UL << 18, + EXI_LOAD_PAT = 1UL << 19, + EXI_SAVE_EFER = 1UL << 20, + EXI_LOAD_EFER = 1UL << 21, + EXI_SAVE_PREEMPT = 1UL << 22, +}; + +enum Ctrl_ent { + ENT_LOAD_DBGCTLS = 1UL << 2, + ENT_GUEST_64 = 1UL << 9, + ENT_LOAD_PAT = 1UL << 14, + ENT_LOAD_EFER = 1UL << 15, +}; + +enum Ctrl_pin { + PIN_EXTINT = 1ul << 0, + PIN_NMI = 1ul << 3, + PIN_VIRT_NMI = 1ul << 5, + PIN_PREEMPT = 1ul << 6, +}; + +enum Ctrl0 { + CPU_INTR_WINDOW = 1ul << 2, + CPU_HLT = 1ul << 7, + CPU_INVLPG = 1ul << 9, + CPU_MWAIT = 1ul << 10, + CPU_RDPMC = 1ul << 11, + CPU_RDTSC = 1ul << 12, + CPU_CR3_LOAD = 1ul << 15, + CPU_CR3_STORE = 1ul << 16, + CPU_CR8_LOAD = 1ul << 19, + CPU_CR8_STORE = 1ul << 20, + CPU_TPR_SHADOW = 1ul << 21, + CPU_NMI_WINDOW = 1ul << 22, + CPU_IO = 1ul << 24, + CPU_IO_BITMAP = 1ul << 25, + CPU_MSR_BITMAP = 1ul << 28, + CPU_MONITOR = 1ul << 29, + CPU_PAUSE = 1ul << 30, + CPU_SECONDARY = 1ul << 31, +}; + +enum Ctrl1 { + CPU_EPT = 1ul << 1, + CPU_DESC_TABLE = 1ul << 2, + CPU_RDTSCP = 1ul << 3, + CPU_VPID = 1ul << 5, + CPU_URG = 1ul << 7, + CPU_WBINVD 
= 1ul << 6, + CPU_RDRAND = 1ul << 11, +}; + +enum Intr_type { + VMX_INTR_TYPE_EXT_INTR = 0, + VMX_INTR_TYPE_NMI_INTR = 2, + VMX_INTR_TYPE_HARD_EXCEPTION = 3, + VMX_INTR_TYPE_SOFT_INTR = 4, + VMX_INTR_TYPE_SOFT_EXCEPTION = 6, +}; + +/* + * Interruption-information format + */ +#define INTR_INFO_VECTOR_MASK 0xff /* 7:0 */ +#define INTR_INFO_INTR_TYPE_MASK 0x700 /* 10:8 */ +#define INTR_INFO_DELIVER_CODE_MASK 0x800 /* 11 */ +#define INTR_INFO_UNBLOCK_NMI_MASK 0x1000 /* 12 */ +#define INTR_INFO_VALID_MASK 0x80000000 /* 31 */ + +#define INTR_INFO_INTR_TYPE_SHIFT 8 + +#define SAVE_GPR \ + "xchg %rax, regs\n\t" \ + "xchg %rbx, regs+0x8\n\t" \ + "xchg %rcx, regs+0x10\n\t" \ + "xchg %rdx, regs+0x18\n\t" \ + "xchg %rbp, regs+0x28\n\t" \ + "xchg %rsi, regs+0x30\n\t" \ + "xchg %rdi, regs+0x38\n\t" \ + "xchg %r8, regs+0x40\n\t" \ + "xchg %r9, regs+0x48\n\t" \ + "xchg %r10, regs+0x50\n\t" \ + "xchg %r11, regs+0x58\n\t" \ + "xchg %r12, regs+0x60\n\t" \ + "xchg %r13, regs+0x68\n\t" \ + "xchg %r14, regs+0x70\n\t" \ + "xchg %r15, regs+0x78\n\t" + +#define LOAD_GPR SAVE_GPR + +#define SAVE_GPR_C \ + "xchg %%rax, regs\n\t" \ + "xchg %%rbx, regs+0x8\n\t" \ + "xchg %%rcx, regs+0x10\n\t" \ + "xchg %%rdx, regs+0x18\n\t" \ + "xchg %%rbp, regs+0x28\n\t" \ + "xchg %%rsi, regs+0x30\n\t" \ + "xchg %%rdi, regs+0x38\n\t" \ + "xchg %%r8, regs+0x40\n\t" \ + "xchg %%r9, regs+0x48\n\t" \ + "xchg %%r10, regs+0x50\n\t" \ + "xchg %%r11, regs+0x58\n\t" \ + "xchg %%r12, regs+0x60\n\t" \ + "xchg %%r13, regs+0x68\n\t" \ + "xchg %%r14, regs+0x70\n\t" \ + "xchg %%r15, regs+0x78\n\t" + +#define LOAD_GPR_C SAVE_GPR_C + +#define VMX_IO_SIZE_MASK 0x7 +#define _VMX_IO_BYTE 0 +#define _VMX_IO_WORD 1 +#define _VMX_IO_LONG 3 +#define VMX_IO_DIRECTION_MASK (1ul << 3) +#define VMX_IO_IN (1ul << 3) +#define VMX_IO_OUT 0 +#define VMX_IO_STRING (1ul << 4) +#define VMX_IO_REP (1ul << 5) +#define VMX_IO_OPRAND_IMM (1ul << 6) +#define VMX_IO_PORT_MASK 0xFFFF0000 +#define VMX_IO_PORT_SHIFT 16 + +#define VMX_TEST_START 0 
+#define VMX_TEST_VMEXIT 1 +#define VMX_TEST_EXIT 2 +#define VMX_TEST_RESUME 3 + +#define HYPERCALL_BIT (1ul << 12) +#define HYPERCALL_MASK 0xFFF +#define HYPERCALL_VMEXIT 0x1 + +#define EPTP_PG_WALK_LEN_SHIFT 3ul +#define EPTP_AD_FLAG (1ul << 6) + +#define EPT_MEM_TYPE_UC 0ul +#define EPT_MEM_TYPE_WC 1ul +#define EPT_MEM_TYPE_WT 4ul +#define EPT_MEM_TYPE_WP 5ul +#define EPT_MEM_TYPE_WB 6ul + +#define EPT_RA 1ul +#define EPT_WA 2ul +#define EPT_EA 4ul +#define EPT_PRESENT (EPT_RA | EPT_WA | EPT_EA) +#define EPT_ACCESS_FLAG (1ul << 8) +#define EPT_DIRTY_FLAG (1ul << 9) +#define EPT_LARGE_PAGE (1ul << 7) +#define EPT_MEM_TYPE_SHIFT 3ul +#define EPT_IGNORE_PAT (1ul << 6) +#define EPT_SUPPRESS_VE (1ull << 63) + +#define EPT_CAP_WT 1ull +#define EPT_CAP_PWL4 (1ull << 6) +#define EPT_CAP_UC (1ull << 8) +#define EPT_CAP_WB (1ull << 14) +#define EPT_CAP_2M_PAGE (1ull << 16) +#define EPT_CAP_1G_PAGE (1ull << 17) +#define EPT_CAP_INVEPT (1ull << 20) +#define EPT_CAP_INVEPT_SINGLE (1ull << 25) +#define EPT_CAP_INVEPT_ALL (1ull << 26) +#define EPT_CAP_AD_FLAG (1ull << 21) +#define VPID_CAP_INVVPID (1ull << 32) +#define VPID_CAP_INVVPID_SINGLE (1ull << 41) +#define VPID_CAP_INVVPID_ALL (1ull << 42) + +#define PAGE_SIZE_2M (512 * PAGE_SIZE) +#define PAGE_SIZE_1G (512 * PAGE_SIZE_2M) +#define EPT_PAGE_LEVEL 4 +#define EPT_PGDIR_WIDTH 9 +#define EPT_PGDIR_MASK 511 +#define EPT_PGDIR_ENTRIES (1 << EPT_PGDIR_WIDTH) +#define EPT_LEVEL_SHIFT(level) (((level)-1) * EPT_PGDIR_WIDTH + 12) +#define EPT_ADDR_MASK GENMASK_ULL(51, 12) +#define PAGE_MASK_2M (~(PAGE_SIZE_2M-1)) + +#define EPT_VLT_RD 1 +#define EPT_VLT_WR (1 << 1) +#define EPT_VLT_FETCH (1 << 2) +#define EPT_VLT_PERM_RD (1 << 3) +#define EPT_VLT_PERM_WR (1 << 4) +#define EPT_VLT_PERM_EX (1 << 5) +#define EPT_VLT_LADDR_VLD (1 << 7) +#define EPT_VLT_PADDR (1 << 8) + +#define MAGIC_VAL_1 0x12345678ul +#define MAGIC_VAL_2 0x87654321ul +#define MAGIC_VAL_3 0xfffffffful + +#define INVEPT_SINGLE 1 +#define INVEPT_GLOBAL 2 + +#define 
INVVPID_SINGLE_ADDRESS 0 +#define INVVPID_SINGLE 1 +#define INVVPID_ALL 2 + +#define ACTV_ACTIVE 0 +#define ACTV_HLT 1 + +extern struct regs regs; + +extern union vmx_basic basic; +extern union vmx_ctrl_msr ctrl_pin_rev; +extern union vmx_ctrl_msr ctrl_cpu_rev[2]; +extern union vmx_ctrl_msr ctrl_exit_rev; +extern union vmx_ctrl_msr ctrl_enter_rev; +extern union vmx_ept_vpid ept_vpid; + +void vmx_set_test_stage(u32 s); +u32 vmx_get_test_stage(void); +void vmx_inc_test_stage(void); + +static inline int vmcs_clear(struct vmcs *vmcs) +{ + bool ret; + u64 rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; + + asm volatile ("push %1; popf; vmclear %2; setbe %0" + : "=q" (ret) : "q" (rflags), "m" (vmcs) : "cc"); + return ret; +} + +static inline u64 vmcs_read(enum Encoding enc) +{ + u64 val; + asm volatile ("vmread %1, %0" : "=rm" (val) : "r" ((u64)enc) : "cc"); + return val; +} + +static inline int vmcs_write(enum Encoding enc, u64 val) +{ + bool ret; + asm volatile ("vmwrite %1, %2; setbe %0" + : "=q"(ret) : "rm" (val), "r" ((u64)enc) : "cc"); + return ret; +} + +static inline int vmcs_save(struct vmcs **vmcs) +{ + bool ret; + u64 rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; + + asm volatile ("push %1; popf; vmptrst %2; setbe %0" + : "=q" (ret) : "q" (rflags), "m" (*vmcs) : "cc"); + return ret; +} + +static inline bool invept(unsigned long type, u64 eptp) +{ + bool ret; + u64 rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; + + struct { + u64 eptp, gpa; + } operand = {eptp, 0}; + asm volatile("push %1; popf; invept %2, %3; setbe %0" + : "=q" (ret) : "r" (rflags), "m"(operand),"r"(type) : "cc"); + return ret; +} + +static inline bool invvpid(unsigned long type, u16 vpid, u64 gva) +{ + bool ret; + u64 rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; + + struct { + u64 vpid : 16; + u64 rsvd : 48; + u64 gva; + } operand = {vpid, 0, gva}; + asm volatile("push %1; popf; invvpid %2, %3; setbe %0" + : "=q" (ret) : "r" (rflags), 
"m"(operand),"r"(type) : "cc"); + return ret; +} + +void print_vmexit_info(); +void print_vmentry_failure_info(struct vmentry_failure *failure); +void ept_sync(int type, u64 eptp); +void vpid_sync(int type, u16 vpid); +void install_ept_entry(unsigned long *pml4, int pte_level, + unsigned long guest_addr, unsigned long pte, + unsigned long *pt_page); +void install_1g_ept(unsigned long *pml4, unsigned long phys, + unsigned long guest_addr, u64 perm); +void install_2m_ept(unsigned long *pml4, unsigned long phys, + unsigned long guest_addr, u64 perm); +void install_ept(unsigned long *pml4, unsigned long phys, + unsigned long guest_addr, u64 perm); +void setup_ept_range(unsigned long *pml4, unsigned long start, + unsigned long len, int map_1g, int map_2m, u64 perm); +unsigned long get_ept_pte(unsigned long *pml4, + unsigned long guest_addr, int level); +int set_ept_pte(unsigned long *pml4, unsigned long guest_addr, + int level, u64 pte_val); + +#endif diff --git a/tests/kvm-unit-tests/x86/vmx_tests.c b/tests/kvm-unit-tests/x86/vmx_tests.c new file mode 100644 index 00000000..fff12141 --- /dev/null +++ b/tests/kvm-unit-tests/x86/vmx_tests.c @@ -0,0 +1,1849 @@ +/* + * All test cases of nested virtualization should be in this file + * + * Author : Arthur Chunqi Li + */ +#include "vmx.h" +#include "msr.h" +#include "processor.h" +#include "vm.h" +#include "fwcfg.h" +#include "isr.h" +#include "desc.h" +#include "apic.h" +#include "types.h" + +u64 ia32_pat; +u64 ia32_efer; +void *io_bitmap_a, *io_bitmap_b; +u16 ioport; + +unsigned long *pml4; +u64 eptp; +void *data_page1, *data_page2; + +static inline void vmcall() +{ + asm volatile("vmcall"); +} + +void basic_guest_main() +{ +} + +int basic_exit_handler() +{ + report("Basic VMX test", 0); + print_vmexit_info(); + return VMX_TEST_EXIT; +} + +void vmenter_main() +{ + u64 rax; + u64 rsp, resume_rsp; + + report("test vmlaunch", 1); + + asm volatile( + "mov %%rsp, %0\n\t" + "mov %3, %%rax\n\t" + "vmcall\n\t" + "mov %%rax, 
%1\n\t" + "mov %%rsp, %2\n\t" + : "=r"(rsp), "=r"(rax), "=r"(resume_rsp) + : "g"(0xABCD)); + report("test vmresume", (rax == 0xFFFF) && (rsp == resume_rsp)); +} + +int vmenter_exit_handler() +{ + u64 guest_rip; + ulong reason; + + guest_rip = vmcs_read(GUEST_RIP); + reason = vmcs_read(EXI_REASON) & 0xff; + switch (reason) { + case VMX_VMCALL: + if (regs.rax != 0xABCD) { + report("test vmresume", 0); + return VMX_TEST_VMEXIT; + } + regs.rax = 0xFFFF; + vmcs_write(GUEST_RIP, guest_rip + 3); + return VMX_TEST_RESUME; + default: + report("test vmresume", 0); + print_vmexit_info(); + } + return VMX_TEST_VMEXIT; +} + +u32 preempt_scale; +volatile unsigned long long tsc_val; +volatile u32 preempt_val; +u64 saved_rip; + +int preemption_timer_init() +{ + if (!(ctrl_pin_rev.clr & PIN_PREEMPT)) { + printf("\tPreemption timer is not supported\n"); + return VMX_TEST_EXIT; + } + vmcs_write(PIN_CONTROLS, vmcs_read(PIN_CONTROLS) | PIN_PREEMPT); + preempt_val = 10000000; + vmcs_write(PREEMPT_TIMER_VALUE, preempt_val); + preempt_scale = rdmsr(MSR_IA32_VMX_MISC) & 0x1F; + + if (!(ctrl_exit_rev.clr & EXI_SAVE_PREEMPT)) + printf("\tSave preemption value is not supported\n"); + + return VMX_TEST_START; +} + +void preemption_timer_main() +{ + tsc_val = rdtsc(); + if (ctrl_exit_rev.clr & EXI_SAVE_PREEMPT) { + vmx_set_test_stage(0); + vmcall(); + if (vmx_get_test_stage() == 1) + vmcall(); + } + vmx_set_test_stage(1); + while (vmx_get_test_stage() == 1) { + if (((rdtsc() - tsc_val) >> preempt_scale) + > 10 * preempt_val) { + vmx_set_test_stage(2); + vmcall(); + } + } + tsc_val = rdtsc(); + asm volatile ("hlt"); + vmcall(); + vmx_set_test_stage(5); + vmcall(); +} + +int preemption_timer_exit_handler() +{ + bool guest_halted; + u64 guest_rip; + ulong reason; + u32 insn_len; + u32 ctrl_exit; + + guest_rip = vmcs_read(GUEST_RIP); + reason = vmcs_read(EXI_REASON) & 0xff; + insn_len = vmcs_read(EXI_INST_LEN); + switch (reason) { + case VMX_PREEMPT: + switch (vmx_get_test_stage()) { + case 1: + 
case 2: + report("busy-wait for preemption timer", + ((rdtsc() - tsc_val) >> preempt_scale) >= + preempt_val); + vmx_set_test_stage(3); + vmcs_write(PREEMPT_TIMER_VALUE, preempt_val); + return VMX_TEST_RESUME; + case 3: + guest_halted = + (vmcs_read(GUEST_ACTV_STATE) == ACTV_HLT); + report("preemption timer during hlt", + ((rdtsc() - tsc_val) >> preempt_scale) >= + preempt_val && guest_halted); + vmx_set_test_stage(4); + vmcs_write(PIN_CONTROLS, + vmcs_read(PIN_CONTROLS) & ~PIN_PREEMPT); + vmcs_write(GUEST_ACTV_STATE, ACTV_ACTIVE); + return VMX_TEST_RESUME; + case 4: + report("preemption timer with 0 value", + saved_rip == guest_rip); + break; + default: + printf("Invalid stage.\n"); + print_vmexit_info(); + break; + } + break; + case VMX_VMCALL: + vmcs_write(GUEST_RIP, guest_rip + insn_len); + switch (vmx_get_test_stage()) { + case 0: + report("Keep preemption value", + vmcs_read(PREEMPT_TIMER_VALUE) == preempt_val); + vmx_set_test_stage(1); + vmcs_write(PREEMPT_TIMER_VALUE, preempt_val); + ctrl_exit = (vmcs_read(EXI_CONTROLS) | + EXI_SAVE_PREEMPT) & ctrl_exit_rev.clr; + vmcs_write(EXI_CONTROLS, ctrl_exit); + return VMX_TEST_RESUME; + case 1: + report("Save preemption value", + vmcs_read(PREEMPT_TIMER_VALUE) < preempt_val); + return VMX_TEST_RESUME; + case 2: + report("busy-wait for preemption timer", 0); + vmx_set_test_stage(3); + vmcs_write(PREEMPT_TIMER_VALUE, preempt_val); + return VMX_TEST_RESUME; + case 3: + report("preemption timer during hlt", 0); + vmx_set_test_stage(4); + /* fall through */ + case 4: + vmcs_write(PIN_CONTROLS, + vmcs_read(PIN_CONTROLS) | PIN_PREEMPT); + vmcs_write(PREEMPT_TIMER_VALUE, 0); + saved_rip = guest_rip + insn_len; + return VMX_TEST_RESUME; + case 5: + report("preemption timer with 0 value (vmcall stage 5)", 0); + break; + default: + // Should not reach here + printf("ERROR : unexpected stage, %d\n", + vmx_get_test_stage()); + print_vmexit_info(); + return VMX_TEST_VMEXIT; + } + break; + default: + printf("Unknown exit reason, 
%ld\n", reason); + print_vmexit_info(); + } + vmcs_write(PIN_CONTROLS, vmcs_read(PIN_CONTROLS) & ~PIN_PREEMPT); + return VMX_TEST_VMEXIT; +} + +void msr_bmp_init() +{ + void *msr_bitmap; + u32 ctrl_cpu0; + + msr_bitmap = alloc_page(); + memset(msr_bitmap, 0x0, PAGE_SIZE); + ctrl_cpu0 = vmcs_read(CPU_EXEC_CTRL0); + ctrl_cpu0 |= CPU_MSR_BITMAP; + vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu0); + vmcs_write(MSR_BITMAP, (u64)msr_bitmap); +} + +static int test_ctrl_pat_init() +{ + u64 ctrl_ent; + u64 ctrl_exi; + + msr_bmp_init(); + if (!(ctrl_exit_rev.clr & EXI_SAVE_PAT) && + !(ctrl_exit_rev.clr & EXI_LOAD_PAT) && + !(ctrl_enter_rev.clr & ENT_LOAD_PAT)) { + printf("\tSave/load PAT is not supported\n"); + return 1; + } + + ctrl_ent = vmcs_read(ENT_CONTROLS); + ctrl_exi = vmcs_read(EXI_CONTROLS); + ctrl_ent |= ctrl_enter_rev.clr & ENT_LOAD_PAT; + ctrl_exi |= ctrl_exit_rev.clr & (EXI_SAVE_PAT | EXI_LOAD_PAT); + vmcs_write(ENT_CONTROLS, ctrl_ent); + vmcs_write(EXI_CONTROLS, ctrl_exi); + ia32_pat = rdmsr(MSR_IA32_CR_PAT); + vmcs_write(GUEST_PAT, 0x0); + vmcs_write(HOST_PAT, ia32_pat); + return VMX_TEST_START; +} + +static void test_ctrl_pat_main() +{ + u64 guest_ia32_pat; + + guest_ia32_pat = rdmsr(MSR_IA32_CR_PAT); + if (!(ctrl_enter_rev.clr & ENT_LOAD_PAT)) + printf("\tENT_LOAD_PAT is not supported.\n"); + else { + if (guest_ia32_pat != 0) { + report("Entry load PAT", 0); + return; + } + } + wrmsr(MSR_IA32_CR_PAT, 0x6); + vmcall(); + guest_ia32_pat = rdmsr(MSR_IA32_CR_PAT); + if (ctrl_enter_rev.clr & ENT_LOAD_PAT) + report("Entry load PAT", guest_ia32_pat == ia32_pat); +} + +static int test_ctrl_pat_exit_handler() +{ + u64 guest_rip; + ulong reason; + u64 guest_pat; + + guest_rip = vmcs_read(GUEST_RIP); + reason = vmcs_read(EXI_REASON) & 0xff; + switch (reason) { + case VMX_VMCALL: + guest_pat = vmcs_read(GUEST_PAT); + if (!(ctrl_exit_rev.clr & EXI_SAVE_PAT)) { + printf("\tEXI_SAVE_PAT is not supported\n"); + vmcs_write(GUEST_PAT, 0x6); + } else { + report("Exit save PAT", 
guest_pat == 0x6); + } + if (!(ctrl_exit_rev.clr & EXI_LOAD_PAT)) + printf("\tEXI_LOAD_PAT is not supported\n"); + else + report("Exit load PAT", rdmsr(MSR_IA32_CR_PAT) == ia32_pat); + vmcs_write(GUEST_PAT, ia32_pat); + vmcs_write(GUEST_RIP, guest_rip + 3); + return VMX_TEST_RESUME; + default: + printf("ERROR : Undefined exit reason, reason = %ld.\n", reason); + break; + } + return VMX_TEST_VMEXIT; +} + +static int test_ctrl_efer_init() +{ + u64 ctrl_ent; + u64 ctrl_exi; + + msr_bmp_init(); + ctrl_ent = vmcs_read(ENT_CONTROLS) | ENT_LOAD_EFER; + ctrl_exi = vmcs_read(EXI_CONTROLS) | EXI_SAVE_EFER | EXI_LOAD_EFER; + vmcs_write(ENT_CONTROLS, ctrl_ent & ctrl_enter_rev.clr); + vmcs_write(EXI_CONTROLS, ctrl_exi & ctrl_exit_rev.clr); + ia32_efer = rdmsr(MSR_EFER); + vmcs_write(GUEST_EFER, ia32_efer ^ EFER_NX); + vmcs_write(HOST_EFER, ia32_efer ^ EFER_NX); + return VMX_TEST_START; +} + +static void test_ctrl_efer_main() +{ + u64 guest_ia32_efer; + + guest_ia32_efer = rdmsr(MSR_EFER); + if (!(ctrl_enter_rev.clr & ENT_LOAD_EFER)) + printf("\tENT_LOAD_EFER is not supported.\n"); + else { + if (guest_ia32_efer != (ia32_efer ^ EFER_NX)) { + report("Entry load EFER", 0); + return; + } + } + wrmsr(MSR_EFER, ia32_efer); + vmcall(); + guest_ia32_efer = rdmsr(MSR_EFER); + if (ctrl_enter_rev.clr & ENT_LOAD_EFER) + report("Entry load EFER", guest_ia32_efer == ia32_efer); +} + +static int test_ctrl_efer_exit_handler() +{ + u64 guest_rip; + ulong reason; + u64 guest_efer; + + guest_rip = vmcs_read(GUEST_RIP); + reason = vmcs_read(EXI_REASON) & 0xff; + switch (reason) { + case VMX_VMCALL: + guest_efer = vmcs_read(GUEST_EFER); + if (!(ctrl_exit_rev.clr & EXI_SAVE_EFER)) { + printf("\tEXI_SAVE_EFER is not supported\n"); + vmcs_write(GUEST_EFER, ia32_efer); + } else { + report("Exit save EFER", guest_efer == ia32_efer); + } + if (!(ctrl_exit_rev.clr & EXI_LOAD_EFER)) { + printf("\tEXI_LOAD_EFER is not supported\n"); + wrmsr(MSR_EFER, ia32_efer ^ EFER_NX); + } else { + report("Exit load 
EFER", rdmsr(MSR_EFER) == (ia32_efer ^ EFER_NX)); + } + vmcs_write(GUEST_PAT, ia32_efer); + vmcs_write(GUEST_RIP, guest_rip + 3); + return VMX_TEST_RESUME; + default: + printf("ERROR : Undefined exit reason, reason = %ld.\n", reason); + break; + } + return VMX_TEST_VMEXIT; +} + +u32 guest_cr0, guest_cr4; + +static void cr_shadowing_main() +{ + u32 cr0, cr4, tmp; + + // Test read through + vmx_set_test_stage(0); + guest_cr0 = read_cr0(); + if (vmx_get_test_stage() == 1) + report("Read through CR0", 0); + else + vmcall(); + vmx_set_test_stage(1); + guest_cr4 = read_cr4(); + if (vmx_get_test_stage() == 2) + report("Read through CR4", 0); + else + vmcall(); + // Test write through + guest_cr0 = guest_cr0 ^ (X86_CR0_TS | X86_CR0_MP); + guest_cr4 = guest_cr4 ^ (X86_CR4_TSD | X86_CR4_DE); + vmx_set_test_stage(2); + write_cr0(guest_cr0); + if (vmx_get_test_stage() == 3) + report("Write throuth CR0", 0); + else + vmcall(); + vmx_set_test_stage(3); + write_cr4(guest_cr4); + if (vmx_get_test_stage() == 4) + report("Write through CR4", 0); + else + vmcall(); + // Test read shadow + vmx_set_test_stage(4); + vmcall(); + cr0 = read_cr0(); + if (vmx_get_test_stage() != 5) + report("Read shadowing CR0", cr0 == guest_cr0); + vmx_set_test_stage(5); + cr4 = read_cr4(); + if (vmx_get_test_stage() != 6) + report("Read shadowing CR4", cr4 == guest_cr4); + // Test write shadow (same value with shadow) + vmx_set_test_stage(6); + write_cr0(guest_cr0); + if (vmx_get_test_stage() == 7) + report("Write shadowing CR0 (same value with shadow)", 0); + else + vmcall(); + vmx_set_test_stage(7); + write_cr4(guest_cr4); + if (vmx_get_test_stage() == 8) + report("Write shadowing CR4 (same value with shadow)", 0); + else + vmcall(); + // Test write shadow (different value) + vmx_set_test_stage(8); + tmp = guest_cr0 ^ X86_CR0_TS; + asm volatile("mov %0, %%rsi\n\t" + "mov %%rsi, %%cr0\n\t" + ::"m"(tmp) + :"rsi", "memory", "cc"); + report("Write shadowing different X86_CR0_TS", vmx_get_test_stage() == 9); 
+ vmx_set_test_stage(9); + tmp = guest_cr0 ^ X86_CR0_MP; + asm volatile("mov %0, %%rsi\n\t" + "mov %%rsi, %%cr0\n\t" + ::"m"(tmp) + :"rsi", "memory", "cc"); + report("Write shadowing different X86_CR0_MP", vmx_get_test_stage() == 10); + vmx_set_test_stage(10); + tmp = guest_cr4 ^ X86_CR4_TSD; + asm volatile("mov %0, %%rsi\n\t" + "mov %%rsi, %%cr4\n\t" + ::"m"(tmp) + :"rsi", "memory", "cc"); + report("Write shadowing different X86_CR4_TSD", vmx_get_test_stage() == 11); + vmx_set_test_stage(11); + tmp = guest_cr4 ^ X86_CR4_DE; + asm volatile("mov %0, %%rsi\n\t" + "mov %%rsi, %%cr4\n\t" + ::"m"(tmp) + :"rsi", "memory", "cc"); + report("Write shadowing different X86_CR4_DE", vmx_get_test_stage() == 12); +} + +static int cr_shadowing_exit_handler() +{ + u64 guest_rip; + ulong reason; + u32 insn_len; + u32 exit_qual; + + guest_rip = vmcs_read(GUEST_RIP); + reason = vmcs_read(EXI_REASON) & 0xff; + insn_len = vmcs_read(EXI_INST_LEN); + exit_qual = vmcs_read(EXI_QUALIFICATION); + switch (reason) { + case VMX_VMCALL: + switch (vmx_get_test_stage()) { + case 0: + report("Read through CR0", guest_cr0 == vmcs_read(GUEST_CR0)); + break; + case 1: + report("Read through CR4", guest_cr4 == vmcs_read(GUEST_CR4)); + break; + case 2: + report("Write through CR0", guest_cr0 == vmcs_read(GUEST_CR0)); + break; + case 3: + report("Write through CR4", guest_cr4 == vmcs_read(GUEST_CR4)); + break; + case 4: + guest_cr0 = vmcs_read(GUEST_CR0) ^ (X86_CR0_TS | X86_CR0_MP); + guest_cr4 = vmcs_read(GUEST_CR4) ^ (X86_CR4_TSD | X86_CR4_DE); + vmcs_write(CR0_MASK, X86_CR0_TS | X86_CR0_MP); + vmcs_write(CR0_READ_SHADOW, guest_cr0 & (X86_CR0_TS | X86_CR0_MP)); + vmcs_write(CR4_MASK, X86_CR4_TSD | X86_CR4_DE); + vmcs_write(CR4_READ_SHADOW, guest_cr4 & (X86_CR4_TSD | X86_CR4_DE)); + break; + case 6: + report("Write shadowing CR0 (same value)", + guest_cr0 == (vmcs_read(GUEST_CR0) ^ (X86_CR0_TS | X86_CR0_MP))); + break; + case 7: + report("Write shadowing CR4 (same value)", + guest_cr4 == 
(vmcs_read(GUEST_CR4) ^ (X86_CR4_TSD | X86_CR4_DE))); + break; + default: + // Should not reach here + printf("ERROR : unexpected stage, %d\n", + vmx_get_test_stage()); + print_vmexit_info(); + return VMX_TEST_VMEXIT; + } + vmcs_write(GUEST_RIP, guest_rip + insn_len); + return VMX_TEST_RESUME; + case VMX_CR: + switch (vmx_get_test_stage()) { + case 4: + report("Read shadowing CR0", 0); + vmx_inc_test_stage(); + break; + case 5: + report("Read shadowing CR4", 0); + vmx_inc_test_stage(); + break; + case 6: + report("Write shadowing CR0 (same value)", 0); + vmx_inc_test_stage(); + break; + case 7: + report("Write shadowing CR4 (same value)", 0); + vmx_inc_test_stage(); + break; + case 8: + case 9: + // 0x600 encodes "mov %esi, %cr0" + if (exit_qual == 0x600) + vmx_inc_test_stage(); + break; + case 10: + case 11: + // 0x604 encodes "mov %esi, %cr4" + if (exit_qual == 0x604) + vmx_inc_test_stage(); + break; + default: + // Should not reach here + printf("ERROR : unexpected stage, %d\n", + vmx_get_test_stage()); + print_vmexit_info(); + return VMX_TEST_VMEXIT; + } + vmcs_write(GUEST_RIP, guest_rip + insn_len); + return VMX_TEST_RESUME; + default: + printf("Unknown exit reason, %ld\n", reason); + print_vmexit_info(); + } + return VMX_TEST_VMEXIT; +} + +static int iobmp_init() +{ + u32 ctrl_cpu0; + + io_bitmap_a = alloc_page(); + io_bitmap_b = alloc_page(); + memset(io_bitmap_a, 0x0, PAGE_SIZE); + memset(io_bitmap_b, 0x0, PAGE_SIZE); + ctrl_cpu0 = vmcs_read(CPU_EXEC_CTRL0); + ctrl_cpu0 |= CPU_IO_BITMAP; + ctrl_cpu0 &= (~CPU_IO); + vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu0); + vmcs_write(IO_BITMAP_A, (u64)io_bitmap_a); + vmcs_write(IO_BITMAP_B, (u64)io_bitmap_b); + return VMX_TEST_START; +} + +static void iobmp_main() +{ + // stage 0, test IO pass + vmx_set_test_stage(0); + inb(0x5000); + outb(0x0, 0x5000); + report("I/O bitmap - I/O pass", vmx_get_test_stage() == 0); + // test IO width, in/out + ((u8 *)io_bitmap_a)[0] = 0xFF; + vmx_set_test_stage(2); + inb(0x0); + report("I/O 
bitmap - trap in", vmx_get_test_stage() == 3); + vmx_set_test_stage(3); + outw(0x0, 0x0); + report("I/O bitmap - trap out", vmx_get_test_stage() == 4); + vmx_set_test_stage(4); + inl(0x0); + report("I/O bitmap - I/O width, long", vmx_get_test_stage() == 5); + // test low/high IO port + vmx_set_test_stage(5); + ((u8 *)io_bitmap_a)[0x5000 / 8] = (1 << (0x5000 % 8)); + inb(0x5000); + report("I/O bitmap - I/O port, low part", vmx_get_test_stage() == 6); + vmx_set_test_stage(6); + ((u8 *)io_bitmap_b)[0x1000 / 8] = (1 << (0x1000 % 8)); + inb(0x9000); + report("I/O bitmap - I/O port, high part", vmx_get_test_stage() == 7); + // test partial pass + vmx_set_test_stage(7); + inl(0x4FFF); + report("I/O bitmap - partial pass", vmx_get_test_stage() == 8); + // test overrun + vmx_set_test_stage(8); + memset(io_bitmap_a, 0x0, PAGE_SIZE); + memset(io_bitmap_b, 0x0, PAGE_SIZE); + inl(0xFFFF); + report("I/O bitmap - overrun", vmx_get_test_stage() == 9); + vmx_set_test_stage(9); + vmcall(); + outb(0x0, 0x0); + report("I/O bitmap - ignore unconditional exiting", + vmx_get_test_stage() == 9); + vmx_set_test_stage(10); + vmcall(); + outb(0x0, 0x0); + report("I/O bitmap - unconditional exiting", + vmx_get_test_stage() == 11); +} + +static int iobmp_exit_handler() +{ + u64 guest_rip; + ulong reason, exit_qual; + u32 insn_len, ctrl_cpu0; + + guest_rip = vmcs_read(GUEST_RIP); + reason = vmcs_read(EXI_REASON) & 0xff; + exit_qual = vmcs_read(EXI_QUALIFICATION); + insn_len = vmcs_read(EXI_INST_LEN); + switch (reason) { + case VMX_IO: + switch (vmx_get_test_stage()) { + case 0: + case 1: + vmx_inc_test_stage(); + break; + case 2: + report("I/O bitmap - I/O width, byte", + (exit_qual & VMX_IO_SIZE_MASK) == _VMX_IO_BYTE); + report("I/O bitmap - I/O direction, in", exit_qual & VMX_IO_IN); + vmx_inc_test_stage(); + break; + case 3: + report("I/O bitmap - I/O width, word", + (exit_qual & VMX_IO_SIZE_MASK) == _VMX_IO_WORD); + report("I/O bitmap - I/O direction, out", + !(exit_qual & VMX_IO_IN)); + 
vmx_inc_test_stage(); + break; + case 4: + report("I/O bitmap - I/O width, long", + (exit_qual & VMX_IO_SIZE_MASK) == _VMX_IO_LONG); + vmx_inc_test_stage(); + break; + case 5: + if (((exit_qual & VMX_IO_PORT_MASK) >> VMX_IO_PORT_SHIFT) == 0x5000) + vmx_inc_test_stage(); + break; + case 6: + if (((exit_qual & VMX_IO_PORT_MASK) >> VMX_IO_PORT_SHIFT) == 0x9000) + vmx_inc_test_stage(); + break; + case 7: + if (((exit_qual & VMX_IO_PORT_MASK) >> VMX_IO_PORT_SHIFT) == 0x4FFF) + vmx_inc_test_stage(); + break; + case 8: + if (((exit_qual & VMX_IO_PORT_MASK) >> VMX_IO_PORT_SHIFT) == 0xFFFF) + vmx_inc_test_stage(); + break; + case 9: + case 10: + ctrl_cpu0 = vmcs_read(CPU_EXEC_CTRL0); + vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu0 & ~CPU_IO); + vmx_inc_test_stage(); + break; + default: + // Should not reach here + printf("ERROR : unexpected stage, %d\n", + vmx_get_test_stage()); + print_vmexit_info(); + return VMX_TEST_VMEXIT; + } + vmcs_write(GUEST_RIP, guest_rip + insn_len); + return VMX_TEST_RESUME; + case VMX_VMCALL: + switch (vmx_get_test_stage()) { + case 9: + ctrl_cpu0 = vmcs_read(CPU_EXEC_CTRL0); + ctrl_cpu0 |= CPU_IO | CPU_IO_BITMAP; + vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu0); + break; + case 10: + ctrl_cpu0 = vmcs_read(CPU_EXEC_CTRL0); + ctrl_cpu0 = (ctrl_cpu0 & ~CPU_IO_BITMAP) | CPU_IO; + vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu0); + break; + default: + // Should not reach here + printf("ERROR : unexpected stage, %d\n", + vmx_get_test_stage()); + print_vmexit_info(); + return VMX_TEST_VMEXIT; + } + vmcs_write(GUEST_RIP, guest_rip + insn_len); + return VMX_TEST_RESUME; + default: + printf("guest_rip = 0x%lx\n", guest_rip); + printf("\tERROR : Undefined exit reason, reason = %ld.\n", reason); + break; + } + return VMX_TEST_VMEXIT; +} + +#define INSN_CPU0 0 +#define INSN_CPU1 1 +#define INSN_ALWAYS_TRAP 2 + +#define FIELD_EXIT_QUAL (1 << 0) +#define FIELD_INSN_INFO (1 << 1) + +asm( + "insn_hlt: hlt;ret\n\t" + "insn_invlpg: invlpg 0x12345678;ret\n\t" + "insn_mwait: mwait;ret\n\t" + 
"insn_rdpmc: xor %ecx, %ecx; rdpmc;ret\n\t" + "insn_rdtsc: rdtsc;ret\n\t" + "insn_cr3_load: mov cr3,%rax; mov %rax,%cr3;ret\n\t" + "insn_cr3_store: mov %cr3,%rax;ret\n\t" +#ifdef __x86_64__ + "insn_cr8_load: mov %rax,%cr8;ret\n\t" + "insn_cr8_store: mov %cr8,%rax;ret\n\t" +#endif + "insn_monitor: monitor;ret\n\t" + "insn_pause: pause;ret\n\t" + "insn_wbinvd: wbinvd;ret\n\t" + "insn_cpuid: mov $10, %eax; cpuid;ret\n\t" + "insn_invd: invd;ret\n\t" + "insn_sgdt: sgdt gdt64_desc;ret\n\t" + "insn_lgdt: lgdt gdt64_desc;ret\n\t" + "insn_sidt: sidt idt_descr;ret\n\t" + "insn_lidt: lidt idt_descr;ret\n\t" + "insn_sldt: sldt %ax;ret\n\t" + "insn_lldt: xor %eax, %eax; lldt %ax;ret\n\t" + "insn_str: str %ax;ret\n\t" +); +extern void insn_hlt(); +extern void insn_invlpg(); +extern void insn_mwait(); +extern void insn_rdpmc(); +extern void insn_rdtsc(); +extern void insn_cr3_load(); +extern void insn_cr3_store(); +#ifdef __x86_64__ +extern void insn_cr8_load(); +extern void insn_cr8_store(); +#endif +extern void insn_monitor(); +extern void insn_pause(); +extern void insn_wbinvd(); +extern void insn_sgdt(); +extern void insn_lgdt(); +extern void insn_sidt(); +extern void insn_lidt(); +extern void insn_sldt(); +extern void insn_lldt(); +extern void insn_str(); +extern void insn_cpuid(); +extern void insn_invd(); + +u32 cur_insn; +u64 cr3; + +struct insn_table { + const char *name; + u32 flag; + void (*insn_func)(); + u32 type; + u32 reason; + ulong exit_qual; + u32 insn_info; + // Use FIELD_EXIT_QUAL and FIELD_INSN_INFO to define + // which field need to be tested, reason is always tested + u32 test_field; +}; + +/* + * Add more test cases of instruction intercept here. Elements in this + * table is: + * name/control flag/insn function/type/exit reason/exit qulification/ + * instruction info/field to test + * The last field defines which fields (exit_qual and insn_info) need to be + * tested in exit handler. If set to 0, only "reason" is checked. 
+ */ +static struct insn_table insn_table[] = { + // Flags for Primary Processor-Based VM-Execution Controls + {"HLT", CPU_HLT, insn_hlt, INSN_CPU0, 12, 0, 0, 0}, + {"INVLPG", CPU_INVLPG, insn_invlpg, INSN_CPU0, 14, + 0x12345678, 0, FIELD_EXIT_QUAL}, + {"MWAIT", CPU_MWAIT, insn_mwait, INSN_CPU0, 36, 0, 0, 0}, + {"RDPMC", CPU_RDPMC, insn_rdpmc, INSN_CPU0, 15, 0, 0, 0}, + {"RDTSC", CPU_RDTSC, insn_rdtsc, INSN_CPU0, 16, 0, 0, 0}, + {"CR3 load", CPU_CR3_LOAD, insn_cr3_load, INSN_CPU0, 28, 0x3, 0, + FIELD_EXIT_QUAL}, + {"CR3 store", CPU_CR3_STORE, insn_cr3_store, INSN_CPU0, 28, 0x13, 0, + FIELD_EXIT_QUAL}, +#ifdef __x86_64__ + {"CR8 load", CPU_CR8_LOAD, insn_cr8_load, INSN_CPU0, 28, 0x8, 0, + FIELD_EXIT_QUAL}, + {"CR8 store", CPU_CR8_STORE, insn_cr8_store, INSN_CPU0, 28, 0x18, 0, + FIELD_EXIT_QUAL}, +#endif + {"MONITOR", CPU_MONITOR, insn_monitor, INSN_CPU0, 39, 0, 0, 0}, + {"PAUSE", CPU_PAUSE, insn_pause, INSN_CPU0, 40, 0, 0, 0}, + // Flags for Secondary Processor-Based VM-Execution Controls + {"WBINVD", CPU_WBINVD, insn_wbinvd, INSN_CPU1, 54, 0, 0, 0}, + {"DESC_TABLE (SGDT)", CPU_DESC_TABLE, insn_sgdt, INSN_CPU1, 46, 0, 0, 0}, + {"DESC_TABLE (LGDT)", CPU_DESC_TABLE, insn_lgdt, INSN_CPU1, 46, 0, 0, 0}, + {"DESC_TABLE (SIDT)", CPU_DESC_TABLE, insn_sidt, INSN_CPU1, 46, 0, 0, 0}, + {"DESC_TABLE (LIDT)", CPU_DESC_TABLE, insn_lidt, INSN_CPU1, 46, 0, 0, 0}, + {"DESC_TABLE (SLDT)", CPU_DESC_TABLE, insn_sldt, INSN_CPU1, 47, 0, 0, 0}, + {"DESC_TABLE (LLDT)", CPU_DESC_TABLE, insn_lldt, INSN_CPU1, 47, 0, 0, 0}, + {"DESC_TABLE (STR)", CPU_DESC_TABLE, insn_str, INSN_CPU1, 47, 0, 0, 0}, + /* LTR causes a #GP if done with a busy selector, so it is not tested. 
*/ + // Instructions always trap + {"CPUID", 0, insn_cpuid, INSN_ALWAYS_TRAP, 10, 0, 0, 0}, + {"INVD", 0, insn_invd, INSN_ALWAYS_TRAP, 13, 0, 0, 0}, + // Instructions never trap + {NULL}, +}; + +static int insn_intercept_init() +{ + u32 ctrl_cpu; + + ctrl_cpu = ctrl_cpu_rev[0].set | CPU_SECONDARY; + ctrl_cpu &= ctrl_cpu_rev[0].clr; + vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu); + vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu_rev[1].set); + cr3 = read_cr3(); + return VMX_TEST_START; +} + +static void insn_intercept_main() +{ + for (cur_insn = 0; insn_table[cur_insn].name != NULL; cur_insn++) { + vmx_set_test_stage(cur_insn * 2); + if ((insn_table[cur_insn].type == INSN_CPU0 && + !(ctrl_cpu_rev[0].clr & insn_table[cur_insn].flag)) || + (insn_table[cur_insn].type == INSN_CPU1 && + !(ctrl_cpu_rev[1].clr & insn_table[cur_insn].flag))) { + printf("\tCPU_CTRL%d.CPU_%s is not supported.\n", + insn_table[cur_insn].type - INSN_CPU0, + insn_table[cur_insn].name); + continue; + } + + if ((insn_table[cur_insn].type == INSN_CPU0 && + !(ctrl_cpu_rev[0].set & insn_table[cur_insn].flag)) || + (insn_table[cur_insn].type == INSN_CPU1 && + !(ctrl_cpu_rev[1].set & insn_table[cur_insn].flag))) { + /* skip hlt, it stalls the guest and is tested below */ + if (insn_table[cur_insn].insn_func != insn_hlt) + insn_table[cur_insn].insn_func(); + report("execute %s", vmx_get_test_stage() == cur_insn * 2, + insn_table[cur_insn].name); + } else if (insn_table[cur_insn].type != INSN_ALWAYS_TRAP) + printf("\tCPU_CTRL%d.CPU_%s always traps.\n", + insn_table[cur_insn].type - INSN_CPU0, + insn_table[cur_insn].name); + + vmcall(); + + insn_table[cur_insn].insn_func(); + report("intercept %s", vmx_get_test_stage() == cur_insn * 2 + 1, + insn_table[cur_insn].name); + + vmx_set_test_stage(cur_insn * 2 + 1); + vmcall(); + } +} + +static int insn_intercept_exit_handler() +{ + u64 guest_rip; + u32 reason; + ulong exit_qual; + u32 insn_len; + u32 insn_info; + bool pass; + + guest_rip = vmcs_read(GUEST_RIP); + reason = 
vmcs_read(EXI_REASON) & 0xff; + exit_qual = vmcs_read(EXI_QUALIFICATION); + insn_len = vmcs_read(EXI_INST_LEN); + insn_info = vmcs_read(EXI_INST_INFO); + + if (reason == VMX_VMCALL) { + u32 val = 0; + + if (insn_table[cur_insn].type == INSN_CPU0) + val = vmcs_read(CPU_EXEC_CTRL0); + else if (insn_table[cur_insn].type == INSN_CPU1) + val = vmcs_read(CPU_EXEC_CTRL1); + + if (vmx_get_test_stage() & 1) + val &= ~insn_table[cur_insn].flag; + else + val |= insn_table[cur_insn].flag; + + if (insn_table[cur_insn].type == INSN_CPU0) + vmcs_write(CPU_EXEC_CTRL0, val | ctrl_cpu_rev[0].set); + else if (insn_table[cur_insn].type == INSN_CPU1) + vmcs_write(CPU_EXEC_CTRL1, val | ctrl_cpu_rev[1].set); + } else { + pass = (cur_insn * 2 == vmx_get_test_stage()) && + insn_table[cur_insn].reason == reason; + if (insn_table[cur_insn].test_field & FIELD_EXIT_QUAL && + insn_table[cur_insn].exit_qual != exit_qual) + pass = false; + if (insn_table[cur_insn].test_field & FIELD_INSN_INFO && + insn_table[cur_insn].insn_info != insn_info) + pass = false; + if (pass) + vmx_inc_test_stage(); + } + vmcs_write(GUEST_RIP, guest_rip + insn_len); + return VMX_TEST_RESUME; +} + + +static int setup_ept() +{ + int support_2m; + unsigned long end_of_memory; + + if (!(ept_vpid.val & EPT_CAP_UC) && + !(ept_vpid.val & EPT_CAP_WB)) { + printf("\tEPT paging-structure memory type " + "UC&WB are not supported\n"); + return 1; + } + if (ept_vpid.val & EPT_CAP_UC) + eptp = EPT_MEM_TYPE_UC; + else + eptp = EPT_MEM_TYPE_WB; + if (!(ept_vpid.val & EPT_CAP_PWL4)) { + printf("\tPWL4 is not supported\n"); + return 1; + } + eptp |= (3 << EPTP_PG_WALK_LEN_SHIFT); + pml4 = alloc_page(); + memset(pml4, 0, PAGE_SIZE); + eptp |= virt_to_phys(pml4); + vmcs_write(EPTP, eptp); + support_2m = !!(ept_vpid.val & EPT_CAP_2M_PAGE); + end_of_memory = fwcfg_get_u64(FW_CFG_RAM_SIZE); + if (end_of_memory < (1ul << 32)) + end_of_memory = (1ul << 32); + setup_ept_range(pml4, 0, end_of_memory, 0, support_2m, + EPT_WA | EPT_RA | EPT_EA); + 
return 0; +} + +static int apic_version; + +static int ept_init() +{ + u32 ctrl_cpu[2]; + + if (!(ctrl_cpu_rev[0].clr & CPU_SECONDARY) || + !(ctrl_cpu_rev[1].clr & CPU_EPT)) { + printf("\tEPT is not supported"); + return VMX_TEST_EXIT; + } + + ctrl_cpu[0] = vmcs_read(CPU_EXEC_CTRL0); + ctrl_cpu[1] = vmcs_read(CPU_EXEC_CTRL1); + ctrl_cpu[0] = (ctrl_cpu[0] | CPU_SECONDARY) + & ctrl_cpu_rev[0].clr; + ctrl_cpu[1] = (ctrl_cpu[1] | CPU_EPT) + & ctrl_cpu_rev[1].clr; + vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]); + vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]); + if (setup_ept()) + return VMX_TEST_EXIT; + data_page1 = alloc_page(); + data_page2 = alloc_page(); + memset(data_page1, 0x0, PAGE_SIZE); + memset(data_page2, 0x0, PAGE_SIZE); + *((u32 *)data_page1) = MAGIC_VAL_1; + *((u32 *)data_page2) = MAGIC_VAL_2; + install_ept(pml4, (unsigned long)data_page1, (unsigned long)data_page2, + EPT_RA | EPT_WA | EPT_EA); + + apic_version = *((u32 *)0xfee00030UL); + return VMX_TEST_START; +} + +static void ept_main() +{ + vmx_set_test_stage(0); + if (*((u32 *)data_page2) != MAGIC_VAL_1 || + *((u32 *)data_page1) != MAGIC_VAL_1) + report("EPT basic framework - read", 0); + else { + *((u32 *)data_page2) = MAGIC_VAL_3; + vmcall(); + if (vmx_get_test_stage() == 1) { + if (*((u32 *)data_page1) == MAGIC_VAL_3 && + *((u32 *)data_page2) == MAGIC_VAL_2) + report("EPT basic framework", 1); + else + report("EPT basic framework - remap", 1); + } + } + // Test EPT Misconfigurations + vmx_set_test_stage(1); + vmcall(); + *((u32 *)data_page1) = MAGIC_VAL_1; + if (vmx_get_test_stage() != 2) { + report("EPT misconfigurations", 0); + goto t1; + } + vmx_set_test_stage(2); + vmcall(); + *((u32 *)data_page1) = MAGIC_VAL_1; + report("EPT misconfigurations", vmx_get_test_stage() == 3); +t1: + // Test EPT violation + vmx_set_test_stage(3); + vmcall(); + *((u32 *)data_page1) = MAGIC_VAL_1; + report("EPT violation - page permission", vmx_get_test_stage() == 4); + // Violation caused by EPT paging structure + 
vmx_set_test_stage(4); + vmcall(); + *((u32 *)data_page1) = MAGIC_VAL_2; + report("EPT violation - paging structure", vmx_get_test_stage() == 5); + + // Test EPT access to L1 MMIO + vmx_set_test_stage(6); + report("EPT - MMIO access", *((u32 *)0xfee00030UL) == apic_version); + + // Test invalid operand for INVEPT + vmcall(); + report("EPT - unsupported INVEPT", vmx_get_test_stage() == 7); +} + +bool invept_test(int type, u64 eptp) +{ + bool ret, supported; + + supported = ept_vpid.val & (EPT_CAP_INVEPT_SINGLE >> INVEPT_SINGLE << type); + ret = invept(type, eptp); + + if (ret == !supported) + return false; + + if (!supported) + printf("WARNING: unsupported invept passed!\n"); + else + printf("WARNING: invept failed!\n"); + + return true; +} + +static int ept_exit_handler() +{ + u64 guest_rip; + ulong reason; + u32 insn_len; + u32 exit_qual; + static unsigned long data_page1_pte, data_page1_pte_pte; + + guest_rip = vmcs_read(GUEST_RIP); + reason = vmcs_read(EXI_REASON) & 0xff; + insn_len = vmcs_read(EXI_INST_LEN); + exit_qual = vmcs_read(EXI_QUALIFICATION); + switch (reason) { + case VMX_VMCALL: + switch (vmx_get_test_stage()) { + case 0: + if (*((u32 *)data_page1) == MAGIC_VAL_3 && + *((u32 *)data_page2) == MAGIC_VAL_2) { + vmx_inc_test_stage(); + install_ept(pml4, (unsigned long)data_page2, + (unsigned long)data_page2, + EPT_RA | EPT_WA | EPT_EA); + } else + report("EPT basic framework - write", 0); + break; + case 1: + install_ept(pml4, (unsigned long)data_page1, + (unsigned long)data_page1, EPT_WA); + ept_sync(INVEPT_SINGLE, eptp); + break; + case 2: + install_ept(pml4, (unsigned long)data_page1, + (unsigned long)data_page1, + EPT_RA | EPT_WA | EPT_EA | + (2 << EPT_MEM_TYPE_SHIFT)); + ept_sync(INVEPT_SINGLE, eptp); + break; + case 3: + data_page1_pte = get_ept_pte(pml4, + (unsigned long)data_page1, 1); + set_ept_pte(pml4, (unsigned long)data_page1, + 1, data_page1_pte & (~EPT_PRESENT)); + ept_sync(INVEPT_SINGLE, eptp); + break; + case 4: + data_page1_pte = 
get_ept_pte(pml4, + (unsigned long)data_page1, 2); + data_page1_pte &= PAGE_MASK; + data_page1_pte_pte = get_ept_pte(pml4, data_page1_pte, 2); + set_ept_pte(pml4, data_page1_pte, 2, + data_page1_pte_pte & (~EPT_PRESENT)); + ept_sync(INVEPT_SINGLE, eptp); + break; + case 6: + if (!invept_test(0, eptp)) + vmx_inc_test_stage(); + break; + // Should not reach here + default: + printf("ERROR - unexpected stage, %d.\n", + vmx_get_test_stage()); + print_vmexit_info(); + return VMX_TEST_VMEXIT; + } + vmcs_write(GUEST_RIP, guest_rip + insn_len); + return VMX_TEST_RESUME; + case VMX_EPT_MISCONFIG: + switch (vmx_get_test_stage()) { + case 1: + case 2: + vmx_inc_test_stage(); + install_ept(pml4, (unsigned long)data_page1, + (unsigned long)data_page1, + EPT_RA | EPT_WA | EPT_EA); + ept_sync(INVEPT_SINGLE, eptp); + break; + // Should not reach here + default: + printf("ERROR - unexpected stage, %d.\n", + vmx_get_test_stage()); + print_vmexit_info(); + return VMX_TEST_VMEXIT; + } + return VMX_TEST_RESUME; + case VMX_EPT_VIOLATION: + switch(vmx_get_test_stage()) { + case 3: + if (exit_qual == (EPT_VLT_WR | EPT_VLT_LADDR_VLD | + EPT_VLT_PADDR)) + vmx_inc_test_stage(); + set_ept_pte(pml4, (unsigned long)data_page1, + 1, data_page1_pte | (EPT_PRESENT)); + ept_sync(INVEPT_SINGLE, eptp); + break; + case 4: + if (exit_qual == (EPT_VLT_RD | EPT_VLT_LADDR_VLD)) + vmx_inc_test_stage(); + set_ept_pte(pml4, data_page1_pte, 2, + data_page1_pte_pte | (EPT_PRESENT)); + ept_sync(INVEPT_SINGLE, eptp); + break; + default: + // Should not reach here + printf("ERROR : unexpected stage, %d\n", + vmx_get_test_stage()); + print_vmexit_info(); + return VMX_TEST_VMEXIT; + } + return VMX_TEST_RESUME; + default: + printf("Unknown exit reason, %ld\n", reason); + print_vmexit_info(); + } + return VMX_TEST_VMEXIT; +} + +bool invvpid_test(int type, u16 vpid) +{ + bool ret, supported; + + supported = ept_vpid.val & (VPID_CAP_INVVPID_SINGLE >> INVVPID_SINGLE << type); + ret = invvpid(type, vpid, 0); + + if (ret 
== !supported) + return false; + + if (!supported) + printf("WARNING: unsupported invvpid passed!\n"); + else + printf("WARNING: invvpid failed!\n"); + + return true; +} + +static int vpid_init() +{ + u32 ctrl_cpu1; + + if (!(ctrl_cpu_rev[0].clr & CPU_SECONDARY) || + !(ctrl_cpu_rev[1].clr & CPU_VPID)) { + printf("\tVPID is not supported"); + return VMX_TEST_EXIT; + } + + ctrl_cpu1 = vmcs_read(CPU_EXEC_CTRL1); + ctrl_cpu1 |= CPU_VPID; + vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu1); + return VMX_TEST_START; +} + +static void vpid_main() +{ + vmx_set_test_stage(0); + vmcall(); + report("INVVPID SINGLE ADDRESS", vmx_get_test_stage() == 1); + vmx_set_test_stage(2); + vmcall(); + report("INVVPID SINGLE", vmx_get_test_stage() == 3); + vmx_set_test_stage(4); + vmcall(); + report("INVVPID ALL", vmx_get_test_stage() == 5); +} + +static int vpid_exit_handler() +{ + u64 guest_rip; + ulong reason; + u32 insn_len; + + guest_rip = vmcs_read(GUEST_RIP); + reason = vmcs_read(EXI_REASON) & 0xff; + insn_len = vmcs_read(EXI_INST_LEN); + + switch (reason) { + case VMX_VMCALL: + switch(vmx_get_test_stage()) { + case 0: + if (!invvpid_test(INVVPID_SINGLE_ADDRESS, 1)) + vmx_inc_test_stage(); + break; + case 2: + if (!invvpid_test(INVVPID_SINGLE, 1)) + vmx_inc_test_stage(); + break; + case 4: + if (!invvpid_test(INVVPID_ALL, 1)) + vmx_inc_test_stage(); + break; + default: + printf("ERROR: unexpected stage, %d\n", + vmx_get_test_stage()); + print_vmexit_info(); + return VMX_TEST_VMEXIT; + } + vmcs_write(GUEST_RIP, guest_rip + insn_len); + return VMX_TEST_RESUME; + default: + printf("Unknown exit reason, %ld\n", reason); + print_vmexit_info(); + } + return VMX_TEST_VMEXIT; +} + +#define TIMER_VECTOR 222 + +static volatile bool timer_fired; + +static void timer_isr(isr_regs_t *regs) +{ + timer_fired = true; + apic_write(APIC_EOI, 0); +} + +static int interrupt_init(struct vmcs *vmcs) +{ + msr_bmp_init(); + vmcs_write(PIN_CONTROLS, vmcs_read(PIN_CONTROLS) & ~PIN_EXTINT); + handle_irq(TIMER_VECTOR, 
timer_isr); + return VMX_TEST_START; +} + +static void interrupt_main(void) +{ + long long start, loops; + + vmx_set_test_stage(0); + + apic_write(APIC_LVTT, TIMER_VECTOR); + irq_enable(); + + apic_write(APIC_TMICT, 1); + for (loops = 0; loops < 10000000 && !timer_fired; loops++) + asm volatile ("nop"); + report("direct interrupt while running guest", timer_fired); + + apic_write(APIC_TMICT, 0); + irq_disable(); + vmcall(); + timer_fired = false; + apic_write(APIC_TMICT, 1); + for (loops = 0; loops < 10000000 && !timer_fired; loops++) + asm volatile ("nop"); + report("intercepted interrupt while running guest", timer_fired); + + irq_enable(); + apic_write(APIC_TMICT, 0); + irq_disable(); + vmcall(); + timer_fired = false; + start = rdtsc(); + apic_write(APIC_TMICT, 1000000); + + asm volatile ("sti; hlt"); + + report("direct interrupt + hlt", + rdtsc() - start > 1000000 && timer_fired); + + apic_write(APIC_TMICT, 0); + irq_disable(); + vmcall(); + timer_fired = false; + start = rdtsc(); + apic_write(APIC_TMICT, 1000000); + + asm volatile ("sti; hlt"); + + report("intercepted interrupt + hlt", + rdtsc() - start > 10000 && timer_fired); + + apic_write(APIC_TMICT, 0); + irq_disable(); + vmcall(); + timer_fired = false; + start = rdtsc(); + apic_write(APIC_TMICT, 1000000); + + irq_enable(); + asm volatile ("nop"); + vmcall(); + + report("direct interrupt + activity state hlt", + rdtsc() - start > 10000 && timer_fired); + + apic_write(APIC_TMICT, 0); + irq_disable(); + vmcall(); + timer_fired = false; + start = rdtsc(); + apic_write(APIC_TMICT, 1000000); + + irq_enable(); + asm volatile ("nop"); + vmcall(); + + report("intercepted interrupt + activity state hlt", + rdtsc() - start > 10000 && timer_fired); + + apic_write(APIC_TMICT, 0); + irq_disable(); + vmx_set_test_stage(7); + vmcall(); + timer_fired = false; + apic_write(APIC_TMICT, 1); + for (loops = 0; loops < 10000000 && !timer_fired; loops++) + asm volatile ("nop"); + report("running a guest with interrupt 
acknowledgement set", timer_fired); +} + +static int interrupt_exit_handler(void) +{ + u64 guest_rip = vmcs_read(GUEST_RIP); + ulong reason = vmcs_read(EXI_REASON) & 0xff; + u32 insn_len = vmcs_read(EXI_INST_LEN); + + switch (reason) { + case VMX_VMCALL: + switch (vmx_get_test_stage()) { + case 0: + case 2: + case 5: + vmcs_write(PIN_CONTROLS, + vmcs_read(PIN_CONTROLS) | PIN_EXTINT); + break; + case 7: + vmcs_write(EXI_CONTROLS, vmcs_read(EXI_CONTROLS) | EXI_INTA); + vmcs_write(PIN_CONTROLS, + vmcs_read(PIN_CONTROLS) | PIN_EXTINT); + break; + case 1: + case 3: + vmcs_write(PIN_CONTROLS, + vmcs_read(PIN_CONTROLS) & ~PIN_EXTINT); + break; + case 4: + case 6: + vmcs_write(GUEST_ACTV_STATE, ACTV_HLT); + break; + } + vmx_inc_test_stage(); + vmcs_write(GUEST_RIP, guest_rip + insn_len); + return VMX_TEST_RESUME; + case VMX_EXTINT: + if (vmcs_read(EXI_CONTROLS) & EXI_INTA) { + int vector = vmcs_read(EXI_INTR_INFO) & 0xff; + handle_external_interrupt(vector); + } else { + irq_enable(); + asm volatile ("nop"); + irq_disable(); + } + if (vmx_get_test_stage() >= 2) + vmcs_write(GUEST_ACTV_STATE, ACTV_ACTIVE); + return VMX_TEST_RESUME; + default: + printf("Unknown exit reason, %ld\n", reason); + print_vmexit_info(); + } + + return VMX_TEST_VMEXIT; +} + +static int dbgctls_init(struct vmcs *vmcs) +{ + u64 dr7 = 0x402; + u64 zero = 0; + + msr_bmp_init(); + asm volatile( + "mov %0,%%dr0\n\t" + "mov %0,%%dr1\n\t" + "mov %0,%%dr2\n\t" + "mov %1,%%dr7\n\t" + : : "r" (zero), "r" (dr7)); + wrmsr(MSR_IA32_DEBUGCTLMSR, 0x1); + vmcs_write(GUEST_DR7, 0x404); + vmcs_write(GUEST_DEBUGCTL, 0x2); + + vmcs_write(ENT_CONTROLS, vmcs_read(ENT_CONTROLS) | ENT_LOAD_DBGCTLS); + vmcs_write(EXI_CONTROLS, vmcs_read(EXI_CONTROLS) | EXI_SAVE_DBGCTLS); + + return VMX_TEST_START; +} + +static void dbgctls_main(void) +{ + u64 dr7, debugctl; + + asm volatile("mov %%dr7,%0" : "=r" (dr7)); + debugctl = rdmsr(MSR_IA32_DEBUGCTLMSR); + /* Commented out: KVM does not support DEBUGCTL so far */ + (void)debugctl; + 
report("Load debug controls", dr7 == 0x404 /* && debugctl == 0x2 */); + + dr7 = 0x408; + asm volatile("mov %0,%%dr7" : : "r" (dr7)); + wrmsr(MSR_IA32_DEBUGCTLMSR, 0x3); + + vmx_set_test_stage(0); + vmcall(); + report("Save debug controls", vmx_get_test_stage() == 1); + + if (ctrl_enter_rev.set & ENT_LOAD_DBGCTLS || + ctrl_exit_rev.set & EXI_SAVE_DBGCTLS) { + printf("\tDebug controls are always loaded/saved\n"); + return; + } + vmx_set_test_stage(2); + vmcall(); + + asm volatile("mov %%dr7,%0" : "=r" (dr7)); + debugctl = rdmsr(MSR_IA32_DEBUGCTLMSR); + /* Commented out: KVM does not support DEBUGCTL so far */ + (void)debugctl; + report("Guest=host debug controls", dr7 == 0x402 /* && debugctl == 0x1 */); + + dr7 = 0x408; + asm volatile("mov %0,%%dr7" : : "r" (dr7)); + wrmsr(MSR_IA32_DEBUGCTLMSR, 0x3); + + vmx_set_test_stage(3); + vmcall(); + report("Don't save debug controls", vmx_get_test_stage() == 4); +} + +static int dbgctls_exit_handler(void) +{ + unsigned int reason = vmcs_read(EXI_REASON) & 0xff; + u32 insn_len = vmcs_read(EXI_INST_LEN); + u64 guest_rip = vmcs_read(GUEST_RIP); + u64 dr7, debugctl; + + asm volatile("mov %%dr7,%0" : "=r" (dr7)); + debugctl = rdmsr(MSR_IA32_DEBUGCTLMSR); + + switch (reason) { + case VMX_VMCALL: + switch (vmx_get_test_stage()) { + case 0: + if (dr7 == 0x400 && debugctl == 0 && + vmcs_read(GUEST_DR7) == 0x408 /* && + Commented out: KVM does not support DEBUGCTL so far + vmcs_read(GUEST_DEBUGCTL) == 0x3 */) + vmx_inc_test_stage(); + break; + case 2: + dr7 = 0x402; + asm volatile("mov %0,%%dr7" : : "r" (dr7)); + wrmsr(MSR_IA32_DEBUGCTLMSR, 0x1); + vmcs_write(GUEST_DR7, 0x404); + vmcs_write(GUEST_DEBUGCTL, 0x2); + + vmcs_write(ENT_CONTROLS, + vmcs_read(ENT_CONTROLS) & ~ENT_LOAD_DBGCTLS); + vmcs_write(EXI_CONTROLS, + vmcs_read(EXI_CONTROLS) & ~EXI_SAVE_DBGCTLS); + break; + case 3: + if (dr7 == 0x400 && debugctl == 0 && + vmcs_read(GUEST_DR7) == 0x404 /* && + Commented out: KVM does not support DEBUGCTL so far + vmcs_read(GUEST_DEBUGCTL) 
== 0x2 */) + vmx_inc_test_stage(); + break; + } + vmcs_write(GUEST_RIP, guest_rip + insn_len); + return VMX_TEST_RESUME; + default: + printf("Unknown exit reason, %d\n", reason); + print_vmexit_info(); + } + return VMX_TEST_VMEXIT; +} + +struct vmx_msr_entry { + u32 index; + u32 reserved; + u64 value; +} __attribute__((packed)); + +#define MSR_MAGIC 0x31415926 +struct vmx_msr_entry *exit_msr_store, *entry_msr_load, *exit_msr_load; + +static int msr_switch_init(struct vmcs *vmcs) +{ + msr_bmp_init(); + exit_msr_store = alloc_page(); + exit_msr_load = alloc_page(); + entry_msr_load = alloc_page(); + memset(exit_msr_store, 0, PAGE_SIZE); + memset(exit_msr_load, 0, PAGE_SIZE); + memset(entry_msr_load, 0, PAGE_SIZE); + entry_msr_load[0].index = MSR_KERNEL_GS_BASE; + entry_msr_load[0].value = MSR_MAGIC; + + vmx_set_test_stage(1); + vmcs_write(ENT_MSR_LD_CNT, 1); + vmcs_write(ENTER_MSR_LD_ADDR, (u64)entry_msr_load); + vmcs_write(EXI_MSR_ST_CNT, 1); + vmcs_write(EXIT_MSR_ST_ADDR, (u64)exit_msr_store); + vmcs_write(EXI_MSR_LD_CNT, 1); + vmcs_write(EXIT_MSR_LD_ADDR, (u64)exit_msr_load); + return VMX_TEST_START; +} + +static void msr_switch_main() +{ + if (vmx_get_test_stage() == 1) { + report("VM entry MSR load", + rdmsr(MSR_KERNEL_GS_BASE) == MSR_MAGIC); + vmx_set_test_stage(2); + wrmsr(MSR_KERNEL_GS_BASE, MSR_MAGIC + 1); + exit_msr_store[0].index = MSR_KERNEL_GS_BASE; + exit_msr_load[0].index = MSR_KERNEL_GS_BASE; + exit_msr_load[0].value = MSR_MAGIC + 2; + } + vmcall(); +} + +static int msr_switch_exit_handler() +{ + ulong reason; + + reason = vmcs_read(EXI_REASON); + if (reason == VMX_VMCALL && vmx_get_test_stage() == 2) { + report("VM exit MSR store", + exit_msr_store[0].value == MSR_MAGIC + 1); + report("VM exit MSR load", + rdmsr(MSR_KERNEL_GS_BASE) == MSR_MAGIC + 2); + vmx_set_test_stage(3); + entry_msr_load[0].index = MSR_FS_BASE; + return VMX_TEST_RESUME; + } + printf("ERROR %s: unexpected stage=%u or reason=%lu\n", + __func__, vmx_get_test_stage(), reason); + 
return VMX_TEST_EXIT; +} + +static int msr_switch_entry_failure(struct vmentry_failure *failure) +{ + ulong reason; + + if (failure->early) { + printf("ERROR %s: early exit\n", __func__); + return VMX_TEST_EXIT; + } + + reason = vmcs_read(EXI_REASON); + if (reason == (VMX_ENTRY_FAILURE | VMX_FAIL_MSR) && + vmx_get_test_stage() == 3) { + report("VM entry MSR load: try to load FS_BASE", + vmcs_read(EXI_QUALIFICATION) == 1); + return VMX_TEST_VMEXIT; + } + printf("ERROR %s: unexpected stage=%u or reason=%lu\n", + __func__, vmx_get_test_stage(), reason); + return VMX_TEST_EXIT; +} + +static int vmmcall_init(struct vmcs *vmcs ) +{ + vmcs_write(EXC_BITMAP, 1 << UD_VECTOR); + return VMX_TEST_START; +} + +static void vmmcall_main(void) +{ + asm volatile( + "mov $0xABCD, %%rax\n\t" + "vmmcall\n\t" + ::: "rax"); + + report("VMMCALL", 0); +} + +static int vmmcall_exit_handler() +{ + ulong reason; + + reason = vmcs_read(EXI_REASON); + switch (reason) { + case VMX_VMCALL: + printf("here\n"); + report("VMMCALL triggers #UD", 0); + break; + case VMX_EXC_NMI: + report("VMMCALL triggers #UD", + (vmcs_read(EXI_INTR_INFO) & 0xff) == UD_VECTOR); + break; + default: + printf("Unknown exit reason, %ld\n", reason); + print_vmexit_info(); + } + + return VMX_TEST_VMEXIT; +} + +static int disable_rdtscp_init(struct vmcs *vmcs) +{ + u32 ctrl_cpu1; + + if (ctrl_cpu_rev[0].clr & CPU_SECONDARY) { + ctrl_cpu1 = vmcs_read(CPU_EXEC_CTRL1); + ctrl_cpu1 &= ~CPU_RDTSCP; + vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu1); + } + + return VMX_TEST_START; +} + +static void disable_rdtscp_ud_handler(struct ex_regs *regs) +{ + switch (vmx_get_test_stage()) { + case 0: + report("RDTSCP triggers #UD", true); + vmx_inc_test_stage(); + regs->rip += 3; + break; + case 2: + report("RDPID triggers #UD", true); + vmx_inc_test_stage(); + regs->rip += 4; + break; + } + return; + +} + +static void disable_rdtscp_main(void) +{ + /* Test that #UD is properly injected in L2. 
*/ + handle_exception(UD_VECTOR, disable_rdtscp_ud_handler); + + vmx_set_test_stage(0); + asm volatile("rdtscp" : : : "eax", "ecx", "edx"); + vmcall(); + asm volatile(".byte 0xf3, 0x0f, 0xc7, 0xf8" : : : "eax"); + vmcall(); +} + +static int disable_rdtscp_exit_handler(void) +{ + unsigned int reason = vmcs_read(EXI_REASON) & 0xff; + + switch (reason) { + case VMX_VMCALL: + switch (vmx_get_test_stage()) { + case 0: + report("RDTSCP triggers #UD", false); + vmx_inc_test_stage(); + /* fallthrough */ + case 1: + vmx_inc_test_stage(); + vmcs_write(GUEST_RIP, vmcs_read(GUEST_RIP) + 3); + return VMX_TEST_RESUME; + case 2: + report("RDPID triggers #UD", false); + break; + } + break; + + default: + printf("Unknown exit reason, %d\n", reason); + print_vmexit_info(); + } + return VMX_TEST_VMEXIT; +} + +int int3_init() +{ + vmcs_write(EXC_BITMAP, ~0u); + return VMX_TEST_START; +} + +void int3_guest_main() +{ + asm volatile ("int3"); +} + +int int3_exit_handler() +{ + u32 reason = vmcs_read(EXI_REASON); + u32 intr_info = vmcs_read(EXI_INTR_INFO); + + report("L1 intercepts #BP", reason == VMX_EXC_NMI && + (intr_info & INTR_INFO_VALID_MASK) && + (intr_info & INTR_INFO_VECTOR_MASK) == BP_VECTOR && + ((intr_info & INTR_INFO_INTR_TYPE_MASK) >> + INTR_INFO_INTR_TYPE_SHIFT) == VMX_INTR_TYPE_SOFT_EXCEPTION); + + return VMX_TEST_VMEXIT; +} + +int into_init() +{ + vmcs_write(EXC_BITMAP, ~0u); + return VMX_TEST_START; +} + +void into_guest_main() +{ + struct far_pointer32 fp = { + .offset = (uintptr_t)&&into, + .selector = KERNEL_CS32, + }; + register uintptr_t rsp asm("rsp"); + + if (fp.offset != (uintptr_t)&&into) { + printf("Code address too high.\n"); + return; + } + if ((u32)rsp != rsp) { + printf("Stack address too high.\n"); + return; + } + + asm goto ("lcall *%0" : : "m" (fp) : "rax" : into); + return; +into: + asm volatile (".code32;" + "movl $0x7fffffff, %eax;" + "addl %eax, %eax;" + "into;" + "lret;" + ".code64"); + __builtin_unreachable(); +} + +int into_exit_handler() +{ + u32 
reason = vmcs_read(EXI_REASON); + u32 intr_info = vmcs_read(EXI_INTR_INFO); + + report("L1 intercepts #OF", reason == VMX_EXC_NMI && + (intr_info & INTR_INFO_VALID_MASK) && + (intr_info & INTR_INFO_VECTOR_MASK) == OF_VECTOR && + ((intr_info & INTR_INFO_INTR_TYPE_MASK) >> + INTR_INFO_INTR_TYPE_SHIFT) == VMX_INTR_TYPE_SOFT_EXCEPTION); + + return VMX_TEST_VMEXIT; +} + +/* name/init/guest_main/exit_handler/syscall_handler/guest_regs */ +struct vmx_test vmx_tests[] = { + { "null", NULL, basic_guest_main, basic_exit_handler, NULL, {0} }, + { "vmenter", NULL, vmenter_main, vmenter_exit_handler, NULL, {0} }, + { "preemption timer", preemption_timer_init, preemption_timer_main, + preemption_timer_exit_handler, NULL, {0} }, + { "control field PAT", test_ctrl_pat_init, test_ctrl_pat_main, + test_ctrl_pat_exit_handler, NULL, {0} }, + { "control field EFER", test_ctrl_efer_init, test_ctrl_efer_main, + test_ctrl_efer_exit_handler, NULL, {0} }, + { "CR shadowing", NULL, cr_shadowing_main, + cr_shadowing_exit_handler, NULL, {0} }, + { "I/O bitmap", iobmp_init, iobmp_main, iobmp_exit_handler, + NULL, {0} }, + { "instruction intercept", insn_intercept_init, insn_intercept_main, + insn_intercept_exit_handler, NULL, {0} }, + { "EPT framework", ept_init, ept_main, ept_exit_handler, NULL, {0} }, + { "VPID", vpid_init, vpid_main, vpid_exit_handler, NULL, {0} }, + { "interrupt", interrupt_init, interrupt_main, + interrupt_exit_handler, NULL, {0} }, + { "debug controls", dbgctls_init, dbgctls_main, dbgctls_exit_handler, + NULL, {0} }, + { "MSR switch", msr_switch_init, msr_switch_main, + msr_switch_exit_handler, NULL, {0}, msr_switch_entry_failure }, + { "vmmcall", vmmcall_init, vmmcall_main, vmmcall_exit_handler, NULL, {0} }, + { "disable RDTSCP", disable_rdtscp_init, disable_rdtscp_main, + disable_rdtscp_exit_handler, NULL, {0} }, + { "int3", int3_init, int3_guest_main, int3_exit_handler, NULL, {0} }, + { "into", into_init, into_guest_main, into_exit_handler, NULL, {0} }, + { NULL, NULL, 
NULL, NULL, NULL, {0} }, +}; diff --git a/tests/kvm-unit-tests/x86/xsave.c b/tests/kvm-unit-tests/x86/xsave.c new file mode 100644 index 00000000..52142d2c --- /dev/null +++ b/tests/kvm-unit-tests/x86/xsave.c @@ -0,0 +1,178 @@ +#include "libcflat.h" +#include "desc.h" +#include "processor.h" + +#ifdef __x86_64__ +#define uint64_t unsigned long +#else +#define uint64_t unsigned long long +#endif + +int xgetbv_checking(u32 index, u64 *result) +{ + u32 eax, edx; + + asm volatile(ASM_TRY("1f") + ".byte 0x0f,0x01,0xd0\n\t" /* xgetbv */ + "1:" + : "=a" (eax), "=d" (edx) + : "c" (index)); + *result = eax + ((u64)edx << 32); + return exception_vector(); +} + +int xsetbv_checking(u32 index, u64 value) +{ + u32 eax = value; + u32 edx = value >> 32; + + asm volatile(ASM_TRY("1f") + ".byte 0x0f,0x01,0xd1\n\t" /* xsetbv */ + "1:" + : : "a" (eax), "d" (edx), "c" (index)); + return exception_vector(); +} + +int write_cr4_checking(unsigned long val) +{ + asm volatile(ASM_TRY("1f") + "mov %0,%%cr4\n\t" + "1:": : "r" (val)); + return exception_vector(); +} + +#define CPUID_1_ECX_XSAVE (1 << 26) +#define CPUID_1_ECX_OSXSAVE (1 << 27) +int check_cpuid_1_ecx(unsigned int bit) +{ + return (cpuid(1).c & bit) != 0; +} + +uint64_t get_supported_xcr0(void) +{ + struct cpuid r; + r = cpuid_indexed(0xd, 0); + printf("eax %x, ebx %x, ecx %x, edx %x\n", + r.a, r.b, r.c, r.d); + return r.a + ((u64)r.d << 32); +} + +#define X86_CR4_OSXSAVE 0x00040000 +#define XCR_XFEATURE_ENABLED_MASK 0x00000000 +#define XCR_XFEATURE_ILLEGAL_MASK 0x00000010 + +#define XSTATE_FP 0x1 +#define XSTATE_SSE 0x2 +#define XSTATE_YMM 0x4 + +void test_xsave(void) +{ + unsigned long cr4; + uint64_t supported_xcr0; + uint64_t test_bits; + u64 xcr0; + + printf("Legal instruction testing:\n"); + + supported_xcr0 = get_supported_xcr0(); + printf("Supported XCR0 bits: 0x%lx\n", supported_xcr0); + + test_bits = XSTATE_FP | XSTATE_SSE; + report("Check minimal XSAVE required bits", + (supported_xcr0 & test_bits) == test_bits); + + 
cr4 = read_cr4(); + report("Set CR4 OSXSAVE", write_cr4_checking(cr4 | X86_CR4_OSXSAVE) == 0); + report("Check CPUID.1.ECX.OSXSAVE - expect 1", + check_cpuid_1_ecx(CPUID_1_ECX_OSXSAVE)); + + printf("\tLegal tests\n"); + test_bits = XSTATE_FP; + report("\t\txsetbv(XCR_XFEATURE_ENABLED_MASK, XSTATE_FP)", + xsetbv_checking(XCR_XFEATURE_ENABLED_MASK, test_bits) == 0); + + test_bits = XSTATE_FP | XSTATE_SSE; + report("\t\txsetbv(XCR_XFEATURE_ENABLED_MASK, XSTATE_FP | XSTATE_SSE)", + xsetbv_checking(XCR_XFEATURE_ENABLED_MASK, test_bits) == 0); + report(" xgetbv(XCR_XFEATURE_ENABLED_MASK)", + xgetbv_checking(XCR_XFEATURE_ENABLED_MASK, &xcr0) == 0); + + printf("\tIllegal tests\n"); + test_bits = 0; + report("\t\txsetbv(XCR_XFEATURE_ENABLED_MASK, 0) - expect #GP", + xsetbv_checking(XCR_XFEATURE_ENABLED_MASK, test_bits) == GP_VECTOR); + + test_bits = XSTATE_SSE; + report("\t\txsetbv(XCR_XFEATURE_ENABLED_MASK, XSTATE_SSE) - expect #GP", + xsetbv_checking(XCR_XFEATURE_ENABLED_MASK, test_bits) == GP_VECTOR); + + if (supported_xcr0 & XSTATE_YMM) { + test_bits = XSTATE_YMM; + report("\t\txsetbv(XCR_XFEATURE_ENABLED_MASK, XSTATE_YMM) - expect #GP", + xsetbv_checking(XCR_XFEATURE_ENABLED_MASK, test_bits) == GP_VECTOR); + + test_bits = XSTATE_FP | XSTATE_YMM; + report("\t\txsetbv(XCR_XFEATURE_ENABLED_MASK, XSTATE_FP | XSTATE_YMM) - expect #GP", + xsetbv_checking(XCR_XFEATURE_ENABLED_MASK, test_bits) == GP_VECTOR); + } + + test_bits = XSTATE_SSE; + report("\t\txsetbv(XCR_XFEATURE_ILLEGAL_MASK, XSTATE_FP) - expect #GP", + xsetbv_checking(XCR_XFEATURE_ILLEGAL_MASK, test_bits) == GP_VECTOR); + + test_bits = XSTATE_SSE; + report("\t\txgetbv(XCR_XFEATURE_ILLEGAL_MASK) - expect #GP", + xgetbv_checking(XCR_XFEATURE_ILLEGAL_MASK, &xcr0) == GP_VECTOR); + + cr4 &= ~X86_CR4_OSXSAVE; + report("Unset CR4 OSXSAVE", write_cr4_checking(cr4) == 0); + report("Check CPUID.1.ECX.OSXSAVE - expect 0", + check_cpuid_1_ecx(CPUID_1_ECX_OSXSAVE) == 0); + + printf("\tIllegal tests:\n"); +
test_bits = XSTATE_FP; + report("\t\txsetbv(XCR_XFEATURE_ENABLED_MASK, XSTATE_FP) - expect #UD", + xsetbv_checking(XCR_XFEATURE_ENABLED_MASK, test_bits) == UD_VECTOR); + + test_bits = XSTATE_FP | XSTATE_SSE; + report("\t\txsetbv(XCR_XFEATURE_ENABLED_MASK, XSTATE_FP | XSTATE_SSE) - expect #UD", + xsetbv_checking(XCR_XFEATURE_ENABLED_MASK, test_bits) == UD_VECTOR); + + printf("\tIllegal tests:\n"); + report("\txgetbv(XCR_XFEATURE_ENABLED_MASK) - expect #UD", + xgetbv_checking(XCR_XFEATURE_ENABLED_MASK, &xcr0) == UD_VECTOR); +} + +void test_no_xsave(void) +{ + unsigned long cr4; + u64 xcr0; + + report("Check CPUID.1.ECX.OSXSAVE - expect 0", + check_cpuid_1_ecx(CPUID_1_ECX_OSXSAVE) == 0); + + printf("Illegal instruction testing:\n"); + + cr4 = read_cr4(); + report("Set OSXSAVE in CR4 - expect #GP", + write_cr4_checking(cr4 | X86_CR4_OSXSAVE) == GP_VECTOR); + + report("Execute xgetbv - expect #UD", + xgetbv_checking(XCR_XFEATURE_ENABLED_MASK, &xcr0) == UD_VECTOR); + + report("Execute xsetbv - expect #UD", + xsetbv_checking(XCR_XFEATURE_ENABLED_MASK, 0x3) == UD_VECTOR); +} + +int main(void) +{ + setup_idt(); + if (check_cpuid_1_ecx(CPUID_1_ECX_XSAVE)) { + printf("CPU has XSAVE feature\n"); + test_xsave(); + } else { + printf("CPU don't has XSAVE feature\n"); + test_no_xsave(); + } + return report_summary(); +}