kvm-unit-test: Add missing files

This commit is contained in:
Fabian 2017-07-28 20:59:17 +02:00
parent dbe71128e6
commit 3a78beeb78
14 changed files with 1355 additions and 0 deletions

View file

@ -0,0 +1,30 @@
#include "kvmxx.hh"
#include "identity.hh"
#include "exception.hh"
#include <stdio.h>
// Guest/host shared flag: written by the guest, checked by the host after
// the vcpu exits.  Works because identity::vm maps guest memory 1:1 onto
// host memory.
static int global = 0;
// Runs *inside* the guest (via identity::vcpu); sets the flag to prove
// that guest code actually executed.
static void set_global()
{
    global = 1;
}
// Smoke test: create a VM with an identity memory map, run set_global()
// on one vcpu, and verify the guest's write is visible to the host.
// Returns 0 on success, 1 on failure (shell-style exit status).
int test_main(int ac, char** av)
{
    kvm::system system;
    kvm::vm vm(system);
    mem_map memmap(vm);
    identity::vm ident_vm(vm, memmap);
    kvm::vcpu vcpu(vm, 0);
    identity::vcpu thread(vcpu, set_global);
    vcpu.run();
    printf("global %d\n", global);
    return global == 1 ? 0 : 1;
}
// Entry point: try_main() converts any escaped exception into an error
// message and a nonzero exit status.
int main(int ac, char** av)
{
    return try_main(test_main, ac, av);
}

View file

@ -0,0 +1,146 @@
#include "kvmxx.hh"
#include "exception.hh"
#include "memmap.hh"
#include "identity.hh"
#include <stdlib.h>
#include <stdio.h>
#include <sys/time.h>
namespace {
const int page_size = 4096;
// Defaults: 1GiB of guest memory (256k pages), all of it in the logged
// slot.  Both are overridable on the command line (-m / -n, see
// parse_options below).
int64_t nr_total_pages = 256 * 1024;
int64_t nr_slot_pages = 256 * 1024;
// Return the current CLOCK_MONOTONIC reading, converted to nanoseconds.
uint64_t time_ns()
{
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    uint64_t ns = (uint64_t)ts.tv_sec * 1000000000;
    return ns + (uint64_t)ts.tv_nsec;
}
// Dirty nr_to_write pages, spread evenly over the first nr_pages pages of
// the region starting at slot_head (first byte of every selected page is
// incremented).
void write_mem(void* slot_head, int64_t nr_to_write, int64_t nr_pages)
{
    const int64_t stride = (nr_pages / nr_to_write) * page_size;
    char* p = static_cast<char*>(slot_head);
    for (int64_t done = 0; done < nr_to_write; ++done, p += stride) {
        ++*p;
    }
}
// Let the guest update nr_to_write pages selected from nr_pages pages.
// Binds write_mem() into an identity::vcpu thunk and enters the guest;
// vcpu.run() returns once the guest function has finished.
void do_guest_write(kvm::vcpu& vcpu, void* slot_head,
                    int64_t nr_to_write, int64_t nr_pages)
{
    identity::vcpu guest_write_thread(vcpu, std::bind(write_mem, slot_head,
                                                      nr_to_write, nr_pages));
    vcpu.run();
}
// Measure how long fetching the dirty log takes for increasing dirty-page
// counts (1, 2, 4, ... up to nr_slot_pages).  n, the count reported by
// the log, should match the i pages the guest actually dirtied.
void check_dirty_log(kvm::vcpu& vcpu, mem_slot& slot, void* slot_head)
{
    slot.set_dirty_logging(true);
    slot.update_dirty_log();
    for (int64_t i = 1; i <= nr_slot_pages; i *= 2) {
        do_guest_write(vcpu, slot_head, i, nr_slot_pages);
        uint64_t start_ns = time_ns();
        int n = slot.update_dirty_log();
        uint64_t end_ns = time_ns();
        // Cast explicitly: %lld expects long long, but uint64_t/int64_t
        // may be (unsigned) long on LP64, which makes the printf undefined.
        printf("get dirty log: %10lld ns for %10d dirty pages (expected %lld)\n",
               (long long)(end_ns - start_ns), n, (long long)i);
    }
    slot.set_dirty_logging(false);
}
}
// Parse -n <slot pages> and -m <total mem pages>; a trailing 'k'/'K'
// multiplies the value by 1024.  Exits with status 1 on invalid input.
void parse_options(int ac, char **av)
{
    int opt;
    char *endptr;
    while ((opt = getopt(ac, av, "n:m:")) != -1) {
        switch (opt) {
        case 'n':
            errno = 0;
            nr_slot_pages = strtol(optarg, &endptr, 10);
            if (errno || endptr == optarg) {
                printf("dirty-log-perf: Invalid number: -n %s\n", optarg);
                exit(1);
            }
            if (*endptr == 'k' || *endptr == 'K') {
                nr_slot_pages *= 1024;
            }
            break;
        case 'm':
            errno = 0;
            nr_total_pages = strtol(optarg, &endptr, 10);
            if (errno || endptr == optarg) {
                printf("dirty-log-perf: Invalid number: -m %s\n", optarg);
                exit(1);
            }
            if (*endptr == 'k' || *endptr == 'K') {
                nr_total_pages *= 1024;
            }
            break;
        default:
            printf("dirty-log-perf: Invalid option\n");
            exit(1);
        }
    }
    // A zero or negative page count would make the test degenerate (or
    // produce a negative allocation size below), so reject it here.
    if (nr_slot_pages <= 0 || nr_total_pages <= 0) {
        printf("dirty-log-perf: Invalid setting: page counts must be positive\n");
        exit(1);
    }
    if (nr_slot_pages > nr_total_pages) {
        // Cast for %lld: int64_t may be plain long on LP64.
        printf("dirty-log-perf: Invalid setting: slot %lld > mem %lld\n",
               (long long)nr_slot_pages, (long long)nr_total_pages);
        exit(1);
    }
    printf("dirty-log-perf: %lld slot pages / %lld mem pages\n",
           (long long)nr_slot_pages, (long long)nr_total_pages);
}
// Allocate nr_total_pages of page-aligned memory, expose it to the guest
// through two slots (a logged one of nr_slot_pages plus the remainder),
// and time dirty-log retrieval on the first slot (see check_dirty_log).
int test_main(int ac, char **av)
{
    kvm::system sys;
    kvm::vm vm(sys);
    mem_map memmap(vm);
    parse_options(ac, av);
    void* mem_head;
    int64_t mem_size = nr_total_pages * page_size;
    if (posix_memalign(&mem_head, page_size, mem_size)) {
        printf("dirty-log-perf: Could not allocate guest memory.\n");
        exit(1);
    }
    uint64_t mem_addr = reinterpret_cast<uintptr_t>(mem_head);
    // Punch a hole in the identity map where the test buffer lives so the
    // slots created below can cover it instead.
    identity::hole hole(mem_head, mem_size);
    identity::vm ident_vm(vm, memmap, hole);
    kvm::vcpu vcpu(vm, 0);
    uint64_t slot_size = nr_slot_pages * page_size;
    uint64_t next_size = mem_size - slot_size;
    uint64_t next_addr = mem_addr + slot_size;
    // First slot: the one whose dirty log is measured.  Second slot: the
    // rest of the buffer, never logged.
    mem_slot slot(memmap, mem_addr, slot_size, mem_head);
    mem_slot other_slot(memmap, next_addr, next_size, (void *)next_addr);
    // pre-allocate shadow pages
    do_guest_write(vcpu, mem_head, nr_total_pages, nr_total_pages);
    check_dirty_log(vcpu, slot, mem_head);
    // mem_head is not freed; the process exits immediately afterwards.
    return 0;
}
// Entry point: try_main() reports stray exceptions and returns nonzero.
int main(int ac, char** av)
{
    return try_main(test_main, ac, av);
}

View file

@ -0,0 +1,84 @@
#include "kvmxx.hh"
#include "exception.hh"
#include "memmap.hh"
#include "identity.hh"
#include <thread>
#include <stdlib.h>
#include <stdio.h>
namespace {
// Busy-wait for n iterations of the x86 "pause" spin-loop hint.
void delay_loop(unsigned n)
{
    while (n--) {
        asm volatile("pause");
    }
}
// Guest-side loop: keep incrementing *shared_var until the host clears
// `running`, pausing briefly between writes.
void write_mem(volatile bool& running, volatile int* shared_var)
{
    for (; running; delay_loop(1000)) {
        ++*shared_var;
    }
}
// Race the dirty log against a concurrent guest writer: if the shared
// counter changed between two samples, the page it lives on must show up
// dirty in the freshly-fetched log.  Every miss bumps nr_fail.
void check_dirty_log(mem_slot& slot,
                     volatile bool& running,
                     volatile int* shared_var,
                     int& nr_fail)
{
    // Identity mapping: the host virtual address doubles as the GPA.
    uint64_t shared_var_gpa = reinterpret_cast<uint64_t>(shared_var);
    slot.set_dirty_logging(true);
    slot.update_dirty_log();
    for (int i = 0; i < 10000000; ++i) {
        int sample1 = *shared_var;
        delay_loop(600);
        int sample2 = *shared_var;
        slot.update_dirty_log();
        if (!slot.is_dirty(shared_var_gpa) && sample1 != sample2) {
            ++nr_fail;
        }
    }
    // Tell the guest writer to stop before dropping dirty logging.
    running = false;
    slot.set_dirty_logging(false);
}
}
// Race a guest writer against host-side dirty-log polling on one shared
// page; returns 1 if any guest write was missed by the dirty log.
int test_main(int ac, char **av)
{
    kvm::system sys;
    kvm::vm vm(sys);
    mem_map memmap(vm);
    void* logged_slot_virt;
    // One page carries the shared counter and is the dirty-logged slot.
    int ret = posix_memalign(&logged_slot_virt, 4096, 4096);
    if (ret) {
        throw errno_exception(ret);
    }
    volatile int* shared_var = static_cast<volatile int*>(logged_slot_virt);
    // Exclude the page from the identity map so our own slot covers it.
    identity::hole hole(logged_slot_virt, 4096);
    identity::vm ident_vm(vm, memmap, hole);
    kvm::vcpu vcpu(vm, 0);
    bool running = true;
    int nr_fail = 0;
    mem_slot logged_slot(memmap,
                         reinterpret_cast<uintptr_t>(logged_slot_virt),
                         4096, logged_slot_virt);
    // Host thread polls the log while the guest writes; check_dirty_log
    // clears `running` when it is done, which stops the guest loop.
    std::thread host_poll_thread(check_dirty_log, std::ref(logged_slot),
                                 std::ref(running),
                                 shared_var, std::ref(nr_fail));
    identity::vcpu guest_write_thread(vcpu,
                                      std::bind(write_mem,
                                                std::ref(running),
                                                shared_var));
    vcpu.run();
    host_poll_thread.join();
    printf("Dirty bitmap failures: %d\n", nr_fail);
    return nr_fail == 0 ? 0 : 1;
}
// Entry point: try_main() reports stray exceptions and returns nonzero.
int main(int ac, char** av)
{
    return try_main(test_main, ac, av);
}

View file

@ -0,0 +1,33 @@
#include "exception.hh"
#include <cstdio>
#include <cstring>
// Store the error number; message formatting is deferred to what().
errno_exception::errno_exception(int errno)
    : _errno(errno)
{
}
// Accessor for the saved error number.
// NOTE(review): "errno" is a macro once <errno.h>/<cerrno> is in the TU,
// so this method name (and the ctor parameter above) can fail to compile
// depending on include order -- consider renaming; verify.
int errno_exception::errno() const
{
    return _errno;
}
// Format "error: <strerror> (<n>)" into the embedded buffer and return it.
// NOTE(review): std::exception::what() is `const noexcept`; this
// non-const overload does NOT override it, so catching by
// std::exception& (as try_main does) calls the base what() and loses this
// message -- confirm, and fix with `const char* what() const noexcept
// override` in both header and here.
const char *errno_exception::what()
{
    std::snprintf(_buf, sizeof _buf, "error: %s (%d)",
                  std::strerror(_errno), _errno);
    return _buf;
}
// Invoke entry(argc, argv), fencing off any exception it lets escape:
// the exception is reported on stderr and ret_on_exception is returned
// instead (the default value lives in exception.hh).
int try_main(int (*entry)(int argc, char** argv), int argc, char** argv,
             int ret_on_exception)
{
    int status = ret_on_exception;
    try {
        status = entry(argc, argv);
    } catch (const std::exception& e) {
        std::fprintf(stderr, "exception: %s\n", e.what());
    } catch (...) {
        std::fprintf(stderr, "unknown exception\n");
    }
    return status;
}

View file

@ -0,0 +1,19 @@
#ifndef EXCEPTION_HH
#define EXCEPTION_HH
#include <exception>
// Exception carrying a saved errno-style error number; what() formats it
// lazily into an internal buffer.
class errno_exception : public std::exception {
public:
    explicit errno_exception(int err_no);
    // Returns the saved error number.
    // NOTE(review): "errno" is a macro in <errno.h>; this declaration can
    // break in TUs that include errno.h first -- consider renaming.
    int errno() const;
    // NOTE(review): non-const, so this hides rather than overrides
    // std::exception::what() (which is const noexcept); catch-by-base
    // callers get the generic base message -- confirm and fix.
    virtual const char *what();
private:
    int _errno;      // saved error number
    char _buf[1000]; // formatted message storage for what()
};
// Run main(argc, argv), converting any escaped exception into a message
// on stderr and the return value ret_on_exception.
int try_main(int (*main)(int argc, char** argv), int argc, char** argv,
             int ret_on_exception = 127);
#endif

View file

@ -0,0 +1,120 @@
#include "identity.hh"
#include "exception.hh"
#include <stdlib.h>
#include <stdio.h>
namespace identity {
typedef unsigned long ulong;
// Default hole: empty (nothing excluded from the identity map).
hole::hole()
    : address(), size()
{
}
// Exclude [address, address + size) from the identity mapping.
hole::hole(void* address, size_t size)
    : address(address), size(size)
{
}
// Identity-map the 32-bit guest physical space onto host memory, leaving
// out a 4-page TSS/EPT block and the caller's hole.  Three slots cover
// the space, split around whichever of the two gaps comes first.
// NOTE(review): assumes the hole and the TSS block do not overlap --
// verify callers.
vm::vm(kvm::vm& vm, mem_map& mmap, hole h)
{
    // 4 pages: 3 for the TSS plus 1 for the EPT identity map (see the
    // set_tss_addr / set_ept_identity_map_addr calls below).
    int ret = posix_memalign(&tss, 4096, 4 * 4096);
    if (ret) {
        throw errno_exception(ret);
    }
    uint64_t hole_gpa = reinterpret_cast<uintptr_t>(h.address);
    char* hole_hva = static_cast<char*>(h.address);
    uint64_t tss_addr = reinterpret_cast<uintptr_t>(tss);
    uint64_t tss_end = tss_addr + 4 * 4096;
    uint64_t hole_end = hole_gpa + h.size;
    // (uint32_t)-tss_end / (uint32_t)-hole_end: bytes remaining up to 4GiB.
    if (hole_gpa < tss_addr) {
        if (hole_gpa) {
            _slots.push_back(mem_slot_ptr(new mem_slot(mmap, 0, hole_gpa, NULL)));
        }
        _slots.push_back(mem_slot_ptr(new mem_slot(mmap, hole_end, tss_addr - hole_end,
                                                   hole_hva + h.size)));
        _slots.push_back(mem_slot_ptr(new mem_slot(mmap, tss_end, (uint32_t)-tss_end,
                                                   (char*)tss + 4 * 4096)));
    } else {
        _slots.push_back(mem_slot_ptr(new mem_slot(mmap, 0, tss_addr, NULL)));
        _slots.push_back(mem_slot_ptr(new mem_slot(mmap, tss_end, hole_gpa - tss_end,
                                                   (char*)tss + 4 * 4096)));
        _slots.push_back(mem_slot_ptr(new mem_slot(mmap, hole_end, (uint32_t)-hole_end,
                                                   hole_hva + h.size)));
    }
    vm.set_tss_addr(tss_addr);
    vm.set_ept_identity_map_addr(tss_addr + 3 * 4096);
}
// Release the TSS/EPT backing pages; the slots unregister themselves.
vm::~vm()
{
    free(tss);
}
// Mirror the host's flat 32-bit segment state into the guest so that host
// code and data addresses work unchanged when executed as guest code.
void vcpu::setup_sregs()
{
    kvm_sregs sregs = { };
    kvm_segment dseg = { };
    // Flat 4GiB read/write data segment, DPL 3.
    dseg.base = 0; dseg.limit = -1U; dseg.type = 3; dseg.present = 1;
    dseg.dpl = 3; dseg.db = 1; dseg.s = 1; dseg.l = 0; dseg.g = 1;
    kvm_segment cseg = dseg;
    cseg.type = 11; // execute/read code segment
    // Reuse the host's live selector values for every segment register.
    sregs.cs = cseg; asm ("mov %%cs, %0" : "=rm"(sregs.cs.selector));
    sregs.ds = dseg; asm ("mov %%ds, %0" : "=rm"(sregs.ds.selector));
    sregs.es = dseg; asm ("mov %%es, %0" : "=rm"(sregs.es.selector));
    sregs.fs = dseg; asm ("mov %%fs, %0" : "=rm"(sregs.fs.selector));
    sregs.gs = dseg; asm ("mov %%gs, %0" : "=rm"(sregs.gs.selector));
    sregs.ss = dseg; asm ("mov %%ss, %0" : "=rm"(sregs.ss.selector));
    // Copy the value at %gs:0 into the guest's gs base -- presumably the
    // TLS self-pointer, so gs-relative accesses keep working; verify.
    uint32_t gsbase;
    asm ("mov %%gs:0, %0" : "=r"(gsbase));
    sregs.gs.base = gsbase;
    // NOTE(review): TR base points at the guest stack buffer --
    // apparently any valid memory suffices here; confirm.
    sregs.tr.base = reinterpret_cast<uintptr_t>(&*_stack.begin());
    sregs.tr.type = 11; // busy 32-bit TSS
    sregs.tr.s = 0;
    sregs.tr.present = 1;
    sregs.cr0 = 0x11; /* PE, ET, !PG */
    sregs.cr4 = 0;
    sregs.efer = 0;
    sregs.apic_base = 0xfee00000;
    _vcpu.set_sregs(sregs);
}
// Guest-side trampoline: run the user's function, then execute OUT to
// port 0 to exit back to the host.
void vcpu::thunk(vcpu* zis)
{
    zis->_guest_func();
    asm volatile("outb %%al, %%dx" : : "a"(0), "d"(0));
}
// Point the vcpu at thunk() with a 16-byte-aligned stack carved from
// _stack, pushing `this` as thunk's argument plus a null return address.
void vcpu::setup_regs()
{
    kvm_regs regs = {};
    regs.rflags = 0x3202; // IF | IOPL=3 | always-set bit 1
    // One-past-the-end of the stack buffer.  Use data() + size() instead
    // of dereferencing end(), which is undefined behavior.
    regs.rsp = reinterpret_cast<ulong>(_stack.data() + _stack.size());
    regs.rsp &= ~15UL; // ABI stack alignment
    ulong* sp = reinterpret_cast<ulong *>(regs.rsp);
    *--sp = reinterpret_cast<ulong>((char*)this); // argument for thunk()
    *--sp = 0;                                    // fake return address
    regs.rsp = reinterpret_cast<ulong>(sp);
    regs.rip = reinterpret_cast<ulong>(&vcpu::thunk);
    printf("rip %llx\n", regs.rip);
    _vcpu.set_regs(regs);
}
// Prepare a guest "thread": allocate a stack_size-byte guest stack and
// program the vcpu's segment and general registers so that a subsequent
// kvm::vcpu::run() executes guest_func inside the guest.
vcpu::vcpu(kvm::vcpu& vcpu, std::function<void ()> guest_func,
           unsigned long stack_size)
    : _vcpu(vcpu), _guest_func(guest_func), _stack(stack_size)
{
    setup_sregs();
    setup_regs();
}
}

View file

@ -0,0 +1,45 @@
#ifndef API_IDENTITY_HH
#define API_IDENTITY_HH
#include "kvmxx.hh"
#include "memmap.hh"
#include <functional>
#include <memory>
#include <vector>
namespace identity {
// A host-virtual range to leave out of the identity mapping, so the
// caller can cover it with its own memory slots.
struct hole {
    hole();
    hole(void* address, size_t size);
    void* address;
    size_t size;
};
// Identity-maps (GPA == HVA) the guest physical address space of `vm`,
// except for the optional hole; also reserves the TSS/EPT pages (see
// identity.cc for the slot layout).
class vm {
public:
    vm(kvm::vm& vm, mem_map& mmap, hole address_space_hole = hole());
    ~vm();
private:
    void *tss;  // backing store for the 4-page TSS/EPT block
    typedef std::shared_ptr<mem_slot> mem_slot_ptr;
    std::vector<mem_slot_ptr> _slots;  // slots covering the address space
};
// Prepares a kvm vcpu to run guest_func using the host's flat address
// space: construction programs the registers, kvm::vcpu::run() executes.
class vcpu {
public:
    vcpu(kvm::vcpu& vcpu, std::function<void ()> guest_func,
         unsigned long stack_size = 256 * 1024);
private:
    static void thunk(vcpu* vcpu);  // guest-side entry trampoline
    void setup_regs();
    void setup_sregs();
private:
    kvm::vcpu& _vcpu;
    std::function<void ()> _guest_func;  // what the guest should run
    std::vector<char> _stack;            // guest stack buffer
};
}
#endif

View file

@ -0,0 +1,199 @@
#include "kvmxx.hh"
#include "exception.hh"
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <stdlib.h>
#include <memory>
#include <algorithm>
namespace kvm {
// Pass non-error results through; turn the POSIX -1 failure convention
// into an errno_exception carrying the current errno.
static long check_error(long r)
{
    if (r != -1) {
        return r;
    }
    throw errno_exception(errno);
}
// Take ownership of an already-open file descriptor.
fd::fd(int fd)
    : _fd(fd)
{
}
// Copy: duplicate the descriptor so each fd object owns its own handle.
// Throws errno_exception if dup() fails.
// NOTE(review): copy *assignment* is not customized here; a memberwise
// assignment would double-close -- confirm it is unused or delete it.
fd::fd(const fd& other)
    : _fd(::dup(other._fd))
{
    check_error(_fd);
}
// Open device_node with the given flags; throws errno_exception on error.
fd::fd(std::string device_node, int flags)
    : _fd(::open(device_node.c_str(), flags))
{
    check_error(_fd);
}
// ioctl wrapper that converts -1 results into errno_exception.
long fd::ioctl(unsigned nr, long arg)
{
    return check_error(::ioctl(_fd, nr, arg));
}
// Create vcpu `id` inside `vm` and map its shared kvm_run page, which is
// sized by KVM_GET_VCPU_MMAP_SIZE from the system fd.
vcpu::vcpu(vm& vm, int id)
    : _vm(vm), _fd(vm._fd.ioctl(KVM_CREATE_VCPU, id)), _shared(NULL)
    , _mmap_size(_vm._system._fd.ioctl(KVM_GET_VCPU_MMAP_SIZE, 0))
{
    kvm_run *shared = static_cast<kvm_run*>(::mmap(NULL, _mmap_size,
                                                   PROT_READ | PROT_WRITE,
                                                   MAP_SHARED,
                                                   _fd.get(), 0));
    if (shared == MAP_FAILED) {
        throw errno_exception(errno);
    }
    _shared = shared;
}
// Unmap the shared kvm_run page; the vcpu fd closes via the fd member.
vcpu::~vcpu()
{
    munmap(_shared, _mmap_size);
}
// Enter the guest; returns when the vcpu exits back to userspace.
void vcpu::run()
{
    _fd.ioctl(KVM_RUN, 0);
}
// Fetch the guest's general-purpose registers.
kvm_regs vcpu::regs()
{
    kvm_regs regs;
    _fd.ioctlp(KVM_GET_REGS, &regs);
    return regs;
}
// Load general-purpose registers into the guest.
void vcpu::set_regs(const kvm_regs& regs)
{
    _fd.ioctlp(KVM_SET_REGS, const_cast<kvm_regs*>(&regs));
}
// Fetch the guest's special registers (segments, control registers, ...).
kvm_sregs vcpu::sregs()
{
    kvm_sregs sregs;
    _fd.ioctlp(KVM_GET_SREGS, &sregs);
    return sregs;
}
// Load special registers into the guest.
void vcpu::set_sregs(const kvm_sregs& sregs)
{
    _fd.ioctlp(KVM_SET_SREGS, const_cast<kvm_sregs*>(&sregs));
}
// RAII holder for a variable-length kvm_msrs buffer (header plus nmsrs
// entries), which is malloc'ed because of the trailing flexible array.
class vcpu::kvm_msrs_ptr {
public:
    explicit kvm_msrs_ptr(size_t nmsrs);
    ~kvm_msrs_ptr() { ::free(_kvm_msrs); }
    // Non-copyable: a memberwise copy would double-free the buffer.
    kvm_msrs_ptr(const kvm_msrs_ptr&) = delete;
    kvm_msrs_ptr& operator=(const kvm_msrs_ptr&) = delete;
    kvm_msrs* operator->() { return _kvm_msrs; }
    kvm_msrs* get() { return _kvm_msrs; }
private:
    kvm_msrs* _kvm_msrs;
};
// Allocate space for the header plus nmsrs entries; throws std::bad_alloc
// on failure (with _kvm_msrs still null, so nothing leaks).
vcpu::kvm_msrs_ptr::kvm_msrs_ptr(size_t nmsrs)
    : _kvm_msrs(0)
{
    size_t size = sizeof(kvm_msrs) + sizeof(kvm_msr_entry) * nmsrs;
    _kvm_msrs = static_cast<kvm_msrs*>(::malloc(size));
    if (!_kvm_msrs) {
        throw std::bad_alloc();
    }
}
// Read the MSRs named by `indices`; returns index/value entry pairs.
std::vector<kvm_msr_entry> vcpu::msrs(std::vector<uint32_t> indices)
{
    kvm_msrs_ptr msrs(indices.size());
    msrs->nmsrs = indices.size();
    for (unsigned i = 0; i < msrs->nmsrs; ++i) {
        msrs->entries[i].index = indices[i];
    }
    _fd.ioctlp(KVM_GET_MSRS, msrs.get());
    return std::vector<kvm_msr_entry>(msrs->entries,
                                      msrs->entries + msrs->nmsrs);
}
// Write the given MSR index/value pairs into the vcpu.
void vcpu::set_msrs(const std::vector<kvm_msr_entry>& msrs)
{
    kvm_msrs_ptr _msrs(msrs.size());
    _msrs->nmsrs = msrs.size();
    std::copy(msrs.begin(), msrs.end(), _msrs->entries);
    _fd.ioctlp(KVM_SET_MSRS, _msrs.get());
}
// Program guest debug state: debug registers dr[0..7] plus the
// enable/single-step control flags.
void vcpu::set_debug(uint64_t dr[8], bool enabled, bool singlestep)
{
    // Zero-initialize the whole struct (including pad) instead of passing
    // uninitialized bytes to the kernel.
    kvm_guest_debug gd = {};
    if (enabled) {
        gd.control |= KVM_GUESTDBG_ENABLE;
    }
    if (singlestep) {
        gd.control |= KVM_GUESTDBG_SINGLESTEP;
    }
    for (int i = 0; i < 8; ++i) {
        gd.arch.debugreg[i] = dr[i];
    }
    _fd.ioctlp(KVM_SET_GUEST_DEBUG, &gd);
}
// Create a new VM on the given /dev/kvm handle.
vm::vm(system& system)
    : _system(system), _fd(system._fd.ioctl(KVM_CREATE_VM, 0))
{
}
// Install (or, with len == 0, delete) user memory slot `slot`: guest
// physical [gpa, gpa+len) backed by host memory at addr.
void vm::set_memory_region(int slot, void *addr, uint64_t gpa, size_t len,
                           uint32_t flags)
{
    struct kvm_userspace_memory_region umr;
    umr.slot = slot;
    umr.flags = flags;
    umr.guest_phys_addr = gpa;
    umr.memory_size = len;
    umr.userspace_addr = reinterpret_cast<uintptr_t>(addr);
    _fd.ioctlp(KVM_SET_USER_MEMORY_REGION, &umr);
}
void vm::get_dirty_log(int slot, void *log)
{
struct kvm_dirty_log kdl;
kdl.slot = slot;
kdl.dirty_bitmap = log;
_fd.ioctlp(KVM_GET_DIRTY_LOG, &kdl);
}
// Tell KVM where to place the guest TSS region (used by identity::vm).
void vm::set_tss_addr(uint32_t addr)
{
    _fd.ioctl(KVM_SET_TSS_ADDR, addr);
}
// Set the page used for the EPT identity map.  Passed by pointer --
// KVM_SET_IDENTITY_MAP_ADDR expects a u64* argument.
void vm::set_ept_identity_map_addr(uint64_t addr)
{
    _fd.ioctlp(KVM_SET_IDENTITY_MAP_ADDR, &addr);
}
// Open the KVM control device (default /dev/kvm).
system::system(std::string device_node)
    : _fd(device_node, O_RDWR)
{
}
// True if the kernel reports a nonzero value for `extension`.
bool system::check_extension(int extension)
{
    return _fd.ioctl(KVM_CHECK_EXTENSION, extension);
}
// Raw KVM_CHECK_EXTENSION value, for capabilities that return a count.
int system::get_extension_int(int extension)
{
    return _fd.ioctl(KVM_CHECK_EXTENSION, extension);
}
};

View file

@ -0,0 +1,86 @@
#ifndef KVMXX_H
#define KVMXX_H
#include <string>
#include <signal.h>
#include <unistd.h>
#include <vector>
#include <errno.h>
#include <linux/kvm.h>
#include <stdint.h>
namespace kvm {
class system;
class vm;
class vcpu;
class fd;
// Thin RAII wrapper around a POSIX file descriptor.  Copying duplicates
// the descriptor with dup() (see kvmxx.cc); the destructor closes it.
class fd {
public:
    explicit fd(int n);
    explicit fd(std::string path, int flags);
    fd(const fd& other);
    // Deleted: the compiler-generated memberwise copy would leave two
    // objects owning (and eventually closing) the same descriptor.
    fd& operator=(const fd&) = delete;
    ~fd() { ::close(_fd); }
    int get() { return _fd; }
    // ioctl wrappers; failures are turned into errno_exception.
    long ioctl(unsigned nr, long arg);
    long ioctlp(unsigned nr, void *arg) {
        return ioctl(nr, reinterpret_cast<long>(arg));
    }
private:
    int _fd;
};
// One guest vcpu: wraps the KVM vcpu fd and its mmap'ed kvm_run area.
class vcpu {
public:
    vcpu(vm& vm, int id);
    ~vcpu();
    void run();
    kvm_run *shared();
    kvm_regs regs();
    void set_regs(const kvm_regs& regs);
    kvm_sregs sregs();
    void set_sregs(const kvm_sregs& sregs);
    std::vector<kvm_msr_entry> msrs(std::vector<uint32_t> indices);
    void set_msrs(const std::vector<kvm_msr_entry>& msrs);
    void set_debug(uint64_t dr[8], bool enabled, bool singlestep);
private:
    class kvm_msrs_ptr;  // RAII helper for variable-length kvm_msrs
private:
    vm& _vm;
    fd _fd;              // KVM vcpu fd
    kvm_run *_shared;    // mmap'ed run/exit communication page
    unsigned _mmap_size; // size of the mapping above
    friend class vm;
};
// A guest VM: owns the KVM vm fd; memory slots are managed externally
// (see memmap.hh).
class vm {
public:
    explicit vm(system& system);
    void set_memory_region(int slot, void *addr, uint64_t gpa, size_t len,
                           uint32_t flags = 0);
    void get_dirty_log(int slot, void *log);
    void set_tss_addr(uint32_t addr);
    void set_ept_identity_map_addr(uint64_t addr);
    system& sys() { return _system; }
private:
    system& _system;
    fd _fd;
    friend class system;
    friend class vcpu;
};
// Handle to /dev/kvm itself; creates VMs and queries capabilities.
class system {
public:
    explicit system(std::string device_node = "/dev/kvm");
    bool check_extension(int extension);
    int get_extension_int(int extension);
private:
    fd _fd;
    friend class vcpu;
    friend class vm;
};
};
#endif

View file

@ -0,0 +1,96 @@
#include "memmap.hh"
#include <numeric>
// Claim a free slot number from the map and register the region
// [gpa, gpa+size) -> hva with KVM.  Zero-sized slots only reserve the
// number; nothing is registered.
// NOTE(review): assumes map._free_slots is non-empty -- top() on an empty
// stack is undefined; confirm slot exhaustion cannot happen.
mem_slot::mem_slot(mem_map& map, uint64_t gpa, uint64_t size, void* hva)
    : _map(map)
    , _slot(map._free_slots.top())
    , _gpa(gpa)
    , _size(size)
    , _hva(hva)
    , _dirty_log_enabled(false)
    , _log()
{
    map._free_slots.pop();
    if (_size) {
        update();
    }
}
// Unregister the region (by shrinking it to size 0) and return the slot
// number to the free pool.  Errors are swallowed: destructors must not
// throw, so a failed update just leaks the slot number.
mem_slot::~mem_slot()
{
    if (!_size) {
        return;
    }
    _size = 0;
    try {
        update();
        _map._free_slots.push(_slot);
    } catch (...) {
        // can't do much if we can't undo slot registration - leak the slot
    }
}
// Toggle KVM dirty-page logging for this slot.  Enabling sizes the local
// bitmap (one bit per 4KiB page); disabling drops it.  No-op when the
// requested state is already active.
void mem_slot::set_dirty_logging(bool enabled)
{
    if (_dirty_log_enabled == enabled) {
        return;
    }
    _dirty_log_enabled = enabled;
    int log_words = enabled
        ? ((_size >> 12) + bits_per_word - 1) / bits_per_word
        : 0;
    _log.resize(log_words);
    if (_size) {
        update();
    }
}
// Push the slot's current configuration (address, size, logging flag) to
// the kernel via set_memory_region().
void mem_slot::update()
{
    uint32_t flags = 0;
    if (_dirty_log_enabled) {
        flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    _map._vm.set_memory_region(_slot, _hva, _gpa, _size, flags);
}
// Whether dirty logging is currently on for this slot.
bool mem_slot::dirty_logging() const
{
    return _dirty_log_enabled;
}
// Population count: number of set bits in w (Kernighan's bit-clearing
// loop; one iteration per set bit).
static inline int hweight(uint64_t w)
{
    int count = 0;
    while (w) {
        w &= w - 1;  // clear the lowest set bit
        ++count;
    }
    return count;
}
// Fetch the latest dirty bitmap from KVM and return the total number of
// dirty pages (population count over the whole bitmap).
int mem_slot::update_dirty_log()
{
    // data() is well-defined even when logging is off and _log is empty,
    // unlike &_log[0], which indexes an empty vector (undefined behavior).
    _map._vm.get_dirty_log(_slot, _log.data());
    return std::accumulate(_log.begin(), _log.end(), 0,
                           [] (int prev, ulong elem) -> int {
                               return prev + hweight(elem);
                           });
}
// Test the page containing gpa in the most recently fetched bitmap
// (one bit per 4KiB page, starting at the slot base).
// NOTE(review): no bounds check -- a gpa outside the slot (or logging
// disabled) reads past _log; callers must stay in range.
bool mem_slot::is_dirty(uint64_t gpa) const
{
    uint64_t pagenr = (gpa - _gpa) >> 12;
    ulong wordnr = pagenr / bits_per_word;
    ulong bit = 1ULL << (pagenr % bits_per_word);
    return _log[wordnr] & bit;
}
// Fill the free-slot pool with every slot number the kernel supports
// (KVM_CAP_NR_MEMSLOTS).
mem_map::mem_map(kvm::vm& vm)
    : _vm(vm)
{
    const int nr_slots = vm.sys().get_extension_int(KVM_CAP_NR_MEMSLOTS);
    for (int slot = 0; slot != nr_slots; ++slot) {
        _free_slots.push(slot);
    }
}

View file

@ -0,0 +1,43 @@
#ifndef MEMMAP_HH
#define MEMMAP_HH
#include "kvmxx.hh"
#include <stdint.h>
#include <vector>
#include <stack>
class mem_map;
class mem_slot;
// One KVM user memory slot: maps guest physical [gpa, gpa+size) onto host
// memory at hva, with optional dirty-page logging.  RAII: registration
// happens in the constructor, unregistration in the destructor.
class mem_slot {
public:
    mem_slot(mem_map& map, uint64_t gpa, uint64_t size, void *hva);
    ~mem_slot();
    void set_dirty_logging(bool enabled);
    bool dirty_logging() const;
    // Re-fetch the dirty bitmap from KVM; returns the dirty page count.
    int update_dirty_log();
    // Test gpa's page in the most recently fetched bitmap.
    bool is_dirty(uint64_t gpa) const;
private:
    void update();  // push the current slot configuration to KVM
private:
    typedef unsigned long ulong;
    static const int bits_per_word = sizeof(ulong) * 8;
    mem_map& _map;
    int _slot;                // KVM slot number, taken from _map's pool
    uint64_t _gpa;
    uint64_t _size;
    void *_hva;
    bool _dirty_log_enabled;
    std::vector<ulong> _log;  // last dirty bitmap fetched from KVM
};
// Hands out KVM memory slot numbers for a vm; the pool is sized by the
// KVM_CAP_NR_MEMSLOTS capability.
class mem_map {
public:
    mem_map(kvm::vm& vm);
private:
    kvm::vm& _vm;
    std::stack<int> _free_slots;  // unused slot numbers
    friend class mem_slot;
};
#endif

View file

@ -0,0 +1,10 @@
/*
 * Set up arguments for main() and prepare environment variables
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Library General Public License version 2.
 */
/* Build argc/argv for main() (implementation not shown in this header). */
extern void __setup_args(void);
/* Presumably seeds argv[0]/argument parsing from `args` -- see the
 * implementation to confirm. */
extern void setup_args_progname(const char *args);
/* Parse the `size`-byte block `env` into environment variables. */
extern void setup_env(char *env, int size);

View file

@ -0,0 +1,336 @@
#include "libcflat.h"
#include "vm.h"
#include "smp.h"
#include "isr.h"
#include "atomic.h"
#include "hyperv.h"
#include "bitops.h"
/* Test topology / protocol constants. */
#define MAX_CPUS 64
#define MSG_VEC 0xb0        /* interrupt vector backing the message SINT */
#define EVT_VEC 0xb1        /* interrupt vector backing the event SINT */
#define MSG_SINT 0x8        /* synthetic interrupt number used for messages */
#define EVT_SINT 0x9        /* synthetic interrupt number used for events */
#define MSG_CONN_BASE 0x10  /* per-vcpu message connection ids start here */
#define EVT_CONN_BASE 0x20  /* per-vcpu event connection ids start here */
#define MSG_TYPE 0x12345678 /* arbitrary message type tag */
#define WAIT_CYCLES 10000000
/* Number of vcpus that finished the current test step (see run_test). */
static atomic_t ncpus_done;
/* Per-vcpu SynIC state and test bookkeeping. */
struct hv_vcpu {
    struct hv_message_page *msg_page;       /* SIMP: incoming message page */
    struct hv_event_flags_page *evt_page;   /* SIEFP: event flags page */
    struct hv_input_post_message *post_msg; /* prebuilt POST_MESSAGE argument */
    u8 msg_conn;                            /* this vcpu's message connection id */
    u8 evt_conn;                            /* this vcpu's event connection id */
    u64 hvcall_status;                      /* status of the last hypercall */
    atomic_t sint_received;                 /* SINT interrupts taken this step */
};
static struct hv_vcpu hv_vcpus[MAX_CPUS];
/* ISR shared by both SINT vectors: just count deliveries on this vcpu. */
static void sint_isr(isr_regs_t *regs)
{
    atomic_inc(&hv_vcpus[smp_id()].sint_received);
}
static void *hypercall_page;
/* Allocate a hypercall code page and advertise it (plus our guest OS id)
 * to the hypervisor through the Hyper-V MSRs. */
static void setup_hypercall(void)
{
    u64 guestid = (0x8f00ull << 48);
    hypercall_page = alloc_page();
    if (!hypercall_page)
        report_abort("failed to allocate hypercall page");
    memset(hypercall_page, 0, PAGE_SIZE);
    wrmsr(HV_X64_MSR_GUEST_OS_ID, guestid);
    wrmsr(HV_X64_MSR_HYPERCALL,
          (u64)virt_to_phys(hypercall_page) | HV_X64_MSR_HYPERCALL_ENABLE);
}
/* Undo setup_hypercall(): clear both MSRs and release the page. */
static void teardown_hypercall(void)
{
    wrmsr(HV_X64_MSR_HYPERCALL, 0);
    wrmsr(HV_X64_MSR_GUEST_OS_ID, 0);
    free_page(hypercall_page);
}
/* Issue hypercall `code` with one argument through the hypercall page;
 * `fast` sets HV_HYPERCALL_FAST in the control word.  On 32-bit, the
 * 64-bit control/argument values are split across edx:eax and ebx:ecx.
 * NOTE(review): on x86-64 the `mov $0,%r8` follows the call, so r8 is
 * zeroed *after* the hypercall returns -- if the intent was to clear a
 * register argument it should precede the call; verify. */
static u64 do_hypercall(u16 code, u64 arg, bool fast)
{
    u64 ret;
    u64 ctl = code;
    if (fast)
        ctl |= HV_HYPERCALL_FAST;
    asm volatile ("call *%[hcall_page]"
#ifdef __x86_64__
                  "\n mov $0,%%r8"
                  : "=a"(ret)
                  : "c"(ctl), "d"(arg),
#else
                  : "=A"(ret)
                  : "A"(ctl),
                    "b" ((u32)(arg >> 32)), "c" ((u32)arg),
                    "D"(0), "S"(0),
#endif
                  [hcall_page] "m" (hypercall_page)
#ifdef __x86_64__
                  : "r8"
#endif
                  );
    return ret;
}
/* Per-vcpu initialization (run on each cpu): enable the SynIC message and
 * event pages plus its control MSR, route both SINTs to our ISR vectors,
 * and prebuild the post-message argument this vcpu will send from. */
static void setup_cpu(void *ctx)
{
    int vcpu;
    struct hv_vcpu *hv;
    write_cr3((ulong)ctx);  /* adopt the boot cpu's page tables (see main) */
    irq_enable();
    vcpu = smp_id();
    hv = &hv_vcpus[vcpu];
    hv->msg_page = alloc_page();
    hv->evt_page = alloc_page();
    hv->post_msg = alloc_page();
    if (!hv->msg_page || !hv->evt_page || !hv->post_msg)
        report_abort("failed to allocate synic pages for vcpu");
    memset(hv->msg_page, 0, sizeof(*hv->msg_page));
    memset(hv->evt_page, 0, sizeof(*hv->evt_page));
    memset(hv->post_msg, 0, sizeof(*hv->post_msg));
    hv->msg_conn = MSG_CONN_BASE + vcpu;
    hv->evt_conn = EVT_CONN_BASE + vcpu;
    wrmsr(HV_X64_MSR_SIMP,
          (u64)virt_to_phys(hv->msg_page) | HV_SYNIC_SIMP_ENABLE);
    wrmsr(HV_X64_MSR_SIEFP,
          (u64)virt_to_phys(hv->evt_page) | HV_SYNIC_SIEFP_ENABLE);
    wrmsr(HV_X64_MSR_SCONTROL, HV_SYNIC_CONTROL_ENABLE);
    msg_conn_create(MSG_SINT, MSG_VEC, hv->msg_conn);
    evt_conn_create(EVT_SINT, EVT_VEC, hv->evt_conn);
    /* payload[0]: sender id in the high bits; do_msg() increments the low
     * bits so successive messages are distinguishable. */
    hv->post_msg->connectionid = hv->msg_conn;
    hv->post_msg->message_type = MSG_TYPE;
    hv->post_msg->payload_size = 8;
    hv->post_msg->payload[0] = (u64)vcpu << 16;
}
/* Per-vcpu teardown: destroy connections, disable the SynIC, free pages. */
static void teardown_cpu(void *ctx)
{
    int vcpu = smp_id();
    struct hv_vcpu *hv = &hv_vcpus[vcpu];
    evt_conn_destroy(EVT_SINT, hv->evt_conn);
    msg_conn_destroy(MSG_SINT, hv->msg_conn);
    wrmsr(HV_X64_MSR_SCONTROL, 0);
    wrmsr(HV_X64_MSR_SIEFP, 0);
    wrmsr(HV_X64_MSR_SIMP, 0);
    free_page(hv->post_msg);
    free_page(hv->evt_page);
    free_page(hv->msg_page);
}
/* Post a message to vcpu `ctx`'s message connection (slow hypercall: the
 * argument is passed by physical address). */
static void do_msg(void *ctx)
{
    int vcpu = (ulong)ctx;
    struct hv_vcpu *hv = &hv_vcpus[vcpu];
    struct hv_input_post_message *msg = hv->post_msg;
    msg->payload[0]++;  /* fresh payload so messages are distinguishable */
    atomic_set(&hv->sint_received, 0);
    hv->hvcall_status = do_hypercall(HVCALL_POST_MESSAGE,
                                     virt_to_phys(msg), 0);
    atomic_inc(&ncpus_done);
}
/* Consume the message in this vcpu's slot and write EOM so a pending
 * message (if any) can be delivered. */
static void clear_msg(void *ctx)
{
    /* should only be done on the current vcpu */
    int vcpu = smp_id();
    struct hv_vcpu *hv = &hv_vcpus[vcpu];
    struct hv_message *msg = &hv->msg_page->sint_message[MSG_SINT];
    atomic_set(&hv->sint_received, 0);
    msg->header.message_type = 0;
    barrier();
    wrmsr(HV_X64_MSR_EOM, 0);
    atomic_inc(&ncpus_done);
}
/* Did vcpu receive exactly the message just posted -- matching type,
 * size and payload, nothing pending, one SINT taken? */
static bool msg_ok(int vcpu)
{
    struct hv_vcpu *hv = &hv_vcpus[vcpu];
    struct hv_input_post_message *post_msg = hv->post_msg;
    struct hv_message *msg = &hv->msg_page->sint_message[MSG_SINT];
    return msg->header.message_type == post_msg->message_type &&
           msg->header.payload_size == post_msg->payload_size &&
           msg->header.message_flags.msg_pending == 0 &&
           msg->u.payload[0] == post_msg->payload[0] &&
           hv->hvcall_status == 0 &&
           atomic_read(&hv->sint_received) == 1;
}
/* Is vcpu's slot still holding the previous message (payload one behind)
 * with msg_pending set and no new SINT delivered? */
static bool msg_busy(int vcpu)
{
    struct hv_vcpu *hv = &hv_vcpus[vcpu];
    struct hv_input_post_message *post_msg = hv->post_msg;
    struct hv_message *msg = &hv->msg_page->sint_message[MSG_SINT];
    return msg->header.message_type == post_msg->message_type &&
           msg->header.payload_size == post_msg->payload_size &&
           msg->header.message_flags.msg_pending == 1 &&
           msg->u.payload[0] == post_msg->payload[0] - 1 &&
           hv->hvcall_status == 0 &&
           atomic_read(&hv->sint_received) == 0;
}
/* Signal vcpu `ctx`'s event connection (fast hypercall: the connection id
 * is passed in registers). */
static void do_evt(void *ctx)
{
    int vcpu = (ulong)ctx;
    struct hv_vcpu *hv = &hv_vcpus[vcpu];
    atomic_set(&hv->sint_received, 0);
    hv->hvcall_status = do_hypercall(HVCALL_SIGNAL_EVENT,
                                     hv->evt_conn, 1);
    atomic_inc(&ncpus_done);
}
/* Clear this vcpu's event flag bit so the next signal is seen as new. */
static void clear_evt(void *ctx)
{
    /* should only be done on the current vcpu */
    int vcpu = smp_id();
    struct hv_vcpu *hv = &hv_vcpus[vcpu];
    ulong *flags = hv->evt_page->slot[EVT_SINT].flags;
    atomic_set(&hv->sint_received, 0);
    flags[BIT_WORD(hv->evt_conn)] &= ~BIT_MASK(hv->evt_conn);
    barrier();
    atomic_inc(&ncpus_done);
}
/* Flag set and exactly one SINT delivered? */
static bool evt_ok(int vcpu)
{
    struct hv_vcpu *hv = &hv_vcpus[vcpu];
    ulong *flags = hv->evt_page->slot[EVT_SINT].flags;
    return flags[BIT_WORD(hv->evt_conn)] == BIT_MASK(hv->evt_conn) &&
           hv->hvcall_status == 0 &&
           atomic_read(&hv->sint_received) == 1;
}
/* Flag still set but no new SINT (the event was already pending)? */
static bool evt_busy(int vcpu)
{
    struct hv_vcpu *hv = &hv_vcpus[vcpu];
    ulong *flags = hv->evt_page->slot[EVT_SINT].flags;
    return flags[BIT_WORD(hv->evt_conn)] == BIT_MASK(hv->evt_conn) &&
           hv->hvcall_status == 0 &&
           atomic_read(&hv->sint_received) == 0;
}
/* Run `func` concurrently on all ncpus, cpu i targeting cpu
 * (i + dst_add) % ncpus; wait for every cpu to report done, give things
 * wait_cycles of settling time, then count the cpus for which is_ok()
 * succeeds (0 when is_ok is NULL). */
static int run_test(int ncpus, int dst_add, ulong wait_cycles,
                    void (*func)(void *), bool (*is_ok)(int))
{
    int i, ret = 0;
    atomic_set(&ncpus_done, 0);
    for (i = 0; i < ncpus; i++) {
        ulong dst = (i + dst_add) % ncpus;
        on_cpu_async(i, func, (void *)dst);
    }
    /* Wait for every cpu to finish its step... */
    while (atomic_read(&ncpus_done) != ncpus)
        pause();
    /* ...then give interrupts/hypercall effects time to land. */
    while (wait_cycles--)
        pause();
    if (is_ok)
        for (i = 0; i < ncpus; i++)
            ret += is_ok(i);
    return ret;
}
#define HV_STATUS_INVALID_HYPERCALL_CODE 2
/* SynIC message/event test driver: set every cpu up, then exercise
 * self/cross-cpu delivery and the busy/pending paths for both messages
 * and events. */
int main(int ac, char **av)
{
    int ncpus, ncpus_ok, i;
    if (!synic_supported()) {
        report_skip("Hyper-V SynIC is not supported");
        goto summary;
    }
    setup_vm();
    smp_init();
    ncpus = cpu_count();
    if (ncpus > MAX_CPUS)
        report_abort("# cpus: %d > %d", ncpus, MAX_CPUS);
    /* Both SINT vectors funnel into the same counting ISR. */
    handle_irq(MSG_VEC, sint_isr);
    handle_irq(EVT_VEC, sint_isr);
    setup_hypercall();
    /* Probe for connection support with a bogus signal-event call. */
    if (do_hypercall(HVCALL_SIGNAL_EVENT, 0x1234, 1) ==
        HV_STATUS_INVALID_HYPERCALL_CODE) {
        report_skip("Hyper-V SynIC connections are not supported");
        goto summary;
    }
    for (i = 0; i < ncpus; i++)
        on_cpu(i, setup_cpu, (void *)read_cr3());
    /* Messages: self, cross-cpu, busy slot, then drain the pending one. */
    ncpus_ok = run_test(ncpus, 0, WAIT_CYCLES, do_msg, msg_ok);
    report("send message to self: %d/%d",
           ncpus_ok == ncpus, ncpus_ok, ncpus);
    run_test(ncpus, 0, 0, clear_msg, NULL);
    ncpus_ok = run_test(ncpus, 1, WAIT_CYCLES, do_msg, msg_ok);
    report("send message to another cpu: %d/%d",
           ncpus_ok == ncpus, ncpus_ok, ncpus);
    ncpus_ok = run_test(ncpus, 1, WAIT_CYCLES, do_msg, msg_busy);
    report("send message to busy slot: %d/%d",
           ncpus_ok == ncpus, ncpus_ok, ncpus);
    ncpus_ok = run_test(ncpus, 0, WAIT_CYCLES, clear_msg, msg_ok);
    report("receive pending message: %d/%d",
           ncpus_ok == ncpus, ncpus_ok, ncpus);
    /* Events: self, cross-cpu, already-set flag. */
    ncpus_ok = run_test(ncpus, 0, WAIT_CYCLES, do_evt, evt_ok);
    report("signal event on self: %d/%d",
           ncpus_ok == ncpus, ncpus_ok, ncpus);
    run_test(ncpus, 0, 0, clear_evt, NULL);
    ncpus_ok = run_test(ncpus, 1, WAIT_CYCLES, do_evt, evt_ok);
    report("signal event on another cpu: %d/%d",
           ncpus_ok == ncpus, ncpus_ok, ncpus);
    ncpus_ok = run_test(ncpus, 1, WAIT_CYCLES, do_evt, evt_busy);
    report("signal event already set: %d/%d",
           ncpus_ok == ncpus, ncpus_ok, ncpus);
    for (i = 0; i < ncpus; i++)
        on_cpu(i, teardown_cpu, NULL);
    teardown_hypercall();
summary:
    return report_summary();
}

View file

@ -0,0 +1,108 @@
/* msr tests */
#include "libcflat.h"
#include "processor.h"
#include "msr.h"
#include "desc.h"
/* Verify a syscall issued immediately after writing the MSR_*STAR family
 * actually lands on syscall_target (i.e. the MSRs take effect eagerly).
 * rcx/r11 are clobbered by the syscall; ss is restored by hand. */
static void test_syscall_lazy_load(void)
{
    extern void syscall_target();
    u16 cs = read_cs(), ss = read_ss();
    ulong tmp;
    wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_SCE);  /* enable syscall/sysret */
    wrmsr(MSR_LSTAR, (ulong)syscall_target);      /* 64-bit syscall entry */
    wrmsr(MSR_STAR, (uint64_t)cs << 32);          /* kernel selector base */
    /* syscall jumps straight to the next label; popf rebalances pushf */
    asm volatile("pushf; syscall; syscall_target: popf" : "=c"(tmp) : : "r11");
    write_ss(ss);  /* restore the saved stack segment */
    // will crash horribly if broken
    report("MSR_*STAR eager loading", true);
}
/*
* test handling of TF in syscall/sysret: #DB is raised if TF
* is 1 at the *end* of syscall/sysret.
*
* This uses 32-bit syscall/sysret because KVM emulates it on Intel processors.
* However, the same bug happens with 64-bit syscall/sysret if two vCPUs
* "race" to force the emulation of syscall/sysret.
*/
/* CS observed in the #DB handler; 0 until the first trap is taken. */
static uint16_t code_segment_upon_db;
/* #DB handler: record which code segment the trap came from and clear TF
 * so single-stepping stops. */
static void handle_db(struct ex_regs *regs)
{
    code_segment_upon_db = regs->cs;
    regs->rflags &= ~(1 << 8);  /* bit 8 = TF */
}
/* expects desired ring 3 flags in rax */
/* 64-bit syscall entry for the TF test: once the first #DB has been
 * recorded, fall through to back_to_test; otherwise sysretl back to the
 * 32-bit user code with the flags supplied in eax (via r11). */
asm("syscall32_target:\n"
    " cmp $0, code_segment_upon_db(%rip)\n"
    " jne back_to_test\n"
    " mov %eax,%r11d\n"
    " sysretl\n");
/* 32-bit, ring-3 part of test_syscall_tf */
asm(" .code32\n"
    "syscall_tf_user32:\n"
    " pushf\n"
    " pop %eax\n"
    " or $(1<<8),%eax\n"  /* set TF */
    " push %eax\n"
    " popf\n"
    " syscall\n" /* singlestep trap taken after syscall */
    " syscall\n" /* jumps back to test_syscall_tf's body */
    " .code64\n");
/* Drive the 32-bit user code above and check which CS the #DB handler
 * observed; USER_CS32 means the trap fired at the right instruction. */
static void test_syscall_tf(void)
{
    extern void syscall32_target();
    extern void syscall_tf_user32();
    ulong rcx;
    wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_SCE);
    wrmsr(MSR_CSTAR, (ulong)syscall32_target);  /* 32-bit syscall entry */
    /* STAR[63:48]: sysret selector base; STAR[47:32]: syscall base. */
    wrmsr(MSR_STAR, ((uint64_t)USER_CS32 << 48) | ((uint64_t)KERNEL_CS64 << 32));
    /* Mask TF (and friends) on syscall entry. */
    wrmsr(MSR_SYSCALL_MASK, X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_NT);
    handle_exception(DB_VECTOR, handle_db);
    /* good:
     * sysret to syscall_tf_user32
     * popf sets TF (singlestep starts on the next instruction)
     * syscall to syscall32_target -> TF cleared and no singlestep
     * sysretl sets TF
     * handle_db sets code_segment_upon_db to USER_CS32 and clears TF
     * syscall to syscall32_target
     * syscall32_target jumps to back_to_test
     *
     * bad:
     * sysret to syscall_tf_user32
     * popf sets TF (singlestep starts on the next instruction)
     * syscall to syscall32_target, TF cleared and wrong singlestep exception
     * handle_db sets code_segment_upon_db to KERNEL_CS64
     * syscall32_target jumps to back_to_test
     */
    rcx = (ulong)syscall_tf_user32;
    asm volatile(" push %%rbp\n"
                 " pushf; pop %%rax\n" // expected by syscall32_target
                 " sysret\n"
                 "back_to_test:\n"
                 " pop %%rbp"
                 : "+c"(rcx) :
                 : "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11",
                   "r12", "r13", "r14", "r15");
    if (code_segment_upon_db != USER_CS32) {
        printf("wrong CS (%#04x)!\n", code_segment_upon_db);
    }
    report("syscall TF handling", code_segment_upon_db == USER_CS32);
}
/* MSR test driver: eager *STAR loading, then TF-across-syscall handling. */
int main(int ac, char **av)
{
    setup_idt();  /* install exception handlers (handle_db relies on it) */
    test_syscall_lazy_load();
    test_syscall_tf();
    return report_summary();
}