diff options
Diffstat (limited to 'arch/powerpc/kvm')
43 files changed, 19836 insertions, 0 deletions
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c new file mode 100644 index 00000000..7b612a76 --- /dev/null +++ b/arch/powerpc/kvm/44x.c @@ -0,0 +1,180 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#include <linux/kvm_host.h> +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/export.h> + +#include <asm/reg.h> +#include <asm/cputable.h> +#include <asm/tlbflush.h> +#include <asm/kvm_44x.h> +#include <asm/kvm_ppc.h> + +#include "44x_tlb.h" + +void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + kvmppc_44x_tlb_load(vcpu); +} + +void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) +{ + kvmppc_44x_tlb_put(vcpu); +} + +int kvmppc_core_check_processor_compat(void) +{ + int r; + + if (strncmp(cur_cpu_spec->platform, "ppc440", 6) == 0) + r = 0; + else + r = -ENOTSUPP; + + return r; +} + +int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[0]; + int i; + + tlbe->tid = 0; + tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID; + tlbe->word1 = 0; + tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR; + + tlbe++; + tlbe->tid = 0; + tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID; + tlbe->word1 = 0xef600000; + tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR + | PPC44x_TLB_I | PPC44x_TLB_G; + + /* Since the guest can directly access the timebase, it must know the + * real timebase frequency. Accordingly, it must see the state of + * CCR1[TCS]. */ + /* XXX CCR1 doesn't exist on all 440 SoCs. */ + vcpu->arch.ccr1 = mfspr(SPRN_CCR1); + + for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) + vcpu_44x->shadow_refs[i].gtlb_index = -1; + + vcpu->arch.cpu_type = KVM_CPU_440; + + return 0; +} + +/* 'linear_address' is actually an encoding of AS|PID|EADDR . */ +int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + int index; + gva_t eaddr; + u8 pid; + u8 as; + + eaddr = tr->linear_address; + pid = (tr->linear_address >> 32) & 0xff; + as = (tr->linear_address >> 40) & 0x1; + + index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as); + if (index == -1) { + tr->valid = 0; + return 0; + } + + tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr); + /* XXX what does "writeable" and "usermode" even mean? */ + tr->valid = 1; + + return 0; +} + +void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + kvmppc_get_sregs_ivor(vcpu, sregs); +} + +int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + return kvmppc_set_sregs_ivor(vcpu, sregs); +} + +struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +{ + struct kvmppc_vcpu_44x *vcpu_44x; + struct kvm_vcpu *vcpu; + int err; + + vcpu_44x = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + if (!vcpu_44x) { + err = -ENOMEM; + goto out; + } + + vcpu = &vcpu_44x->vcpu; + err = kvm_vcpu_init(vcpu, kvm, id); + if (err) + goto free_vcpu; + + vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO); + if (!vcpu->arch.shared) + goto uninit_vcpu; + + return vcpu; + +uninit_vcpu: + kvm_vcpu_uninit(vcpu); +free_vcpu: + kmem_cache_free(kvm_vcpu_cache, vcpu_44x); +out: + return ERR_PTR(err); +} + +void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + + free_page((unsigned long)vcpu->arch.shared); + kvm_vcpu_uninit(vcpu); + kmem_cache_free(kvm_vcpu_cache, vcpu_44x); +} + +static int __init kvmppc_44x_init(void) +{ + int r; + + r = kvmppc_booke_init(); + if (r) + return r; + + return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE); +} + +static void __exit kvmppc_44x_exit(void) +{ + kvmppc_booke_exit(); +} + +module_init(kvmppc_44x_init); +module_exit(kvmppc_44x_exit); diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c new file mode 100644 index 00000000..549bb2c9 --- /dev/null +++ b/arch/powerpc/kvm/44x_emulate.c @@ -0,0 +1,183 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#include <asm/kvm_ppc.h> +#include <asm/dcr.h> +#include <asm/dcr-regs.h> +#include <asm/disassemble.h> +#include <asm/kvm_44x.h> +#include "timing.h" + +#include "booke.h" +#include "44x_tlb.h" + +#define XOP_MFDCR 323 +#define XOP_MTDCR 451 +#define XOP_TLBSX 914 +#define XOP_ICCCI 966 +#define XOP_TLBWE 978 + +int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int inst, int *advance) +{ + int emulated = EMULATE_DONE; + int dcrn; + int ra; + int rb; + int rc; + int rs; + int rt; + int ws; + + switch (get_op(inst)) { + case 31: + switch (get_xop(inst)) { + + case XOP_MFDCR: + dcrn = get_dcrn(inst); + rt = get_rt(inst); + + /* The guest may access CPR0 registers to determine the timebase + * frequency, and it must know the real host frequency because it + * can directly access the timebase registers. + * + * It would be possible to emulate those accesses in userspace, + * but userspace can really only figure out the end frequency. + * We could decompose that into the factors that compute it, but + * that's tricky math, and it's easier to just report the real + * CPR0 values. + */ + switch (dcrn) { + case DCRN_CPR0_CONFIG_ADDR: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr); + break; + case DCRN_CPR0_CONFIG_DATA: + local_irq_disable(); + mtdcr(DCRN_CPR0_CONFIG_ADDR, + vcpu->arch.cpr0_cfgaddr); + kvmppc_set_gpr(vcpu, rt, + mfdcr(DCRN_CPR0_CONFIG_DATA)); + local_irq_enable(); + break; + default: + run->dcr.dcrn = dcrn; + run->dcr.data = 0; + run->dcr.is_write = 0; + vcpu->arch.io_gpr = rt; + vcpu->arch.dcr_needed = 1; + kvmppc_account_exit(vcpu, DCR_EXITS); + emulated = EMULATE_DO_DCR; + } + + break; + + case XOP_MTDCR: + dcrn = get_dcrn(inst); + rs = get_rs(inst); + + /* emulate some access in kernel */ + switch (dcrn) { + case DCRN_CPR0_CONFIG_ADDR: + vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs); + break; + default: + run->dcr.dcrn = dcrn; + run->dcr.data = kvmppc_get_gpr(vcpu, rs); + run->dcr.is_write = 1; + vcpu->arch.dcr_needed = 1; + kvmppc_account_exit(vcpu, DCR_EXITS); + emulated = EMULATE_DO_DCR; + } + + break; + + case XOP_TLBWE: + ra = get_ra(inst); + rs = get_rs(inst); + ws = get_ws(inst); + emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws); + break; + + case XOP_TLBSX: + rt = get_rt(inst); + ra = get_ra(inst); + rb = get_rb(inst); + rc = get_rc(inst); + emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc); + break; + + case XOP_ICCCI: + break; + + default: + emulated = EMULATE_FAIL; + } + + break; + + default: + emulated = EMULATE_FAIL; + } + + if (emulated == EMULATE_FAIL) + emulated = kvmppc_booke_emulate_op(run, vcpu, inst, advance); + + return emulated; +} + +int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) +{ + int emulated = EMULATE_DONE; + + switch (sprn) { + case SPRN_PID: + kvmppc_set_pid(vcpu, kvmppc_get_gpr(vcpu, rs)); break; + case SPRN_MMUCR: + vcpu->arch.mmucr = kvmppc_get_gpr(vcpu, rs); break; + case SPRN_CCR0: + vcpu->arch.ccr0 = kvmppc_get_gpr(vcpu, rs); break; + case SPRN_CCR1: + vcpu->arch.ccr1 = kvmppc_get_gpr(vcpu, rs); break; + default: + emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs); + } + + return emulated; +} + +int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) +{ + int emulated = EMULATE_DONE; + + switch (sprn) { + case SPRN_PID: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.pid); break; + case SPRN_MMUCR: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.mmucr); break; + case SPRN_CCR0: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr0); break; + case SPRN_CCR1: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr1); break; + default: + emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt); + } + + return emulated; +} + diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c new file mode 100644 index 00000000..33aa715d --- /dev/null +++ b/arch/powerpc/kvm/44x_tlb.c @@ -0,0 +1,524 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2007 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/highmem.h> + +#include <asm/tlbflush.h> +#include <asm/mmu-44x.h> +#include <asm/kvm_ppc.h> +#include <asm/kvm_44x.h> +#include "timing.h" + +#include "44x_tlb.h" +#include "trace.h" + +#ifndef PPC44x_TLBE_SIZE +#define PPC44x_TLBE_SIZE PPC44x_TLB_4K +#endif + +#define PAGE_SIZE_4K (1<<12) +#define PAGE_MASK_4K (~(PAGE_SIZE_4K - 1)) + +#define PPC44x_TLB_UATTR_MASK \ + (PPC44x_TLB_U0|PPC44x_TLB_U1|PPC44x_TLB_U2|PPC44x_TLB_U3) +#define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW) +#define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW) + +#ifdef DEBUG +void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + struct kvmppc_44x_tlbe *tlbe; + int i; + + printk("vcpu %d TLB dump:\n", vcpu->vcpu_id); + printk("| %2s | %3s | %8s | %8s | %8s |\n", + "nr", "tid", "word0", "word1", "word2"); + + for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) { + tlbe = &vcpu_44x->guest_tlb[i]; + if (tlbe->word0 & PPC44x_TLB_VALID) + printk(" G%2d | %02X | %08X | %08X | %08X |\n", + i, tlbe->tid, tlbe->word0, tlbe->word1, + tlbe->word2); + } +} +#endif + +static inline void kvmppc_44x_tlbie(unsigned int index) +{ + /* 0 <= index < 64, so the V bit is clear and we can use the index as + * word0. */ + asm volatile( + "tlbwe %[index], %[index], 0\n" + : + : [index] "r"(index) + ); +} + +static inline void kvmppc_44x_tlbre(unsigned int index, + struct kvmppc_44x_tlbe *tlbe) +{ + asm volatile( + "tlbre %[word0], %[index], 0\n" + "mfspr %[tid], %[sprn_mmucr]\n" + "andi. %[tid], %[tid], 0xff\n" + "tlbre %[word1], %[index], 1\n" + "tlbre %[word2], %[index], 2\n" + : [word0] "=r"(tlbe->word0), + [word1] "=r"(tlbe->word1), + [word2] "=r"(tlbe->word2), + [tid] "=r"(tlbe->tid) + : [index] "r"(index), + [sprn_mmucr] "i"(SPRN_MMUCR) + : "cc" + ); +} + +static inline void kvmppc_44x_tlbwe(unsigned int index, + struct kvmppc_44x_tlbe *stlbe) +{ + unsigned long tmp; + + asm volatile( + "mfspr %[tmp], %[sprn_mmucr]\n" + "rlwimi %[tmp], %[tid], 0, 0xff\n" + "mtspr %[sprn_mmucr], %[tmp]\n" + "tlbwe %[word0], %[index], 0\n" + "tlbwe %[word1], %[index], 1\n" + "tlbwe %[word2], %[index], 2\n" + : [tmp] "=&r"(tmp) + : [word0] "r"(stlbe->word0), + [word1] "r"(stlbe->word1), + [word2] "r"(stlbe->word2), + [tid] "r"(stlbe->tid), + [index] "r"(index), + [sprn_mmucr] "i"(SPRN_MMUCR) + ); +} + +static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode) +{ + /* We only care about the guest's permission and user bits. */ + attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_UATTR_MASK; + + if (!usermode) { + /* Guest is in supervisor mode, so we need to translate guest + * supervisor permissions into user permissions. */ + attrib &= ~PPC44x_TLB_USER_PERM_MASK; + attrib |= (attrib & PPC44x_TLB_SUPER_PERM_MASK) << 3; + } + + /* Make sure host can always access this memory. */ + attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW; + + /* WIMGE = 0b00100 */ + attrib |= PPC44x_TLB_M; + + return attrib; +} + +/* Load shadow TLB back into hardware. */ +void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + int i; + + for (i = 0; i <= tlb_44x_hwater; i++) { + struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i]; + + if (get_tlb_v(stlbe) && get_tlb_ts(stlbe)) + kvmppc_44x_tlbwe(i, stlbe); + } +} + +static void kvmppc_44x_tlbe_set_modified(struct kvmppc_vcpu_44x *vcpu_44x, + unsigned int i) +{ + vcpu_44x->shadow_tlb_mod[i] = 1; +} + +/* Save hardware TLB to the vcpu, and invalidate all guest mappings. */ +void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + int i; + + for (i = 0; i <= tlb_44x_hwater; i++) { + struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i]; + + if (vcpu_44x->shadow_tlb_mod[i]) + kvmppc_44x_tlbre(i, stlbe); + + if (get_tlb_v(stlbe) && get_tlb_ts(stlbe)) + kvmppc_44x_tlbie(i); + } +} + + +/* Search the guest TLB for a matching entry. */ +int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, + unsigned int as) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + int i; + + /* XXX Replace loop with fancy data structures. */ + for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) { + struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[i]; + unsigned int tid; + + if (eaddr < get_tlb_eaddr(tlbe)) + continue; + + if (eaddr > get_tlb_end(tlbe)) + continue; + + tid = get_tlb_tid(tlbe); + if (tid && (tid != pid)) + continue; + + if (!get_tlb_v(tlbe)) + continue; + + if (get_tlb_ts(tlbe) != as) + continue; + + return i; + } + + return -1; +} + +gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index, + gva_t eaddr) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + struct kvmppc_44x_tlbe *gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; + unsigned int pgmask = get_tlb_bytes(gtlbe) - 1; + + return get_tlb_raddr(gtlbe) | (eaddr & pgmask); +} + +int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) +{ + unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS); + + return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); +} + +int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) +{ + unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS); + + return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); +} + +void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu) +{ +} + +void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu) +{ +} + +static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x, + unsigned int stlb_index) +{ + struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[stlb_index]; + + if (!ref->page) + return; + + /* Discard from the TLB. */ + /* Note: we could actually invalidate a host mapping, if the host overwrote + * this TLB entry since we inserted a guest mapping. */ + kvmppc_44x_tlbie(stlb_index); + + /* Now release the page. */ + if (ref->writeable) + kvm_release_page_dirty(ref->page); + else + kvm_release_page_clean(ref->page); + + ref->page = NULL; + + /* XXX set tlb_44x_index to stlb_index? */ + + trace_kvm_stlb_inval(stlb_index); +} + +void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + int i; + + for (i = 0; i <= tlb_44x_hwater; i++) + kvmppc_44x_shadow_release(vcpu_44x, i); +} + +/** + * kvmppc_mmu_map -- create a host mapping for guest memory + * + * If the guest wanted a larger page than the host supports, only the first + * host page is mapped here and the rest are demand faulted. + * + * If the guest wanted a smaller page than the host page size, we map only the + * guest-size page (i.e. not a full host page mapping). + * + * Caller must ensure that the specified guest TLB entry is safe to insert into + * the shadow TLB. + */ +void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, + unsigned int gtlb_index) +{ + struct kvmppc_44x_tlbe stlbe; + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + struct kvmppc_44x_tlbe *gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; + struct kvmppc_44x_shadow_ref *ref; + struct page *new_page; + hpa_t hpaddr; + gfn_t gfn; + u32 asid = gtlbe->tid; + u32 flags = gtlbe->word2; + u32 max_bytes = get_tlb_bytes(gtlbe); + unsigned int victim; + + /* Select TLB entry to clobber. Indirectly guard against races with the TLB + * miss handler by disabling interrupts. */ + local_irq_disable(); + victim = ++tlb_44x_index; + if (victim > tlb_44x_hwater) + victim = 0; + tlb_44x_index = victim; + local_irq_enable(); + + /* Get reference to new page. */ + gfn = gpaddr >> PAGE_SHIFT; + new_page = gfn_to_page(vcpu->kvm, gfn); + if (is_error_page(new_page)) { + printk(KERN_ERR "Couldn't get guest page for gfn %llx!\n", + (unsigned long long)gfn); + kvm_release_page_clean(new_page); + return; + } + hpaddr = page_to_phys(new_page); + + /* Invalidate any previous shadow mappings. */ + kvmppc_44x_shadow_release(vcpu_44x, victim); + + /* XXX Make sure (va, size) doesn't overlap any other + * entries. 440x6 user manual says the result would be + * "undefined." */ + + /* XXX what about AS? */ + + /* Force TS=1 for all guest mappings. */ + stlbe.word0 = PPC44x_TLB_VALID | PPC44x_TLB_TS; + + if (max_bytes >= PAGE_SIZE) { + /* Guest mapping is larger than or equal to host page size. We can use + * a "native" host mapping. */ + stlbe.word0 |= (gvaddr & PAGE_MASK) | PPC44x_TLBE_SIZE; + } else { + /* Guest mapping is smaller than host page size. We must restrict the + * size of the mapping to be at most the smaller of the two, but for + * simplicity we fall back to a 4K mapping (this is probably what the + * guest is using anyways). */ + stlbe.word0 |= (gvaddr & PAGE_MASK_4K) | PPC44x_TLB_4K; + + /* 'hpaddr' is a host page, which is larger than the mapping we're + * inserting here. To compensate, we must add the in-page offset to the + * sub-page. */ + hpaddr |= gpaddr & (PAGE_MASK ^ PAGE_MASK_4K); + } + + stlbe.word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf); + stlbe.word2 = kvmppc_44x_tlb_shadow_attrib(flags, + vcpu->arch.shared->msr & MSR_PR); + stlbe.tid = !(asid & 0xff); + + /* Keep track of the reference so we can properly release it later. */ + ref = &vcpu_44x->shadow_refs[victim]; + ref->page = new_page; + ref->gtlb_index = gtlb_index; + ref->writeable = !!(stlbe.word2 & PPC44x_TLB_UW); + ref->tid = stlbe.tid; + + /* Insert shadow mapping into hardware TLB. */ + kvmppc_44x_tlbe_set_modified(vcpu_44x, victim); + kvmppc_44x_tlbwe(victim, &stlbe); + trace_kvm_stlb_write(victim, stlbe.tid, stlbe.word0, stlbe.word1, + stlbe.word2); +} + +/* For a particular guest TLB entry, invalidate the corresponding host TLB + * mappings and release the host pages. */ +static void kvmppc_44x_invalidate(struct kvm_vcpu *vcpu, + unsigned int gtlb_index) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + int i; + + for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) { + struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i]; + if (ref->gtlb_index == gtlb_index) + kvmppc_44x_shadow_release(vcpu_44x, i); + } +} + +void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) +{ + int usermode = vcpu->arch.shared->msr & MSR_PR; + + vcpu->arch.shadow_pid = !usermode; +} + +void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + int i; + + if (unlikely(vcpu->arch.pid == new_pid)) + return; + + vcpu->arch.pid = new_pid; + + /* Guest userspace runs with TID=0 mappings and PID=0, to make sure it + * can't access guest kernel mappings (TID=1). When we switch to a new + * guest PID, which will also use host PID=0, we must discard the old guest + * userspace mappings. */ + for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) { + struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i]; + + if (ref->tid == 0) + kvmppc_44x_shadow_release(vcpu_44x, i); + } +} + +static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, + const struct kvmppc_44x_tlbe *tlbe) +{ + gpa_t gpa; + + if (!get_tlb_v(tlbe)) + return 0; + + /* Does it match current guest AS? */ + /* XXX what about IS != DS? */ + if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS)) + return 0; + + gpa = get_tlb_raddr(tlbe); + if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT)) + /* Mapping is not for RAM. */ + return 0; + + return 1; +} + +int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + struct kvmppc_44x_tlbe *tlbe; + unsigned int gtlb_index; + + gtlb_index = kvmppc_get_gpr(vcpu, ra); + if (gtlb_index >= KVM44x_GUEST_TLB_SIZE) { + printk("%s: index %d\n", __func__, gtlb_index); + kvmppc_dump_vcpu(vcpu); + return EMULATE_FAIL; + } + + tlbe = &vcpu_44x->guest_tlb[gtlb_index]; + + /* Invalidate shadow mappings for the about-to-be-clobbered TLB entry. */ + if (tlbe->word0 & PPC44x_TLB_VALID) + kvmppc_44x_invalidate(vcpu, gtlb_index); + + switch (ws) { + case PPC44x_TLB_PAGEID: + tlbe->tid = get_mmucr_stid(vcpu); + tlbe->word0 = kvmppc_get_gpr(vcpu, rs); + break; + + case PPC44x_TLB_XLAT: + tlbe->word1 = kvmppc_get_gpr(vcpu, rs); + break; + + case PPC44x_TLB_ATTRIB: + tlbe->word2 = kvmppc_get_gpr(vcpu, rs); + break; + + default: + return EMULATE_FAIL; + } + + if (tlbe_is_host_safe(vcpu, tlbe)) { + gva_t eaddr; + gpa_t gpaddr; + u32 bytes; + + eaddr = get_tlb_eaddr(tlbe); + gpaddr = get_tlb_raddr(tlbe); + + /* Use the advertised page size to mask effective and real addrs. */ + bytes = get_tlb_bytes(tlbe); + eaddr &= ~(bytes - 1); + gpaddr &= ~(bytes - 1); + + kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index); + } + + trace_kvm_gtlb_write(gtlb_index, tlbe->tid, tlbe->word0, tlbe->word1, + tlbe->word2); + + kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); + return EMULATE_DONE; +} + +int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc) +{ + u32 ea; + int gtlb_index; + unsigned int as = get_mmucr_sts(vcpu); + unsigned int pid = get_mmucr_stid(vcpu); + + ea = kvmppc_get_gpr(vcpu, rb); + if (ra) + ea += kvmppc_get_gpr(vcpu, ra); + + gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as); + if (rc) { + u32 cr = kvmppc_get_cr(vcpu); + + if (gtlb_index < 0) + kvmppc_set_cr(vcpu, cr & ~0x20000000); + else + kvmppc_set_cr(vcpu, cr | 0x20000000); + } + kvmppc_set_gpr(vcpu, rt, gtlb_index); + + kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); + return EMULATE_DONE; +} diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h new file mode 100644 index 00000000..a9ff80e5 --- /dev/null +++ b/arch/powerpc/kvm/44x_tlb.h @@ -0,0 +1,86 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2007 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#ifndef __KVM_POWERPC_TLB_H__ +#define __KVM_POWERPC_TLB_H__ + +#include <linux/kvm_host.h> +#include <asm/mmu-44x.h> + +extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, + unsigned int pid, unsigned int as); + +extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, + u8 rc); +extern int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws); + +/* TLB helper functions */ +static inline unsigned int get_tlb_size(const struct kvmppc_44x_tlbe *tlbe) +{ + return (tlbe->word0 >> 4) & 0xf; +} + +static inline gva_t get_tlb_eaddr(const struct kvmppc_44x_tlbe *tlbe) +{ + return tlbe->word0 & 0xfffffc00; +} + +static inline gva_t get_tlb_bytes(const struct kvmppc_44x_tlbe *tlbe) +{ + unsigned int pgsize = get_tlb_size(tlbe); + return 1 << 10 << (pgsize << 1); +} + +static inline gva_t get_tlb_end(const struct kvmppc_44x_tlbe *tlbe) +{ + return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1; +} + +static inline u64 get_tlb_raddr(const struct kvmppc_44x_tlbe *tlbe) +{ + u64 word1 = tlbe->word1; + return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00); +} + +static inline unsigned int get_tlb_tid(const struct kvmppc_44x_tlbe *tlbe) +{ + return tlbe->tid & 0xff; +} + +static inline unsigned int get_tlb_ts(const struct kvmppc_44x_tlbe *tlbe) +{ + return (tlbe->word0 >> 8) & 0x1; +} + +static inline unsigned int get_tlb_v(const struct kvmppc_44x_tlbe *tlbe) +{ + return (tlbe->word0 >> 9) & 0x1; +} + +static inline unsigned int get_mmucr_stid(const struct kvm_vcpu *vcpu) +{ + return vcpu->arch.mmucr & 0xff; +} + +static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.mmucr >> 16) & 0x1; +} + +#endif /* __KVM_POWERPC_TLB_H__ */ diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig new file mode 100644 index 00000000..8f64709a --- /dev/null +++ b/arch/powerpc/kvm/Kconfig @@ -0,0 +1,134 @@ +# +# KVM configuration +# + +source "virt/kvm/Kconfig" + +menuconfig VIRTUALIZATION + bool "Virtualization" + ---help--- + Say Y here to get to see options for using your Linux host to run + other operating systems inside virtual machines (guests). + This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and + disabled. + +if VIRTUALIZATION + +config KVM + bool + select PREEMPT_NOTIFIERS + select ANON_INODES + +config KVM_BOOK3S_HANDLER + bool + +config KVM_BOOK3S_32_HANDLER + bool + select KVM_BOOK3S_HANDLER + select KVM_MMIO + +config KVM_BOOK3S_64_HANDLER + bool + select KVM_BOOK3S_HANDLER + +config KVM_BOOK3S_PR + bool + select KVM_MMIO + +config KVM_BOOK3S_32 + tristate "KVM support for PowerPC book3s_32 processors" + depends on EXPERIMENTAL && PPC_BOOK3S_32 && !SMP && !PTE_64BIT + select KVM + select KVM_BOOK3S_32_HANDLER + select KVM_BOOK3S_PR + ---help--- + Support running unmodified book3s_32 guest kernels + in virtual machines on book3s_32 host processors. + + This module provides access to the hardware capabilities through + a character device node named /dev/kvm. + + If unsure, say N. + +config KVM_BOOK3S_64 + tristate "KVM support for PowerPC book3s_64 processors" + depends on EXPERIMENTAL && PPC_BOOK3S_64 + select KVM_BOOK3S_64_HANDLER + select KVM + ---help--- + Support running unmodified book3s_64 and book3s_32 guest kernels + in virtual machines on book3s_64 host processors. + + This module provides access to the hardware capabilities through + a character device node named /dev/kvm. + + If unsure, say N. + +config KVM_BOOK3S_64_HV + bool "KVM support for POWER7 and PPC970 using hypervisor mode in host" + depends on KVM_BOOK3S_64 + select MMU_NOTIFIER + ---help--- + Support running unmodified book3s_64 guest kernels in + virtual machines on POWER7 and PPC970 processors that have + hypervisor mode available to the host. + + If you say Y here, KVM will use the hardware virtualization + facilities of POWER7 (and later) processors, meaning that + guest operating systems will run at full hardware speed + using supervisor and user modes. However, this also means + that KVM is not usable under PowerVM (pHyp), is only usable + on POWER7 (or later) processors and PPC970-family processors, + and cannot emulate a different processor from the host processor. + + If unsure, say N. + +config KVM_BOOK3S_64_PR + def_bool y + depends on KVM_BOOK3S_64 && !KVM_BOOK3S_64_HV + select KVM_BOOK3S_PR + +config KVM_440 + bool "KVM support for PowerPC 440 processors" + depends on EXPERIMENTAL && 44x + select KVM + select KVM_MMIO + ---help--- + Support running unmodified 440 guest kernels in virtual machines on + 440 host processors. + + This module provides access to the hardware capabilities through + a character device node named /dev/kvm. + + If unsure, say N. + +config KVM_EXIT_TIMING + bool "Detailed exit timing" + depends on KVM_440 || KVM_E500 + ---help--- + Calculate elapsed time for every exit/enter cycle. A per-vcpu + report is available in debugfs kvm/vm#_vcpu#_timing. + The overhead is relatively small, however it is not recommended for + production environments. + + If unsure, say N. + +config KVM_E500 + bool "KVM support for PowerPC E500 processors" + depends on EXPERIMENTAL && E500 + select KVM + select KVM_MMIO + ---help--- + Support running unmodified E500 guest kernels in virtual machines on + E500 host processors. + + This module provides access to the hardware capabilities through + a character device node named /dev/kvm. + + If unsure, say N. + +source drivers/vhost/Kconfig + +endif # VIRTUALIZATION diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile new file mode 100644 index 00000000..3688aeec --- /dev/null +++ b/arch/powerpc/kvm/Makefile @@ -0,0 +1,95 @@ +# +# Makefile for Kernel-based Virtual Machine module +# + +subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror + +ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm + +common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) + +CFLAGS_44x_tlb.o := -I. +CFLAGS_e500_tlb.o := -I. +CFLAGS_emulate.o := -I. + +common-objs-y += powerpc.o emulate.o +obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o +obj-$(CONFIG_KVM_BOOK3S_HANDLER) += book3s_exports.o + +AFLAGS_booke_interrupts.o := -I$(obj) + +kvm-440-objs := \ + $(common-objs-y) \ + booke.o \ + booke_emulate.o \ + booke_interrupts.o \ + 44x.o \ + 44x_tlb.o \ + 44x_emulate.o +kvm-objs-$(CONFIG_KVM_440) := $(kvm-440-objs) + +kvm-e500-objs := \ + $(common-objs-y) \ + booke.o \ + booke_emulate.o \ + booke_interrupts.o \ + e500.o \ + e500_tlb.o \ + e500_emulate.o +kvm-objs-$(CONFIG_KVM_E500) := $(kvm-e500-objs) + +kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \ + ../../../virt/kvm/coalesced_mmio.o \ + fpu.o \ + book3s_paired_singles.o \ + book3s_pr.o \ + book3s_pr_papr.o \ + book3s_emulate.o \ + book3s_interrupts.o \ + book3s_mmu_hpte.o \ + book3s_64_mmu_host.o \ + book3s_64_mmu.o \ + book3s_32_mmu.o +kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \ + book3s_rmhandlers.o + +kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ + book3s_hv.o \ + book3s_hv_interrupts.o \ + book3s_64_mmu_hv.o +kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ + book3s_hv_rmhandlers.o \ + book3s_hv_rm_mmu.o \ + book3s_64_vio_hv.o \ + book3s_hv_builtin.o + +kvm-book3s_64-module-objs := \ + ../../../virt/kvm/kvm_main.o \ + powerpc.o \ + emulate.o \ + book3s.o \ + $(kvm-book3s_64-objs-y) + +kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs) + +kvm-book3s_32-objs := \ + $(common-objs-y) \ + fpu.o \ + book3s_paired_singles.o \ + book3s.o \ + book3s_pr.o \ + book3s_emulate.o \ + book3s_interrupts.o \ + book3s_mmu_hpte.o \ + book3s_32_mmu_host.o \ + book3s_32_mmu.o +kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs) + +kvm-objs := $(kvm-objs-m) $(kvm-objs-y) + +obj-$(CONFIG_KVM_440) += kvm.o +obj-$(CONFIG_KVM_E500) += kvm.o +obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o +obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o + +obj-y += $(kvm-book3s_64-builtin-objs-y) diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c new file mode 100644 index 00000000..7d54f4ed --- /dev/null +++ b/arch/powerpc/kvm/book3s.c @@ -0,0 +1,486 @@ +/* + * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved. + * + * Authors: + * Alexander Graf <agraf@suse.de> + * Kevin Wolf <mail@kevin-wolf.de> + * + * Description: + * This file is derived from arch/powerpc/kvm/44x.c, + * by Hollis Blanchard <hollisb@us.ibm.com>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include <linux/kvm_host.h> +#include <linux/err.h> +#include <linux/export.h> +#include <linux/slab.h> + +#include <asm/reg.h> +#include <asm/cputable.h> +#include <asm/cacheflush.h> +#include <asm/tlbflush.h> +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/kvm_ppc.h> +#include <asm/kvm_book3s.h> +#include <asm/mmu_context.h> +#include <asm/page.h> +#include <linux/gfp.h> +#include <linux/sched.h> +#include <linux/vmalloc.h> +#include <linux/highmem.h> + +#include "trace.h" + +#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU + +/* #define EXIT_DEBUG */ + +struct kvm_stats_debugfs_item debugfs_entries[] = { + { "exits", VCPU_STAT(sum_exits) }, + { "mmio", VCPU_STAT(mmio_exits) }, + { "sig", VCPU_STAT(signal_exits) }, + { "sysc", VCPU_STAT(syscall_exits) }, + { "inst_emu", VCPU_STAT(emulated_inst_exits) }, + { "dec", VCPU_STAT(dec_exits) }, + { "ext_intr", VCPU_STAT(ext_intr_exits) }, + { "queue_intr", VCPU_STAT(queue_intr) }, + { "halt_wakeup", VCPU_STAT(halt_wakeup) }, + { "pf_storage", VCPU_STAT(pf_storage) }, + { "sp_storage", VCPU_STAT(sp_storage) }, + { "pf_instruc", VCPU_STAT(pf_instruc) }, + { "sp_instruc", VCPU_STAT(sp_instruc) }, + { "ld", VCPU_STAT(ld) }, + { "ld_slow", VCPU_STAT(ld_slow) }, + { "st", VCPU_STAT(st) }, + { "st_slow", VCPU_STAT(st_slow) }, + { NULL } +}; + +void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu) +{ +} + +void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) +{ +} + +void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) +{ + vcpu->arch.shared->srr0 = kvmppc_get_pc(vcpu); + vcpu->arch.shared->srr1 = vcpu->arch.shared->msr | flags; + kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec); + vcpu->arch.mmu.reset_msr(vcpu); +} + +static int kvmppc_book3s_vec2irqprio(unsigned int vec) +{ + unsigned int prio; + + switch (vec) { + case 0x100: prio = BOOK3S_IRQPRIO_SYSTEM_RESET; break; + case 0x200: prio = BOOK3S_IRQPRIO_MACHINE_CHECK; break; + case 0x300: prio = BOOK3S_IRQPRIO_DATA_STORAGE; break; + case 0x380: prio = BOOK3S_IRQPRIO_DATA_SEGMENT; break; + case 0x400: prio = BOOK3S_IRQPRIO_INST_STORAGE; break; + case 0x480: prio = BOOK3S_IRQPRIO_INST_SEGMENT; break; + case 0x500: prio = BOOK3S_IRQPRIO_EXTERNAL; break; + case 0x501: prio = BOOK3S_IRQPRIO_EXTERNAL_LEVEL; break; + case 0x600: prio = BOOK3S_IRQPRIO_ALIGNMENT; break; + case 0x700: prio = BOOK3S_IRQPRIO_PROGRAM; break; + case 0x800: prio = BOOK3S_IRQPRIO_FP_UNAVAIL; break; + case 0x900: prio = BOOK3S_IRQPRIO_DECREMENTER; break; + case 0xc00: prio = BOOK3S_IRQPRIO_SYSCALL; break; + case 0xd00: prio = BOOK3S_IRQPRIO_DEBUG; break; + case 0xf20: prio = BOOK3S_IRQPRIO_ALTIVEC; break; + case 0xf40: prio = BOOK3S_IRQPRIO_VSX; break; + default: prio = BOOK3S_IRQPRIO_MAX; break; + } + + return prio; +} + +static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, + unsigned int vec) +{ + unsigned long old_pending = vcpu->arch.pending_exceptions; + + clear_bit(kvmppc_book3s_vec2irqprio(vec), + &vcpu->arch.pending_exceptions); + + kvmppc_update_int_pending(vcpu, vcpu->arch.pending_exceptions, + old_pending); +} + +void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec) +{ + vcpu->stat.queue_intr++; + + set_bit(kvmppc_book3s_vec2irqprio(vec), + &vcpu->arch.pending_exceptions); +#ifdef EXIT_DEBUG + printk(KERN_INFO "Queueing interrupt %x\n", vec); +#endif +} + + +void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags) +{ + /* might as well deliver this straight away */ + kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_PROGRAM, flags); +} + +void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu) +{ + kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER); +} + +int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu) +{ + return test_bit(BOOK3S_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions); +} + +void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu) +{ + kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER); +} + +void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, + struct kvm_interrupt *irq) +{ + unsigned int vec = BOOK3S_INTERRUPT_EXTERNAL; + + if (irq->irq == KVM_INTERRUPT_SET_LEVEL) + vec = BOOK3S_INTERRUPT_EXTERNAL_LEVEL; + + kvmppc_book3s_queue_irqprio(vcpu, vec); +} + +void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, + struct kvm_interrupt *irq) +{ + kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); + kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL); +} + +int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) +{ + int deliver = 1; + int vec = 0; + bool crit = kvmppc_critical_section(vcpu); + + switch (priority) { + case BOOK3S_IRQPRIO_DECREMENTER: + deliver = (vcpu->arch.shared->msr & MSR_EE) && !crit; + vec = BOOK3S_INTERRUPT_DECREMENTER; + break; + case BOOK3S_IRQPRIO_EXTERNAL: + case BOOK3S_IRQPRIO_EXTERNAL_LEVEL: + deliver = (vcpu->arch.shared->msr & MSR_EE) && !crit; + vec = BOOK3S_INTERRUPT_EXTERNAL; + break; + case BOOK3S_IRQPRIO_SYSTEM_RESET: + vec = BOOK3S_INTERRUPT_SYSTEM_RESET; + break; + case BOOK3S_IRQPRIO_MACHINE_CHECK: + vec = BOOK3S_INTERRUPT_MACHINE_CHECK; + break; + case BOOK3S_IRQPRIO_DATA_STORAGE: + vec = BOOK3S_INTERRUPT_DATA_STORAGE; + break; + case BOOK3S_IRQPRIO_INST_STORAGE: + vec = BOOK3S_INTERRUPT_INST_STORAGE; + break; + case BOOK3S_IRQPRIO_DATA_SEGMENT: + vec = BOOK3S_INTERRUPT_DATA_SEGMENT; + break; + case BOOK3S_IRQPRIO_INST_SEGMENT: + vec = BOOK3S_INTERRUPT_INST_SEGMENT; + break; + case BOOK3S_IRQPRIO_ALIGNMENT: + vec = BOOK3S_INTERRUPT_ALIGNMENT; + break; + case BOOK3S_IRQPRIO_PROGRAM: + vec = BOOK3S_INTERRUPT_PROGRAM; + break; + case BOOK3S_IRQPRIO_VSX: + vec = BOOK3S_INTERRUPT_VSX; + break; + case BOOK3S_IRQPRIO_ALTIVEC: + vec = BOOK3S_INTERRUPT_ALTIVEC; + break; + case BOOK3S_IRQPRIO_FP_UNAVAIL: + vec = BOOK3S_INTERRUPT_FP_UNAVAIL; + break; + case BOOK3S_IRQPRIO_SYSCALL: + vec = BOOK3S_INTERRUPT_SYSCALL; + break; + case BOOK3S_IRQPRIO_DEBUG: + vec = BOOK3S_INTERRUPT_TRACE; + break; + case BOOK3S_IRQPRIO_PERFORMANCE_MONITOR: + vec = BOOK3S_INTERRUPT_PERFMON; + break; + default: + deliver = 0; + printk(KERN_ERR "KVM: Unknown interrupt: 0x%x\n", priority); + break; + } + +#if 0 + printk(KERN_INFO "Deliver interrupt 0x%x? %x\n", vec, deliver); +#endif + + if (deliver) + kvmppc_inject_interrupt(vcpu, vec, 0); + + return deliver; +} + +/* + * This function determines if an irqprio should be cleared once issued. + */ +static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority) +{ + switch (priority) { + case BOOK3S_IRQPRIO_DECREMENTER: + /* DEC interrupts get cleared by mtdec */ + return false; + case BOOK3S_IRQPRIO_EXTERNAL_LEVEL: + /* External interrupts get cleared by userspace */ + return false; + } + + return true; +} + +void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) +{ + unsigned long *pending = &vcpu->arch.pending_exceptions; + unsigned long old_pending = vcpu->arch.pending_exceptions; + unsigned int priority; + +#ifdef EXIT_DEBUG + if (vcpu->arch.pending_exceptions) + printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions); +#endif + priority = __ffs(*pending); + while (priority < BOOK3S_IRQPRIO_MAX) { + if (kvmppc_book3s_irqprio_deliver(vcpu, priority) && + clear_irqprio(vcpu, priority)) { + clear_bit(priority, &vcpu->arch.pending_exceptions); + break; + } + + priority = find_next_bit(pending, + BITS_PER_BYTE * sizeof(*pending), + priority + 1); + } + + /* Tell the guest about our interrupt status */ + kvmppc_update_int_pending(vcpu, *pending, old_pending); +} + +pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn) +{ + ulong mp_pa = vcpu->arch.magic_page_pa; + + /* Magic page override */ + if (unlikely(mp_pa) && + unlikely(((gfn << PAGE_SHIFT) & KVM_PAM) == + ((mp_pa & PAGE_MASK) & KVM_PAM))) { + ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK; + pfn_t pfn; + + pfn = (pfn_t)virt_to_phys((void*)shared_page) >> PAGE_SHIFT; + get_page(pfn_to_page(pfn)); + return pfn; + } + + return gfn_to_pfn(vcpu->kvm, gfn); +} + +static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, + struct kvmppc_pte *pte) +{ + int relocated = (vcpu->arch.shared->msr & (data ? MSR_DR : MSR_IR)); + int r; + + if (relocated) { + r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data); + } else { + pte->eaddr = eaddr; + pte->raddr = eaddr & KVM_PAM; + pte->vpage = VSID_REAL | eaddr >> 12; + pte->may_read = true; + pte->may_write = true; + pte->may_execute = true; + r = 0; + } + + return r; +} + +static hva_t kvmppc_bad_hva(void) +{ + return PAGE_OFFSET; +} + +static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte, + bool read) +{ + hva_t hpage; + + if (read && !pte->may_read) + goto err; + + if (!read && !pte->may_write) + goto err; + + hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT); + if (kvm_is_error_hva(hpage)) + goto err; + + return hpage | (pte->raddr & ~PAGE_MASK); +err: + return kvmppc_bad_hva(); +} + +int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, + bool data) +{ + struct kvmppc_pte pte; + + vcpu->stat.st++; + + if (kvmppc_xlate(vcpu, *eaddr, data, &pte)) + return -ENOENT; + + *eaddr = pte.raddr; + + if (!pte.may_write) + return -EPERM; + + if (kvm_write_guest(vcpu->kvm, pte.raddr, ptr, size)) + return EMULATE_DO_MMIO; + + return EMULATE_DONE; +} + +int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, + bool data) +{ + struct kvmppc_pte pte; + hva_t hva = *eaddr; + + vcpu->stat.ld++; + + if (kvmppc_xlate(vcpu, *eaddr, data, &pte)) + goto nopte; + + *eaddr = pte.raddr; + + hva = kvmppc_pte_to_hva(vcpu, &pte, true); + if (kvm_is_error_hva(hva)) + goto mmio; + + if (copy_from_user(ptr, (void __user *)hva, size)) { + printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva); + goto mmio; + } + + return EMULATE_DONE; + +nopte: + return -ENOENT; +mmio: + return EMULATE_DO_MMIO; +} + +int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) +{ + return 0; +} + +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + int i; + + regs->pc = kvmppc_get_pc(vcpu); + regs->cr = kvmppc_get_cr(vcpu); + regs->ctr = kvmppc_get_ctr(vcpu); + regs->lr = kvmppc_get_lr(vcpu); + regs->xer = kvmppc_get_xer(vcpu); + regs->msr = vcpu->arch.shared->msr; + regs->srr0 = vcpu->arch.shared->srr0; + regs->srr1 = vcpu->arch.shared->srr1; + regs->pid = vcpu->arch.pid; + regs->sprg0 = vcpu->arch.shared->sprg0; + regs->sprg1 = vcpu->arch.shared->sprg1; + regs->sprg2 = vcpu->arch.shared->sprg2; + regs->sprg3 = vcpu->arch.shared->sprg3; + regs->sprg4 = vcpu->arch.shared->sprg4; + regs->sprg5 = vcpu->arch.shared->sprg5; + regs->sprg6 = vcpu->arch.shared->sprg6; + regs->sprg7 = vcpu->arch.shared->sprg7; + + for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) + regs->gpr[i] = kvmppc_get_gpr(vcpu, i); + + return 0; +} + +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + int i; + + kvmppc_set_pc(vcpu, regs->pc); + kvmppc_set_cr(vcpu, regs->cr); + kvmppc_set_ctr(vcpu, regs->ctr); + kvmppc_set_lr(vcpu, regs->lr); + kvmppc_set_xer(vcpu, regs->xer); + kvmppc_set_msr(vcpu, regs->msr); + vcpu->arch.shared->srr0 = regs->srr0; + vcpu->arch.shared->srr1 = regs->srr1; + vcpu->arch.shared->sprg0 = regs->sprg0; + vcpu->arch.shared->sprg1 = regs->sprg1; + vcpu->arch.shared->sprg2 = regs->sprg2; + vcpu->arch.shared->sprg3 = regs->sprg3; + vcpu->arch.shared->sprg4 = regs->sprg4; + vcpu->arch.shared->sprg5 = regs->sprg5; + vcpu->arch.shared->sprg6 = regs->sprg6; + vcpu->arch.shared->sprg7 = regs->sprg7; + + for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) + kvmppc_set_gpr(vcpu, i, regs->gpr[i]); + + return 0; +} + +int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -ENOTSUPP; +} + +int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -ENOTSUPP; +} + +int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + return 0; +} + +void kvmppc_decrementer_func(unsigned long data) +{ + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; + + kvmppc_core_queue_dec(vcpu); + kvm_vcpu_kick(vcpu); +} diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c new file mode 100644 index 00000000..c8cefdd1 --- /dev/null +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -0,0 +1,419 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright SUSE Linux Products GmbH 2009 + * + * Authors: Alexander Graf <agraf@suse.de> + */ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/highmem.h> + +#include <asm/tlbflush.h> +#include <asm/kvm_ppc.h> +#include <asm/kvm_book3s.h> + +/* #define DEBUG_MMU */ +/* #define DEBUG_MMU_PTE */ +/* #define DEBUG_MMU_PTE_IP 0xfff14c40 */ + +#ifdef DEBUG_MMU +#define dprintk(X...) printk(KERN_INFO X) +#else +#define dprintk(X...) do { } while(0) +#endif + +#ifdef DEBUG_MMU_PTE +#define dprintk_pte(X...) printk(KERN_INFO X) +#else +#define dprintk_pte(X...) do { } while(0) +#endif + +#define PTEG_FLAG_ACCESSED 0x00000100 +#define PTEG_FLAG_DIRTY 0x00000080 +#ifndef SID_SHIFT +#define SID_SHIFT 28 +#endif + +static inline bool check_debug_ip(struct kvm_vcpu *vcpu) +{ +#ifdef DEBUG_MMU_PTE_IP + return vcpu->arch.pc == DEBUG_MMU_PTE_IP; +#else + return true; +#endif +} + +static inline u32 sr_vsid(u32 sr_raw) +{ + return sr_raw & 0x0fffffff; +} + +static inline bool sr_valid(u32 sr_raw) +{ + return (sr_raw & 0x80000000) ? false : true; +} + +static inline bool sr_ks(u32 sr_raw) +{ + return (sr_raw & 0x40000000) ? true: false; +} + +static inline bool sr_kp(u32 sr_raw) +{ + return (sr_raw & 0x20000000) ? true: false; +} + +static inline bool sr_nx(u32 sr_raw) +{ + return (sr_raw & 0x10000000) ? true: false; +} + +static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, + struct kvmppc_pte *pte, bool data); +static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, + u64 *vsid); + +static u32 find_sr(struct kvm_vcpu *vcpu, gva_t eaddr) +{ + return vcpu->arch.shared->sr[(eaddr >> 28) & 0xf]; +} + +static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, + bool data) +{ + u64 vsid; + struct kvmppc_pte pte; + + if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &pte, data)) + return pte.vpage; + + kvmppc_mmu_book3s_32_esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); + return (((u64)eaddr >> 12) & 0xffff) | (vsid << 16); +} + +static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu) +{ + kvmppc_set_msr(vcpu, 0); +} + +static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3s, + u32 sre, gva_t eaddr, + bool primary) +{ + u32 page, hash, pteg, htabmask; + hva_t r; + + page = (eaddr & 0x0FFFFFFF) >> 12; + htabmask = ((vcpu_book3s->sdr1 & 0x1FF) << 16) | 0xFFC0; + + hash = ((sr_vsid(sre) ^ page) << 6); + if (!primary) + hash = ~hash; + hash &= htabmask; + + pteg = (vcpu_book3s->sdr1 & 0xffff0000) | hash; + + dprintk("MMU: pc=0x%lx eaddr=0x%lx sdr1=0x%llx pteg=0x%x vsid=0x%x\n", + kvmppc_get_pc(&vcpu_book3s->vcpu), eaddr, vcpu_book3s->sdr1, pteg, + sr_vsid(sre)); + + r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT); + if (kvm_is_error_hva(r)) + return r; + return r | (pteg & ~PAGE_MASK); +} + +static u32 kvmppc_mmu_book3s_32_get_ptem(u32 sre, gva_t eaddr, bool primary) +{ + return ((eaddr & 0x0fffffff) >> 22) | (sr_vsid(sre) << 7) | + (primary ? 0 : 0x40) | 0x80000000; +} + +static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, + struct kvmppc_pte *pte, bool data) +{ + struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); + struct kvmppc_bat *bat; + int i; + + for (i = 0; i < 8; i++) { + if (data) + bat = &vcpu_book3s->dbat[i]; + else + bat = &vcpu_book3s->ibat[i]; + + if (vcpu->arch.shared->msr & MSR_PR) { + if (!bat->vp) + continue; + } else { + if (!bat->vs) + continue; + } + + if (check_debug_ip(vcpu)) + { + dprintk_pte("%cBAT %02d: 0x%lx - 0x%x (0x%x)\n", + data ? 'd' : 'i', i, eaddr, bat->bepi, + bat->bepi_mask); + } + if ((eaddr & bat->bepi_mask) == bat->bepi) { + u64 vsid; + kvmppc_mmu_book3s_32_esid_to_vsid(vcpu, + eaddr >> SID_SHIFT, &vsid); + vsid <<= 16; + pte->vpage = (((u64)eaddr >> 12) & 0xffff) | vsid; + + pte->raddr = bat->brpn | (eaddr & ~bat->bepi_mask); + pte->may_read = bat->pp; + pte->may_write = bat->pp > 1; + pte->may_execute = true; + if (!pte->may_read) { + printk(KERN_INFO "BAT is not readable!\n"); + continue; + } + if (!pte->may_write) { + /* let's treat r/o BATs as not-readable for now */ + dprintk_pte("BAT is read-only!\n"); + continue; + } + + return 0; + } + } + + return -ENOENT; +} + +static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr, + struct kvmppc_pte *pte, bool data, + bool primary) +{ + struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); + u32 sre; + hva_t ptegp; + u32 pteg[16]; + u32 ptem = 0; + int i; + int found = 0; + + sre = find_sr(vcpu, eaddr); + + dprintk_pte("SR 0x%lx: vsid=0x%x, raw=0x%x\n", eaddr >> 28, + sr_vsid(sre), sre); + + pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data); + + ptegp = kvmppc_mmu_book3s_32_get_pteg(vcpu_book3s, sre, eaddr, primary); + if (kvm_is_error_hva(ptegp)) { + printk(KERN_INFO "KVM: Invalid PTEG!\n"); + goto no_page_found; + } + + ptem = kvmppc_mmu_book3s_32_get_ptem(sre, eaddr, primary); + + if(copy_from_user(pteg, (void __user *)ptegp, sizeof(pteg))) { + printk(KERN_ERR "KVM: Can't copy data from 0x%lx!\n", ptegp); + goto no_page_found; + } + + for (i=0; i<16; i+=2) { + if (ptem == pteg[i]) { + u8 pp; + + pte->raddr = (pteg[i+1] & ~(0xFFFULL)) | (eaddr & 0xFFF); + pp = pteg[i+1] & 3; + + if ((sr_kp(sre) && (vcpu->arch.shared->msr & MSR_PR)) || + (sr_ks(sre) && !(vcpu->arch.shared->msr & MSR_PR))) + pp |= 4; + + pte->may_write = false; + pte->may_read = false; + pte->may_execute = true; + switch (pp) { + case 0: + case 1: + case 2: + case 6: + pte->may_write = true; + case 3: + case 5: + case 7: + pte->may_read = true; + break; + } + + if ( !pte->may_read ) + continue; + + dprintk_pte("MMU: Found PTE -> %x %x - %x\n", + pteg[i], pteg[i+1], pp); + found = 1; + break; + } + } + + /* Update PTE C and A bits, so the guest's swapper knows we used the + page */ + if (found) { + u32 oldpte = pteg[i+1]; + + if (pte->may_read) + pteg[i+1] |= PTEG_FLAG_ACCESSED; + if (pte->may_write) + pteg[i+1] |= PTEG_FLAG_DIRTY; + else + dprintk_pte("KVM: Mapping read-only page!\n"); + + /* Write back into the PTEG */ + if (pteg[i+1] != oldpte) + copy_to_user((void __user *)ptegp, pteg, sizeof(pteg)); + + return 0; + } + +no_page_found: + + if (check_debug_ip(vcpu)) { + dprintk_pte("KVM MMU: No PTE found (sdr1=0x%llx ptegp=0x%lx)\n", + to_book3s(vcpu)->sdr1, ptegp); + for (i=0; i<16; i+=2) { + dprintk_pte(" %02d: 0x%x - 0x%x (0x%x)\n", + i, pteg[i], pteg[i+1], ptem); + } + } + + return -ENOENT; +} + +static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, + struct kvmppc_pte *pte, bool data) +{ + int r; + ulong mp_ea = vcpu->arch.magic_page_ea; + + pte->eaddr = eaddr; + + /* Magic page override */ + if (unlikely(mp_ea) && + unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) && + !(vcpu->arch.shared->msr & MSR_PR)) { + pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data); + pte->raddr = vcpu->arch.magic_page_pa | (pte->raddr & 0xfff); + pte->raddr &= KVM_PAM; + pte->may_execute = true; + pte->may_read = true; + pte->may_write = true; + + return 0; + } + + r = kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, pte, data); + if (r < 0) + r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, true); + if (r < 0) + r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, false); + + return r; +} + + +static u32 kvmppc_mmu_book3s_32_mfsrin(struct kvm_vcpu *vcpu, u32 srnum) +{ + return vcpu->arch.shared->sr[srnum]; +} + +static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum, + ulong value) +{ + vcpu->arch.shared->sr[srnum] = value; + kvmppc_mmu_map_segment(vcpu, srnum << SID_SHIFT); +} + +static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool large) +{ + kvmppc_mmu_pte_flush(vcpu, ea, 0x0FFFF000); +} + +static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, + u64 *vsid) +{ + ulong ea = esid << SID_SHIFT; + u32 sr; + u64 gvsid = esid; + + if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { + sr = find_sr(vcpu, ea); + if (sr_valid(sr)) + gvsid = sr_vsid(sr); + } + + /* In case we only have one of MSR_IR or MSR_DR set, let's put + that in the real-mode context (and hope RM doesn't access + high memory) */ + switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { + case 0: + *vsid = VSID_REAL | esid; + break; + case MSR_IR: + *vsid = VSID_REAL_IR | gvsid; + break; + case MSR_DR: + *vsid = VSID_REAL_DR | gvsid; + break; + case MSR_DR|MSR_IR: + if (sr_valid(sr)) + *vsid = sr_vsid(sr); + else + *vsid = VSID_BAT | gvsid; + break; + default: + BUG(); + } + + if (vcpu->arch.shared->msr & MSR_PR) + *vsid |= VSID_PR; + + return 0; +} + +static bool kvmppc_mmu_book3s_32_is_dcbz32(struct kvm_vcpu *vcpu) +{ + return true; +} + + +void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu) +{ + struct kvmppc_mmu *mmu = &vcpu->arch.mmu; + + mmu->mtsrin = kvmppc_mmu_book3s_32_mtsrin; + mmu->mfsrin = kvmppc_mmu_book3s_32_mfsrin; + mmu->xlate = kvmppc_mmu_book3s_32_xlate; + mmu->reset_msr = kvmppc_mmu_book3s_32_reset_msr; + mmu->tlbie = kvmppc_mmu_book3s_32_tlbie; + mmu->esid_to_vsid = kvmppc_mmu_book3s_32_esid_to_vsid; + mmu->ea_to_vp = kvmppc_mmu_book3s_32_ea_to_vp; + mmu->is_dcbz32 = kvmppc_mmu_book3s_32_is_dcbz32; + + mmu->slbmte = NULL; + mmu->slbmfee = NULL; + mmu->slbmfev = NULL; + mmu->slbie = NULL; + mmu->slbia = NULL; +} diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c new file mode 100644 index 00000000..f922c29b --- /dev/null +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -0,0 +1,393 @@ +/* + * Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved. + * + * Authors: + * Alexander Graf <agraf@suse.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include <linux/kvm_host.h> + +#include <asm/kvm_ppc.h> +#include <asm/kvm_book3s.h> +#include <asm/mmu-hash32.h> +#include <asm/machdep.h> +#include <asm/mmu_context.h> +#include <asm/hw_irq.h> + +/* #define DEBUG_MMU */ +/* #define DEBUG_SR */ + +#ifdef DEBUG_MMU +#define dprintk_mmu(a, ...) printk(KERN_INFO a, __VA_ARGS__) +#else +#define dprintk_mmu(a, ...) do { } while(0) +#endif + +#ifdef DEBUG_SR +#define dprintk_sr(a, ...) printk(KERN_INFO a, __VA_ARGS__) +#else +#define dprintk_sr(a, ...) do { } while(0) +#endif + +#if PAGE_SHIFT != 12 +#error Unknown page size +#endif + +#ifdef CONFIG_SMP +#error XXX need to grab mmu_hash_lock +#endif + +#ifdef CONFIG_PTE_64BIT +#error Only 32 bit pages are supported for now +#endif + +static ulong htab; +static u32 htabmask; + +void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) +{ + volatile u32 *pteg; + + /* Remove from host HTAB */ + pteg = (u32*)pte->slot; + pteg[0] = 0; + + /* And make sure it's gone from the TLB too */ + asm volatile ("sync"); + asm volatile ("tlbie %0" : : "r" (pte->pte.eaddr) : "memory"); + asm volatile ("sync"); + asm volatile ("tlbsync"); +} + +/* We keep 512 gvsid->hvsid entries, mapping the guest ones to the array using + * a hash, so we don't waste cycles on looping */ +static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid) +{ + return (u16)(((gvsid >> (SID_MAP_BITS * 7)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 6)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 5)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 4)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 3)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 2)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 1)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 0)) & SID_MAP_MASK)); +} + + +static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid) +{ + struct kvmppc_sid_map *map; + u16 sid_map_mask; + + if (vcpu->arch.shared->msr & MSR_PR) + gvsid |= VSID_PR; + + sid_map_mask = kvmppc_sid_hash(vcpu, gvsid); + map = &to_book3s(vcpu)->sid_map[sid_map_mask]; + if (map->guest_vsid == gvsid) { + dprintk_sr("SR: Searching 0x%llx -> 0x%llx\n", + gvsid, map->host_vsid); + return map; + } + + map = &to_book3s(vcpu)->sid_map[SID_MAP_MASK - sid_map_mask]; + if (map->guest_vsid == gvsid) { + dprintk_sr("SR: Searching 0x%llx -> 0x%llx\n", + gvsid, map->host_vsid); + return map; + } + + dprintk_sr("SR: Searching 0x%llx -> not found\n", gvsid); + return NULL; +} + +static u32 *kvmppc_mmu_get_pteg(struct kvm_vcpu *vcpu, u32 vsid, u32 eaddr, + bool primary) +{ + u32 page, hash; + ulong pteg = htab; + + page = (eaddr & ~ESID_MASK) >> 12; + + hash = ((vsid ^ page) << 6); + if (!primary) + hash = ~hash; + + hash &= htabmask; + + pteg |= hash; + + dprintk_mmu("htab: %lx | hash: %x | htabmask: %x | pteg: %lx\n", + htab, hash, htabmask, pteg); + + return (u32*)pteg; +} + +extern char etext[]; + +int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) +{ + pfn_t hpaddr; + u64 va; + u64 vsid; + struct kvmppc_sid_map *map; + volatile u32 *pteg; + u32 eaddr = orig_pte->eaddr; + u32 pteg0, pteg1; + register int rr = 0; + bool primary = false; + bool evict = false; + struct hpte_cache *pte; + int r = 0; + + /* Get host physical address for gpa */ + hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); + if (is_error_pfn(hpaddr)) { + printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", + orig_pte->eaddr); + r = -EINVAL; + goto out; + } + hpaddr <<= PAGE_SHIFT; + + /* and write the mapping ea -> hpa into the pt */ + vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid); + map = find_sid_vsid(vcpu, vsid); + if (!map) { + kvmppc_mmu_map_segment(vcpu, eaddr); + map = find_sid_vsid(vcpu, vsid); + } + BUG_ON(!map); + + vsid = map->host_vsid; + va = (vsid << SID_SHIFT) | (eaddr & ~ESID_MASK); + +next_pteg: + if (rr == 16) { + primary = !primary; + evict = true; + rr = 0; + } + + pteg = kvmppc_mmu_get_pteg(vcpu, vsid, eaddr, primary); + + /* not evicting yet */ + if (!evict && (pteg[rr] & PTE_V)) { + rr += 2; + goto next_pteg; + } + + dprintk_mmu("KVM: old PTEG: %p (%d)\n", pteg, rr); + dprintk_mmu("KVM: %08x - %08x\n", pteg[0], pteg[1]); + dprintk_mmu("KVM: %08x - %08x\n", pteg[2], pteg[3]); + dprintk_mmu("KVM: %08x - %08x\n", pteg[4], pteg[5]); + dprintk_mmu("KVM: %08x - %08x\n", pteg[6], pteg[7]); + dprintk_mmu("KVM: %08x - %08x\n", pteg[8], pteg[9]); + dprintk_mmu("KVM: %08x - %08x\n", pteg[10], pteg[11]); + dprintk_mmu("KVM: %08x - %08x\n", pteg[12], pteg[13]); + dprintk_mmu("KVM: %08x - %08x\n", pteg[14], pteg[15]); + + pteg0 = ((eaddr & 0x0fffffff) >> 22) | (vsid << 7) | PTE_V | + (primary ? 0 : PTE_SEC); + pteg1 = hpaddr | PTE_M | PTE_R | PTE_C; + + if (orig_pte->may_write) { + pteg1 |= PP_RWRW; + mark_page_dirty(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT); + } else { + pteg1 |= PP_RWRX; + } + + local_irq_disable(); + + if (pteg[rr]) { + pteg[rr] = 0; + asm volatile ("sync"); + } + pteg[rr + 1] = pteg1; + pteg[rr] = pteg0; + asm volatile ("sync"); + + local_irq_enable(); + + dprintk_mmu("KVM: new PTEG: %p\n", pteg); + dprintk_mmu("KVM: %08x - %08x\n", pteg[0], pteg[1]); + dprintk_mmu("KVM: %08x - %08x\n", pteg[2], pteg[3]); + dprintk_mmu("KVM: %08x - %08x\n", pteg[4], pteg[5]); + dprintk_mmu("KVM: %08x - %08x\n", pteg[6], pteg[7]); + dprintk_mmu("KVM: %08x - %08x\n", pteg[8], pteg[9]); + dprintk_mmu("KVM: %08x - %08x\n", pteg[10], pteg[11]); + dprintk_mmu("KVM: %08x - %08x\n", pteg[12], pteg[13]); + dprintk_mmu("KVM: %08x - %08x\n", pteg[14], pteg[15]); + + + /* Now tell our Shadow PTE code about the new page */ + + pte = kvmppc_mmu_hpte_cache_next(vcpu); + + dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%llx (0x%llx) -> %lx\n", + orig_pte->may_write ? 'w' : '-', + orig_pte->may_execute ? 'x' : '-', + orig_pte->eaddr, (ulong)pteg, va, + orig_pte->vpage, hpaddr); + + pte->slot = (ulong)&pteg[rr]; + pte->host_va = va; + pte->pte = *orig_pte; + pte->pfn = hpaddr >> PAGE_SHIFT; + + kvmppc_mmu_hpte_cache_map(vcpu, pte); + +out: + return r; +} + +static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) +{ + struct kvmppc_sid_map *map; + struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); + u16 sid_map_mask; + static int backwards_map = 0; + + if (vcpu->arch.shared->msr & MSR_PR) + gvsid |= VSID_PR; + + /* We might get collisions that trap in preceding order, so let's + map them differently */ + + sid_map_mask = kvmppc_sid_hash(vcpu, gvsid); + if (backwards_map) + sid_map_mask = SID_MAP_MASK - sid_map_mask; + + map = &to_book3s(vcpu)->sid_map[sid_map_mask]; + + /* Make sure we're taking the other map next time */ + backwards_map = !backwards_map; + + /* Uh-oh ... out of mappings. Let's flush! */ + if (vcpu_book3s->vsid_next >= VSID_POOL_SIZE) { + vcpu_book3s->vsid_next = 0; + memset(vcpu_book3s->sid_map, 0, + sizeof(struct kvmppc_sid_map) * SID_MAP_NUM); + kvmppc_mmu_pte_flush(vcpu, 0, 0); + kvmppc_mmu_flush_segments(vcpu); + } + map->host_vsid = vcpu_book3s->vsid_pool[vcpu_book3s->vsid_next]; + vcpu_book3s->vsid_next++; + + map->guest_vsid = gvsid; + map->valid = true; + + return map; +} + +int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr) +{ + u32 esid = eaddr >> SID_SHIFT; + u64 gvsid; + u32 sr; + struct kvmppc_sid_map *map; + struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); + int r = 0; + + if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) { + /* Invalidate an entry */ + svcpu->sr[esid] = SR_INVALID; + r = -ENOENT; + goto out; + } + + map = find_sid_vsid(vcpu, gvsid); + if (!map) + map = create_sid_map(vcpu, gvsid); + + map->guest_esid = esid; + sr = map->host_vsid | SR_KP; + svcpu->sr[esid] = sr; + + dprintk_sr("MMU: mtsr %d, 0x%x\n", esid, sr); + +out: + svcpu_put(svcpu); + return r; +} + +void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) +{ + int i; + struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); + + dprintk_sr("MMU: flushing all segments (%d)\n", ARRAY_SIZE(svcpu->sr)); + for (i = 0; i < ARRAY_SIZE(svcpu->sr); i++) + svcpu->sr[i] = SR_INVALID; + + svcpu_put(svcpu); +} + +void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) +{ + int i; + + kvmppc_mmu_hpte_destroy(vcpu); + preempt_disable(); + for (i = 0; i < SID_CONTEXTS; i++) + __destroy_context(to_book3s(vcpu)->context_id[i]); + preempt_enable(); +} + +/* From mm/mmu_context_hash32.c */ +#define CTX_TO_VSID(c, id) ((((c) * (897 * 16)) + (id * 0x111)) & 0xffffff) + +int kvmppc_mmu_init(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); + int err; + ulong sdr1; + int i; + int j; + + for (i = 0; i < SID_CONTEXTS; i++) { + err = __init_new_context(); + if (err < 0) + goto init_fail; + vcpu3s->context_id[i] = err; + + /* Remember context id for this combination */ + for (j = 0; j < 16; j++) + vcpu3s->vsid_pool[(i * 16) + j] = CTX_TO_VSID(err, j); + } + + vcpu3s->vsid_next = 0; + + /* Remember where the HTAB is */ + asm ( "mfsdr1 %0" : "=r"(sdr1) ); + htabmask = ((sdr1 & 0x1FF) << 16) | 0xFFC0; + htab = (ulong)__va(sdr1 & 0xffff0000); + + kvmppc_mmu_hpte_init(vcpu); + + return 0; + +init_fail: + for (j = 0; j < i; j++) { + if (!vcpu3s->context_id[j]) + continue; + + __destroy_context(to_book3s(vcpu)->context_id[j]); + } + + return -1; +} diff --git a/arch/powerpc/kvm/book3s_32_sr.S b/arch/powerpc/kvm/book3s_32_sr.S new file mode 100644 index 00000000..7e06a6fc --- /dev/null +++ b/arch/powerpc/kvm/book3s_32_sr.S @@ -0,0 +1,143 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright SUSE Linux Products GmbH 2009 + * + * Authors: Alexander Graf <agraf@suse.de> + */ + +/****************************************************************************** + * * + * Entry code * + * * + *****************************************************************************/ + +.macro LOAD_GUEST_SEGMENTS + + /* Required state: + * + * MSR = ~IR|DR + * R1 = host R1 + * R2 = host R2 + * R3 = shadow vcpu + * all other volatile GPRS = free except R4, R6 + * SVCPU[CR] = guest CR + * SVCPU[XER] = guest XER + * SVCPU[CTR] = guest CTR + * SVCPU[LR] = guest LR + */ + +#define XCHG_SR(n) lwz r9, (SVCPU_SR+(n*4))(r3); \ + mtsr n, r9 + + XCHG_SR(0) + XCHG_SR(1) + XCHG_SR(2) + XCHG_SR(3) + XCHG_SR(4) + XCHG_SR(5) + XCHG_SR(6) + XCHG_SR(7) + XCHG_SR(8) + XCHG_SR(9) + XCHG_SR(10) + XCHG_SR(11) + XCHG_SR(12) + XCHG_SR(13) + XCHG_SR(14) + XCHG_SR(15) + + /* Clear BATs. */ + +#define KVM_KILL_BAT(n, reg) \ + mtspr SPRN_IBAT##n##U,reg; \ + mtspr SPRN_IBAT##n##L,reg; \ + mtspr SPRN_DBAT##n##U,reg; \ + mtspr SPRN_DBAT##n##L,reg; \ + + li r9, 0 + KVM_KILL_BAT(0, r9) + KVM_KILL_BAT(1, r9) + KVM_KILL_BAT(2, r9) + KVM_KILL_BAT(3, r9) + +.endm + +/****************************************************************************** + * * + * Exit code * + * * + *****************************************************************************/ + +.macro LOAD_HOST_SEGMENTS + + /* Register usage at this point: + * + * R1 = host R1 + * R2 = host R2 + * R12 = exit handler id + * R13 = shadow vcpu - SHADOW_VCPU_OFF + * SVCPU.* = guest * + * SVCPU[CR] = guest CR + * SVCPU[XER] = guest XER + * SVCPU[CTR] = guest CTR + * SVCPU[LR] = guest LR + * + */ + + /* Restore BATs */ + + /* We only overwrite the upper part, so we only restoree + the upper part. */ +#define KVM_LOAD_BAT(n, reg, RA, RB) \ + lwz RA,(n*16)+0(reg); \ + lwz RB,(n*16)+4(reg); \ + mtspr SPRN_IBAT##n##U,RA; \ + mtspr SPRN_IBAT##n##L,RB; \ + lwz RA,(n*16)+8(reg); \ + lwz RB,(n*16)+12(reg); \ + mtspr SPRN_DBAT##n##U,RA; \ + mtspr SPRN_DBAT##n##L,RB; \ + + lis r9, BATS@ha + addi r9, r9, BATS@l + tophys(r9, r9) + KVM_LOAD_BAT(0, r9, r10, r11) + KVM_LOAD_BAT(1, r9, r10, r11) + KVM_LOAD_BAT(2, r9, r10, r11) + KVM_LOAD_BAT(3, r9, r10, r11) + + /* Restore Segment Registers */ + + /* 0xc - 0xf */ + + li r0, 4 + mtctr r0 + LOAD_REG_IMMEDIATE(r3, 0x20000000 | (0x111 * 0xc)) + lis r4, 0xc000 +3: mtsrin r3, r4 + addi r3, r3, 0x111 /* increment VSID */ + addis r4, r4, 0x1000 /* address of next segment */ + bdnz 3b + + /* 0x0 - 0xb */ + + /* 'current->mm' needs to be in r4 */ + tophys(r4, r2) + lwz r4, MM(r4) + tophys(r4, r4) + /* This only clobbers r0, r3, r4 and r5 */ + bl switch_mmu_context + +.endm diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c new file mode 100644 index 00000000..b871721c --- /dev/null +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -0,0 +1,536 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright SUSE Linux Products GmbH 2009 + * + * Authors: Alexander Graf <agraf@suse.de> + */ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/highmem.h> + +#include <asm/tlbflush.h> +#include <asm/kvm_ppc.h> +#include <asm/kvm_book3s.h> + +/* #define DEBUG_MMU */ + +#ifdef DEBUG_MMU +#define dprintk(X...) printk(KERN_INFO X) +#else +#define dprintk(X...) do { } while(0) +#endif + +static void kvmppc_mmu_book3s_64_reset_msr(struct kvm_vcpu *vcpu) +{ + kvmppc_set_msr(vcpu, MSR_SF); +} + +static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe( + struct kvm_vcpu *vcpu, + gva_t eaddr) +{ + int i; + u64 esid = GET_ESID(eaddr); + u64 esid_1t = GET_ESID_1T(eaddr); + + for (i = 0; i < vcpu->arch.slb_nr; i++) { + u64 cmp_esid = esid; + + if (!vcpu->arch.slb[i].valid) + continue; + + if (vcpu->arch.slb[i].tb) + cmp_esid = esid_1t; + + if (vcpu->arch.slb[i].esid == cmp_esid) + return &vcpu->arch.slb[i]; + } + + dprintk("KVM: No SLB entry found for 0x%lx [%llx | %llx]\n", + eaddr, esid, esid_1t); + for (i = 0; i < vcpu->arch.slb_nr; i++) { + if (vcpu->arch.slb[i].vsid) + dprintk(" %d: %c%c%c %llx %llx\n", i, + vcpu->arch.slb[i].valid ? 'v' : ' ', + vcpu->arch.slb[i].large ? 'l' : ' ', + vcpu->arch.slb[i].tb ? 't' : ' ', + vcpu->arch.slb[i].esid, + vcpu->arch.slb[i].vsid); + } + + return NULL; +} + +static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, + bool data) +{ + struct kvmppc_slb *slb; + + slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, eaddr); + if (!slb) + return 0; + + if (slb->tb) + return (((u64)eaddr >> 12) & 0xfffffff) | + (((u64)slb->vsid) << 28); + + return (((u64)eaddr >> 12) & 0xffff) | (((u64)slb->vsid) << 16); +} + +static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe) +{ + return slbe->large ? 24 : 12; +} + +static u32 kvmppc_mmu_book3s_64_get_page(struct kvmppc_slb *slbe, gva_t eaddr) +{ + int p = kvmppc_mmu_book3s_64_get_pagesize(slbe); + return ((eaddr & 0xfffffff) >> p); +} + +static hva_t kvmppc_mmu_book3s_64_get_pteg( + struct kvmppc_vcpu_book3s *vcpu_book3s, + struct kvmppc_slb *slbe, gva_t eaddr, + bool second) +{ + u64 hash, pteg, htabsize; + u32 page; + hva_t r; + + page = kvmppc_mmu_book3s_64_get_page(slbe, eaddr); + htabsize = ((1 << ((vcpu_book3s->sdr1 & 0x1f) + 11)) - 1); + + hash = slbe->vsid ^ page; + if (second) + hash = ~hash; + hash &= ((1ULL << 39ULL) - 1ULL); + hash &= htabsize; + hash <<= 7ULL; + + pteg = vcpu_book3s->sdr1 & 0xfffffffffffc0000ULL; + pteg |= hash; + + dprintk("MMU: page=0x%x sdr1=0x%llx pteg=0x%llx vsid=0x%llx\n", + page, vcpu_book3s->sdr1, pteg, slbe->vsid); + + /* When running a PAPR guest, SDR1 contains a HVA address instead + of a GPA */ + if (vcpu_book3s->vcpu.arch.papr_enabled) + r = pteg; + else + r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT); + + if (kvm_is_error_hva(r)) + return r; + return r | (pteg & ~PAGE_MASK); +} + +static u64 kvmppc_mmu_book3s_64_get_avpn(struct kvmppc_slb *slbe, gva_t eaddr) +{ + int p = kvmppc_mmu_book3s_64_get_pagesize(slbe); + u64 avpn; + + avpn = kvmppc_mmu_book3s_64_get_page(slbe, eaddr); + avpn |= slbe->vsid << (28 - p); + + if (p < 24) + avpn >>= ((80 - p) - 56) - 8; + else + avpn <<= 8; + + return avpn; +} + +static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, + struct kvmppc_pte *gpte, bool data) +{ + struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); + struct kvmppc_slb *slbe; + hva_t ptegp; + u64 pteg[16]; + u64 avpn = 0; + int i; + u8 key = 0; + bool found = false; + bool perm_err = false; + int second = 0; + ulong mp_ea = vcpu->arch.magic_page_ea; + + /* Magic page override */ + if (unlikely(mp_ea) && + unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) && + !(vcpu->arch.shared->msr & MSR_PR)) { + gpte->eaddr = eaddr; + gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data); + gpte->raddr = vcpu->arch.magic_page_pa | (gpte->raddr & 0xfff); + gpte->raddr &= KVM_PAM; + gpte->may_execute = true; + gpte->may_read = true; + gpte->may_write = true; + + return 0; + } + + slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu, eaddr); + if (!slbe) + goto no_seg_found; + +do_second: + ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second); + if (kvm_is_error_hva(ptegp)) + goto no_page_found; + + avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr); + + if(copy_from_user(pteg, (void __user *)ptegp, sizeof(pteg))) { + printk(KERN_ERR "KVM can't copy data from 0x%lx!\n", ptegp); + goto no_page_found; + } + + if ((vcpu->arch.shared->msr & MSR_PR) && slbe->Kp) + key = 4; + else if (!(vcpu->arch.shared->msr & MSR_PR) && slbe->Ks) + key = 4; + + for (i=0; i<16; i+=2) { + u64 v = pteg[i]; + u64 r = pteg[i+1]; + + /* Valid check */ + if (!(v & HPTE_V_VALID)) + continue; + /* Hash check */ + if ((v & HPTE_V_SECONDARY) != second) + continue; + + /* AVPN compare */ + if (HPTE_V_AVPN_VAL(avpn) == HPTE_V_AVPN_VAL(v)) { + u8 pp = (r & HPTE_R_PP) | key; + int eaddr_mask = 0xFFF; + + gpte->eaddr = eaddr; + gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, + eaddr, + data); + if (slbe->large) + eaddr_mask = 0xFFFFFF; + gpte->raddr = (r & HPTE_R_RPN) | (eaddr & eaddr_mask); + gpte->may_execute = ((r & HPTE_R_N) ? false : true); + gpte->may_read = false; + gpte->may_write = false; + + switch (pp) { + case 0: + case 1: + case 2: + case 6: + gpte->may_write = true; + /* fall through */ + case 3: + case 5: + case 7: + gpte->may_read = true; + break; + } + + if (!gpte->may_read) { + perm_err = true; + continue; + } + + dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx " + "-> 0x%lx\n", + eaddr, avpn, gpte->vpage, gpte->raddr); + found = true; + break; + } + } + + /* Update PTE R and C bits, so the guest's swapper knows we used the + * page */ + if (found) { + u32 oldr = pteg[i+1]; + + if (gpte->may_read) { + /* Set the accessed flag */ + pteg[i+1] |= HPTE_R_R; + } + if (gpte->may_write) { + /* Set the dirty flag */ + pteg[i+1] |= HPTE_R_C; + } else { + dprintk("KVM: Mapping read-only page!\n"); + } + + /* Write back into the PTEG */ + if (pteg[i+1] != oldr) + copy_to_user((void __user *)ptegp, pteg, sizeof(pteg)); + + return 0; + } else { + dprintk("KVM MMU: No PTE found (ea=0x%lx sdr1=0x%llx " + "ptegp=0x%lx)\n", + eaddr, to_book3s(vcpu)->sdr1, ptegp); + for (i = 0; i < 16; i += 2) + dprintk(" %02d: 0x%llx - 0x%llx (0x%llx)\n", + i, pteg[i], pteg[i+1], avpn); + + if (!second) { + second = HPTE_V_SECONDARY; + goto do_second; + } + } + + +no_page_found: + + + if (perm_err) + return -EPERM; + + return -ENOENT; + +no_seg_found: + + dprintk("KVM MMU: Trigger segment fault\n"); + return -EINVAL; +} + +static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb) +{ + struct kvmppc_vcpu_book3s *vcpu_book3s; + u64 esid, esid_1t; + int slb_nr; + struct kvmppc_slb *slbe; + + dprintk("KVM MMU: slbmte(0x%llx, 0x%llx)\n", rs, rb); + + vcpu_book3s = to_book3s(vcpu); + + esid = GET_ESID(rb); + esid_1t = GET_ESID_1T(rb); + slb_nr = rb & 0xfff; + + if (slb_nr > vcpu->arch.slb_nr) + return; + + slbe = &vcpu->arch.slb[slb_nr]; + + slbe->large = (rs & SLB_VSID_L) ? 1 : 0; + slbe->tb = (rs & SLB_VSID_B_1T) ? 1 : 0; + slbe->esid = slbe->tb ? esid_1t : esid; + slbe->vsid = rs >> 12; + slbe->valid = (rb & SLB_ESID_V) ? 1 : 0; + slbe->Ks = (rs & SLB_VSID_KS) ? 1 : 0; + slbe->Kp = (rs & SLB_VSID_KP) ? 1 : 0; + slbe->nx = (rs & SLB_VSID_N) ? 1 : 0; + slbe->class = (rs & SLB_VSID_C) ? 1 : 0; + + slbe->orige = rb & (ESID_MASK | SLB_ESID_V); + slbe->origv = rs; + + /* Map the new segment */ + kvmppc_mmu_map_segment(vcpu, esid << SID_SHIFT); +} + +static u64 kvmppc_mmu_book3s_64_slbmfee(struct kvm_vcpu *vcpu, u64 slb_nr) +{ + struct kvmppc_slb *slbe; + + if (slb_nr > vcpu->arch.slb_nr) + return 0; + + slbe = &vcpu->arch.slb[slb_nr]; + + return slbe->orige; +} + +static u64 kvmppc_mmu_book3s_64_slbmfev(struct kvm_vcpu *vcpu, u64 slb_nr) +{ + struct kvmppc_slb *slbe; + + if (slb_nr > vcpu->arch.slb_nr) + return 0; + + slbe = &vcpu->arch.slb[slb_nr]; + + return slbe->origv; +} + +static void kvmppc_mmu_book3s_64_slbie(struct kvm_vcpu *vcpu, u64 ea) +{ + struct kvmppc_slb *slbe; + + dprintk("KVM MMU: slbie(0x%llx)\n", ea); + + slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea); + + if (!slbe) + return; + + dprintk("KVM MMU: slbie(0x%llx, 0x%llx)\n", ea, slbe->esid); + + slbe->valid = false; + + kvmppc_mmu_map_segment(vcpu, ea); +} + +static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu) +{ + int i; + + dprintk("KVM MMU: slbia()\n"); + + for (i = 1; i < vcpu->arch.slb_nr; i++) + vcpu->arch.slb[i].valid = false; + + if (vcpu->arch.shared->msr & MSR_IR) { + kvmppc_mmu_flush_segments(vcpu); + kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); + } +} + +static void kvmppc_mmu_book3s_64_mtsrin(struct kvm_vcpu *vcpu, u32 srnum, + ulong value) +{ + u64 rb = 0, rs = 0; + + /* + * According to Book3 2.01 mtsrin is implemented as: + * + * The SLB entry specified by (RB)32:35 is loaded from register + * RS, as follows. + * + * SLBE Bit Source SLB Field + * + * 0:31 0x0000_0000 ESID-0:31 + * 32:35 (RB)32:35 ESID-32:35 + * 36 0b1 V + * 37:61 0x00_0000|| 0b0 VSID-0:24 + * 62:88 (RS)37:63 VSID-25:51 + * 89:91 (RS)33:35 Ks Kp N + * 92 (RS)36 L ((RS)36 must be 0b0) + * 93 0b0 C + */ + + dprintk("KVM MMU: mtsrin(0x%x, 0x%lx)\n", srnum, value); + + /* ESID = srnum */ + rb |= (srnum & 0xf) << 28; + /* Set the valid bit */ + rb |= 1 << 27; + /* Index = ESID */ + rb |= srnum; + + /* VSID = VSID */ + rs |= (value & 0xfffffff) << 12; + /* flags = flags */ + rs |= ((value >> 28) & 0x7) << 9; + + kvmppc_mmu_book3s_64_slbmte(vcpu, rs, rb); +} + +static void kvmppc_mmu_book3s_64_tlbie(struct kvm_vcpu *vcpu, ulong va, + bool large) +{ + u64 mask = 0xFFFFFFFFFULL; + + dprintk("KVM MMU: tlbie(0x%lx)\n", va); + + if (large) + mask = 0xFFFFFF000ULL; + kvmppc_mmu_pte_vflush(vcpu, va >> 12, mask); +} + +static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, + u64 *vsid) +{ + ulong ea = esid << SID_SHIFT; + struct kvmppc_slb *slb; + u64 gvsid = esid; + ulong mp_ea = vcpu->arch.magic_page_ea; + + if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { + slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea); + if (slb) + gvsid = slb->vsid; + } + + switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { + case 0: + *vsid = VSID_REAL | esid; + break; + case MSR_IR: + *vsid = VSID_REAL_IR | gvsid; + break; + case MSR_DR: + *vsid = VSID_REAL_DR | gvsid; + break; + case MSR_DR|MSR_IR: + if (!slb) + goto no_slb; + + *vsid = gvsid; + break; + default: + BUG(); + break; + } + + if (vcpu->arch.shared->msr & MSR_PR) + *vsid |= VSID_PR; + + return 0; + +no_slb: + /* Catch magic page case */ + if (unlikely(mp_ea) && + unlikely(esid == (mp_ea >> SID_SHIFT)) && + !(vcpu->arch.shared->msr & MSR_PR)) { + *vsid = VSID_REAL | esid; + return 0; + } + + return -EINVAL; +} + +static bool kvmppc_mmu_book3s_64_is_dcbz32(struct kvm_vcpu *vcpu) +{ + return (to_book3s(vcpu)->hid[5] & 0x80); +} + +void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu) +{ + struct kvmppc_mmu *mmu = &vcpu->arch.mmu; + + mmu->mfsrin = NULL; + mmu->mtsrin = kvmppc_mmu_book3s_64_mtsrin; + mmu->slbmte = kvmppc_mmu_book3s_64_slbmte; + mmu->slbmfee = kvmppc_mmu_book3s_64_slbmfee; + mmu->slbmfev = kvmppc_mmu_book3s_64_slbmfev; + mmu->slbie = kvmppc_mmu_book3s_64_slbie; + mmu->slbia = kvmppc_mmu_book3s_64_slbia; + mmu->xlate = kvmppc_mmu_book3s_64_xlate; + mmu->reset_msr = kvmppc_mmu_book3s_64_reset_msr; + mmu->tlbie = kvmppc_mmu_book3s_64_tlbie; + mmu->esid_to_vsid = kvmppc_mmu_book3s_64_esid_to_vsid; + mmu->ea_to_vp = kvmppc_mmu_book3s_64_ea_to_vp; + mmu->is_dcbz32 = kvmppc_mmu_book3s_64_is_dcbz32; + + vcpu->arch.hflags |= BOOK3S_HFLAG_SLB; +} diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c new file mode 100644 index 00000000..10fc8ec9 --- /dev/null +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -0,0 +1,330 @@ +/* + * Copyright (C) 2009 SUSE Linux Products GmbH. All rights reserved. + * + * Authors: + * Alexander Graf <agraf@suse.de> + * Kevin Wolf <mail@kevin-wolf.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include <linux/kvm_host.h> + +#include <asm/kvm_ppc.h> +#include <asm/kvm_book3s.h> +#include <asm/mmu-hash64.h> +#include <asm/machdep.h> +#include <asm/mmu_context.h> +#include <asm/hw_irq.h> +#include "trace.h" + +#define PTE_SIZE 12 + +void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) +{ + ppc_md.hpte_invalidate(pte->slot, pte->host_va, + MMU_PAGE_4K, MMU_SEGSIZE_256M, + false); +} + +/* We keep 512 gvsid->hvsid entries, mapping the guest ones to the array using + * a hash, so we don't waste cycles on looping */ +static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid) +{ + return (u16)(((gvsid >> (SID_MAP_BITS * 7)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 6)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 5)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 4)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 3)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 2)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 1)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 0)) & SID_MAP_MASK)); +} + + +static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid) +{ + struct kvmppc_sid_map *map; + u16 sid_map_mask; + + if (vcpu->arch.shared->msr & MSR_PR) + gvsid |= VSID_PR; + + sid_map_mask = kvmppc_sid_hash(vcpu, gvsid); + map = &to_book3s(vcpu)->sid_map[sid_map_mask]; + if (map->valid && (map->guest_vsid == gvsid)) { + trace_kvm_book3s_slb_found(gvsid, map->host_vsid); + return map; + } + + map = &to_book3s(vcpu)->sid_map[SID_MAP_MASK - sid_map_mask]; + if (map->valid && (map->guest_vsid == gvsid)) { + trace_kvm_book3s_slb_found(gvsid, map->host_vsid); + return map; + } + + trace_kvm_book3s_slb_fail(sid_map_mask, gvsid); + return NULL; +} + +int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) +{ + pfn_t hpaddr; + ulong hash, hpteg, va; + u64 vsid; + int ret; + int rflags = 0x192; + int vflags = 0; + int attempt = 0; + struct kvmppc_sid_map *map; + int r = 0; + + /* Get host physical address for gpa */ + hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); + if (is_error_pfn(hpaddr)) { + printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr); + r = -EINVAL; + goto out; + } + hpaddr <<= PAGE_SHIFT; + hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK); + + /* and write the mapping ea -> hpa into the pt */ + vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid); + map = find_sid_vsid(vcpu, vsid); + if (!map) { + ret = kvmppc_mmu_map_segment(vcpu, orig_pte->eaddr); + WARN_ON(ret < 0); + map = find_sid_vsid(vcpu, vsid); + } + if (!map) { + printk(KERN_ERR "KVM: Segment map for 0x%llx (0x%lx) failed\n", + vsid, orig_pte->eaddr); + WARN_ON(true); + r = -EINVAL; + goto out; + } + + vsid = map->host_vsid; + va = hpt_va(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M); + + if (!orig_pte->may_write) + rflags |= HPTE_R_PP; + else + mark_page_dirty(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT); + + if (!orig_pte->may_execute) + rflags |= HPTE_R_N; + + hash = hpt_hash(va, PTE_SIZE, MMU_SEGSIZE_256M); + +map_again: + hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); + + /* In case we tried normal mapping already, let's nuke old entries */ + if (attempt > 1) + if (ppc_md.hpte_remove(hpteg) < 0) { + r = -1; + goto out; + } + + ret = ppc_md.hpte_insert(hpteg, va, hpaddr, rflags, vflags, MMU_PAGE_4K, MMU_SEGSIZE_256M); + + if (ret < 0) { + /* If we couldn't map a primary PTE, try a secondary */ + hash = ~hash; + vflags ^= HPTE_V_SECONDARY; + attempt++; + goto map_again; + } else { + struct hpte_cache *pte = kvmppc_mmu_hpte_cache_next(vcpu); + + trace_kvm_book3s_64_mmu_map(rflags, hpteg, va, hpaddr, orig_pte); + + /* The ppc_md code may give us a secondary entry even though we + asked for a primary. Fix up. */ + if ((ret & _PTEIDX_SECONDARY) && !(vflags & HPTE_V_SECONDARY)) { + hash = ~hash; + hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); + } + + pte->slot = hpteg + (ret & 7); + pte->host_va = va; + pte->pte = *orig_pte; + pte->pfn = hpaddr >> PAGE_SHIFT; + + kvmppc_mmu_hpte_cache_map(vcpu, pte); + } + +out: + return r; +} + +static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) +{ + struct kvmppc_sid_map *map; + struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); + u16 sid_map_mask; + static int backwards_map = 0; + + if (vcpu->arch.shared->msr & MSR_PR) + gvsid |= VSID_PR; + + /* We might get collisions that trap in preceding order, so let's + map them differently */ + + sid_map_mask = kvmppc_sid_hash(vcpu, gvsid); + if (backwards_map) + sid_map_mask = SID_MAP_MASK - sid_map_mask; + + map = &to_book3s(vcpu)->sid_map[sid_map_mask]; + + /* Make sure we're taking the other map next time */ + backwards_map = !backwards_map; + + /* Uh-oh ... out of mappings. Let's flush! */ + if (vcpu_book3s->proto_vsid_next == vcpu_book3s->proto_vsid_max) { + vcpu_book3s->proto_vsid_next = vcpu_book3s->proto_vsid_first; + memset(vcpu_book3s->sid_map, 0, + sizeof(struct kvmppc_sid_map) * SID_MAP_NUM); + kvmppc_mmu_pte_flush(vcpu, 0, 0); + kvmppc_mmu_flush_segments(vcpu); + } + map->host_vsid = vsid_scramble(vcpu_book3s->proto_vsid_next++, 256M); + + map->guest_vsid = gvsid; + map->valid = true; + + trace_kvm_book3s_slb_map(sid_map_mask, gvsid, map->host_vsid); + + return map; +} + +static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid) +{ + struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); + int i; + int max_slb_size = 64; + int found_inval = -1; + int r; + + if (!svcpu->slb_max) + svcpu->slb_max = 1; + + /* Are we overwriting? */ + for (i = 1; i < svcpu->slb_max; i++) { + if (!(svcpu->slb[i].esid & SLB_ESID_V)) + found_inval = i; + else if ((svcpu->slb[i].esid & ESID_MASK) == esid) { + r = i; + goto out; + } + } + + /* Found a spare entry that was invalidated before */ + if (found_inval > 0) { + r = found_inval; + goto out; + } + + /* No spare invalid entry, so create one */ + + if (mmu_slb_size < 64) + max_slb_size = mmu_slb_size; + + /* Overflowing -> purge */ + if ((svcpu->slb_max) == max_slb_size) + kvmppc_mmu_flush_segments(vcpu); + + r = svcpu->slb_max; + svcpu->slb_max++; + +out: + svcpu_put(svcpu); + return r; +} + +int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr) +{ + struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); + u64 esid = eaddr >> SID_SHIFT; + u64 slb_esid = (eaddr & ESID_MASK) | SLB_ESID_V; + u64 slb_vsid = SLB_VSID_USER; + u64 gvsid; + int slb_index; + struct kvmppc_sid_map *map; + int r = 0; + + slb_index = kvmppc_mmu_next_segment(vcpu, eaddr & ESID_MASK); + + if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) { + /* Invalidate an entry */ + svcpu->slb[slb_index].esid = 0; + r = -ENOENT; + goto out; + } + + map = find_sid_vsid(vcpu, gvsid); + if (!map) + map = create_sid_map(vcpu, gvsid); + + map->guest_esid = esid; + + slb_vsid |= (map->host_vsid << 12); + slb_vsid &= ~SLB_VSID_KP; + slb_esid |= slb_index; + + svcpu->slb[slb_index].esid = slb_esid; + svcpu->slb[slb_index].vsid = slb_vsid; + + trace_kvm_book3s_slbmte(slb_vsid, slb_esid); + +out: + svcpu_put(svcpu); + return r; +} + +void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) +{ + struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); + svcpu->slb_max = 1; + svcpu->slb[0].esid = 0; + svcpu_put(svcpu); +} + +void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) +{ + kvmppc_mmu_hpte_destroy(vcpu); + __destroy_context(to_book3s(vcpu)->context_id[0]); +} + +int kvmppc_mmu_init(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); + int err; + + err = __init_new_context(); + if (err < 0) + return -1; + vcpu3s->context_id[0] = err; + + vcpu3s->proto_vsid_max = ((vcpu3s->context_id[0] + 1) + << USER_ESID_BITS) - 1; + vcpu3s->proto_vsid_first = vcpu3s->context_id[0] << USER_ESID_BITS; + vcpu3s->proto_vsid_next = vcpu3s->proto_vsid_first; + + kvmppc_mmu_hpte_init(vcpu); + + return 0; +} diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c new file mode 100644 index 00000000..c3beaeef --- /dev/null +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -0,0 +1,1027 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> + */ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/highmem.h> +#include <linux/gfp.h> +#include <linux/slab.h> +#include <linux/hugetlb.h> +#include <linux/vmalloc.h> + +#include <asm/tlbflush.h> +#include <asm/kvm_ppc.h> +#include <asm/kvm_book3s.h> +#include <asm/mmu-hash64.h> +#include <asm/hvcall.h> +#include <asm/synch.h> +#include <asm/ppc-opcode.h> +#include <asm/cputable.h> + +/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ +#define MAX_LPID_970 63 +#define NR_LPIDS (LPID_RSVD + 1) +unsigned long lpid_inuse[BITS_TO_LONGS(NR_LPIDS)]; + +long kvmppc_alloc_hpt(struct kvm *kvm) +{ + unsigned long hpt; + unsigned long lpid; + struct revmap_entry *rev; + struct kvmppc_linear_info *li; + + /* Allocate guest's hashed page table */ + li = kvm_alloc_hpt(); + if (li) { + /* using preallocated memory */ + hpt = (ulong)li->base_virt; + kvm->arch.hpt_li = li; + } else { + /* using dynamic memory */ + hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT| + __GFP_NOWARN, HPT_ORDER - PAGE_SHIFT); + } + + if (!hpt) { + pr_err("kvm_alloc_hpt: Couldn't alloc HPT\n"); + return -ENOMEM; + } + kvm->arch.hpt_virt = hpt; + + /* Allocate reverse map array */ + rev = vmalloc(sizeof(struct revmap_entry) * HPT_NPTE); + if (!rev) { + pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n"); + goto out_freehpt; + } + kvm->arch.revmap = rev; + + /* Allocate the guest's logical partition ID */ + do { + lpid = find_first_zero_bit(lpid_inuse, NR_LPIDS); + if (lpid >= NR_LPIDS) { + pr_err("kvm_alloc_hpt: No LPIDs free\n"); + goto out_freeboth; + } + } while (test_and_set_bit(lpid, lpid_inuse)); + + kvm->arch.sdr1 = __pa(hpt) | (HPT_ORDER - 18); + kvm->arch.lpid = lpid; + + pr_info("KVM guest htab at %lx, LPID %lx\n", hpt, lpid); + return 0; + + out_freeboth: + vfree(rev); + out_freehpt: + free_pages(hpt, HPT_ORDER - PAGE_SHIFT); + return -ENOMEM; +} + +void kvmppc_free_hpt(struct kvm *kvm) +{ + clear_bit(kvm->arch.lpid, lpid_inuse); + vfree(kvm->arch.revmap); + if (kvm->arch.hpt_li) + kvm_release_hpt(kvm->arch.hpt_li); + else + free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT); +} + +/* Bits in first HPTE dword for pagesize 4k, 64k or 16M */ +static inline unsigned long hpte0_pgsize_encoding(unsigned long pgsize) +{ + return (pgsize > 0x1000) ? HPTE_V_LARGE : 0; +} + +/* Bits in second HPTE dword for pagesize 4k, 64k or 16M */ +static inline unsigned long hpte1_pgsize_encoding(unsigned long pgsize) +{ + return (pgsize == 0x10000) ? 0x1000 : 0; +} + +void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, + unsigned long porder) +{ + unsigned long i; + unsigned long npages; + unsigned long hp_v, hp_r; + unsigned long addr, hash; + unsigned long psize; + unsigned long hp0, hp1; + long ret; + + psize = 1ul << porder; + npages = memslot->npages >> (porder - PAGE_SHIFT); + + /* VRMA can't be > 1TB */ + if (npages > 1ul << (40 - porder)) + npages = 1ul << (40 - porder); + /* Can't use more than 1 HPTE per HPTEG */ + if (npages > HPT_NPTEG) + npages = HPT_NPTEG; + + hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) | + HPTE_V_BOLTED | hpte0_pgsize_encoding(psize); + hp1 = hpte1_pgsize_encoding(psize) | + HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX; + + for (i = 0; i < npages; ++i) { + addr = i << porder; + /* can't use hpt_hash since va > 64 bits */ + hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & HPT_HASH_MASK; + /* + * We assume that the hash table is empty and no + * vcpus are using it at this stage. Since we create + * at most one HPTE per HPTEG, we just assume entry 7 + * is available and use it. + */ + hash = (hash << 3) + 7; + hp_v = hp0 | ((addr >> 16) & ~0x7fUL); + hp_r = hp1 | addr; + ret = kvmppc_virtmode_h_enter(vcpu, H_EXACT, hash, hp_v, hp_r); + if (ret != H_SUCCESS) { + pr_err("KVM: map_vrma at %lx failed, ret=%ld\n", + addr, ret); + break; + } + } +} + +int kvmppc_mmu_hv_init(void) +{ + unsigned long host_lpid, rsvd_lpid; + + if (!cpu_has_feature(CPU_FTR_HVMODE)) + return -EINVAL; + + memset(lpid_inuse, 0, sizeof(lpid_inuse)); + + if (cpu_has_feature(CPU_FTR_ARCH_206)) { + host_lpid = mfspr(SPRN_LPID); /* POWER7 */ + rsvd_lpid = LPID_RSVD; + } else { + host_lpid = 0; /* PPC970 */ + rsvd_lpid = MAX_LPID_970; + } + + set_bit(host_lpid, lpid_inuse); + /* rsvd_lpid is reserved for use in partition switching */ + set_bit(rsvd_lpid, lpid_inuse); + + return 0; +} + +void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) +{ +} + +static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) +{ + kvmppc_set_msr(vcpu, MSR_SF | MSR_ME); +} + +/* + * This is called to get a reference to a guest page if there isn't + * one already in the kvm->arch.slot_phys[][] arrays. + */ +static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, + struct kvm_memory_slot *memslot, + unsigned long psize) +{ + unsigned long start; + long np, err; + struct page *page, *hpage, *pages[1]; + unsigned long s, pgsize; + unsigned long *physp; + unsigned int is_io, got, pgorder; + struct vm_area_struct *vma; + unsigned long pfn, i, npages; + + physp = kvm->arch.slot_phys[memslot->id]; + if (!physp) + return -EINVAL; + if (physp[gfn - memslot->base_gfn]) + return 0; + + is_io = 0; + got = 0; + page = NULL; + pgsize = psize; + err = -EINVAL; + start = gfn_to_hva_memslot(memslot, gfn); + + /* Instantiate and get the page we want access to */ + np = get_user_pages_fast(start, 1, 1, pages); + if (np != 1) { + /* Look up the vma for the page */ + down_read(¤t->mm->mmap_sem); + vma = find_vma(current->mm, start); + if (!vma || vma->vm_start > start || + start + psize > vma->vm_end || + !(vma->vm_flags & VM_PFNMAP)) + goto up_err; + is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot)); + pfn = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); + /* check alignment of pfn vs. requested page size */ + if (psize > PAGE_SIZE && (pfn & ((psize >> PAGE_SHIFT) - 1))) + goto up_err; + up_read(¤t->mm->mmap_sem); + + } else { + page = pages[0]; + got = KVMPPC_GOT_PAGE; + + /* See if this is a large page */ + s = PAGE_SIZE; + if (PageHuge(page)) { + hpage = compound_head(page); + s <<= compound_order(hpage); + /* Get the whole large page if slot alignment is ok */ + if (s > psize && slot_is_aligned(memslot, s) && + !(memslot->userspace_addr & (s - 1))) { + start &= ~(s - 1); + pgsize = s; + get_page(hpage); + put_page(page); + page = hpage; + } + } + if (s < psize) + goto out; + pfn = page_to_pfn(page); + } + + npages = pgsize >> PAGE_SHIFT; + pgorder = __ilog2(npages); + physp += (gfn - memslot->base_gfn) & ~(npages - 1); + spin_lock(&kvm->arch.slot_phys_lock); + for (i = 0; i < npages; ++i) { + if (!physp[i]) { + physp[i] = ((pfn + i) << PAGE_SHIFT) + + got + is_io + pgorder; + got = 0; + } + } + spin_unlock(&kvm->arch.slot_phys_lock); + err = 0; + + out: + if (got) + put_page(page); + return err; + + up_err: + up_read(¤t->mm->mmap_sem); + return err; +} + +/* + * We come here on a H_ENTER call from the guest when we are not + * using mmu notifiers and we don't have the requested page pinned + * already. + */ +long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, + long pte_index, unsigned long pteh, unsigned long ptel) +{ + struct kvm *kvm = vcpu->kvm; + unsigned long psize, gpa, gfn; + struct kvm_memory_slot *memslot; + long ret; + + if (kvm->arch.using_mmu_notifiers) + goto do_insert; + + psize = hpte_page_size(pteh, ptel); + if (!psize) + return H_PARAMETER; + + pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID); + + /* Find the memslot (if any) for this address */ + gpa = (ptel & HPTE_R_RPN) & ~(psize - 1); + gfn = gpa >> PAGE_SHIFT; + memslot = gfn_to_memslot(kvm, gfn); + if (memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)) { + if (!slot_is_aligned(memslot, psize)) + return H_PARAMETER; + if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0) + return H_PARAMETER; + } + + do_insert: + /* Protect linux PTE lookup from page table destruction */ + rcu_read_lock_sched(); /* this disables preemption too */ + vcpu->arch.pgdir = current->mm->pgd; + ret = kvmppc_h_enter(vcpu, flags, pte_index, pteh, ptel); + rcu_read_unlock_sched(); + if (ret == H_TOO_HARD) { + /* this can't happen */ + pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n"); + ret = H_RESOURCE; /* or something */ + } + return ret; + +} + +static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu, + gva_t eaddr) +{ + u64 mask; + int i; + + for (i = 0; i < vcpu->arch.slb_nr; i++) { + if (!(vcpu->arch.slb[i].orige & SLB_ESID_V)) + continue; + + if (vcpu->arch.slb[i].origv & SLB_VSID_B_1T) + mask = ESID_MASK_1T; + else + mask = ESID_MASK; + + if (((vcpu->arch.slb[i].orige ^ eaddr) & mask) == 0) + return &vcpu->arch.slb[i]; + } + return NULL; +} + +static unsigned long kvmppc_mmu_get_real_addr(unsigned long v, unsigned long r, + unsigned long ea) +{ + unsigned long ra_mask; + + ra_mask = hpte_page_size(v, r) - 1; + return (r & HPTE_R_RPN & ~ra_mask) | (ea & ra_mask); +} + +static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, + struct kvmppc_pte *gpte, bool data) +{ + struct kvm *kvm = vcpu->kvm; + struct kvmppc_slb *slbe; + unsigned long slb_v; + unsigned long pp, key; + unsigned long v, gr; + unsigned long *hptep; + int index; + int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR); + + /* Get SLB entry */ + if (virtmode) { + slbe = kvmppc_mmu_book3s_hv_find_slbe(vcpu, eaddr); + if (!slbe) + return -EINVAL; + slb_v = slbe->origv; + } else { + /* real mode access */ + slb_v = vcpu->kvm->arch.vrma_slb_v; + } + + /* Find the HPTE in the hash table */ + index = kvmppc_hv_find_lock_hpte(kvm, eaddr, slb_v, + HPTE_V_VALID | HPTE_V_ABSENT); + if (index < 0) + return -ENOENT; + hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); + v = hptep[0] & ~HPTE_V_HVLOCK; + gr = kvm->arch.revmap[index].guest_rpte; + + /* Unlock the HPTE */ + asm volatile("lwsync" : : : "memory"); + hptep[0] = v; + + gpte->eaddr = eaddr; + gpte->vpage = ((v & HPTE_V_AVPN) << 4) | ((eaddr >> 12) & 0xfff); + + /* Get PP bits and key for permission check */ + pp = gr & (HPTE_R_PP0 | HPTE_R_PP); + key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS; + key &= slb_v; + + /* Calculate permissions */ + gpte->may_read = hpte_read_permission(pp, key); + gpte->may_write = hpte_write_permission(pp, key); + gpte->may_execute = gpte->may_read && !(gr & (HPTE_R_N | HPTE_R_G)); + + /* Storage key permission check for POWER7 */ + if (data && virtmode && cpu_has_feature(CPU_FTR_ARCH_206)) { + int amrfield = hpte_get_skey_perm(gr, vcpu->arch.amr); + if (amrfield & 1) + gpte->may_read = 0; + if (amrfield & 2) + gpte->may_write = 0; + } + + /* Get the guest physical address */ + gpte->raddr = kvmppc_mmu_get_real_addr(v, gr, eaddr); + return 0; +} + +/* + * Quick test for whether an instruction is a load or a store. + * If the instruction is a load or a store, then this will indicate + * which it is, at least on server processors. (Embedded processors + * have some external PID instructions that don't follow the rule + * embodied here.) If the instruction isn't a load or store, then + * this doesn't return anything useful. + */ +static int instruction_is_store(unsigned int instr) +{ + unsigned int mask; + + mask = 0x10000000; + if ((instr & 0xfc000000) == 0x7c000000) + mask = 0x100; /* major opcode 31 */ + return (instr & mask) != 0; +} + +static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned long gpa, int is_store) +{ + int ret; + u32 last_inst; + unsigned long srr0 = kvmppc_get_pc(vcpu); + + /* We try to load the last instruction. We don't let + * emulate_instruction do it as it doesn't check what + * kvmppc_ld returns. + * If we fail, we just return to the guest and try executing it again. + */ + if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) { + ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false); + if (ret != EMULATE_DONE || last_inst == KVM_INST_FETCH_FAILED) + return RESUME_GUEST; + vcpu->arch.last_inst = last_inst; + } + + /* + * WARNING: We do not know for sure whether the instruction we just + * read from memory is the same that caused the fault in the first + * place. If the instruction we read is neither an load or a store, + * then it can't access memory, so we don't need to worry about + * enforcing access permissions. So, assuming it is a load or + * store, we just check that its direction (load or store) is + * consistent with the original fault, since that's what we + * checked the access permissions against. If there is a mismatch + * we just return and retry the instruction. + */ + + if (instruction_is_store(vcpu->arch.last_inst) != !!is_store) + return RESUME_GUEST; + + /* + * Emulated accesses are emulated by looking at the hash for + * translation once, then performing the access later. The + * translation could be invalidated in the meantime in which + * point performing the subsequent memory access on the old + * physical address could possibly be a security hole for the + * guest (but not the host). + * + * This is less of an issue for MMIO stores since they aren't + * globally visible. It could be an issue for MMIO loads to + * a certain extent but we'll ignore it for now. + */ + + vcpu->arch.paddr_accessed = gpa; + return kvmppc_emulate_mmio(run, vcpu); +} + +int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned long ea, unsigned long dsisr) +{ + struct kvm *kvm = vcpu->kvm; + unsigned long *hptep, hpte[3], r; + unsigned long mmu_seq, psize, pte_size; + unsigned long gfn, hva, pfn; + struct kvm_memory_slot *memslot; + unsigned long *rmap; + struct revmap_entry *rev; + struct page *page, *pages[1]; + long index, ret, npages; + unsigned long is_io; + unsigned int writing, write_ok; + struct vm_area_struct *vma; + unsigned long rcbits; + + /* + * Real-mode code has already searched the HPT and found the + * entry we're interested in. Lock the entry and check that + * it hasn't changed. If it has, just return and re-execute the + * instruction. + */ + if (ea != vcpu->arch.pgfault_addr) + return RESUME_GUEST; + index = vcpu->arch.pgfault_index; + hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); + rev = &kvm->arch.revmap[index]; + preempt_disable(); + while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) + cpu_relax(); + hpte[0] = hptep[0] & ~HPTE_V_HVLOCK; + hpte[1] = hptep[1]; + hpte[2] = r = rev->guest_rpte; + asm volatile("lwsync" : : : "memory"); + hptep[0] = hpte[0]; + preempt_enable(); + + if (hpte[0] != vcpu->arch.pgfault_hpte[0] || + hpte[1] != vcpu->arch.pgfault_hpte[1]) + return RESUME_GUEST; + + /* Translate the logical address and get the page */ + psize = hpte_page_size(hpte[0], r); + gfn = hpte_rpn(r, psize); + memslot = gfn_to_memslot(kvm, gfn); + + /* No memslot means it's an emulated MMIO region */ + if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) { + unsigned long gpa = (gfn << PAGE_SHIFT) | (ea & (psize - 1)); + return kvmppc_hv_emulate_mmio(run, vcpu, gpa, + dsisr & DSISR_ISSTORE); + } + + if (!kvm->arch.using_mmu_notifiers) + return -EFAULT; /* should never get here */ + + /* used to check for invalidations in progress */ + mmu_seq = kvm->mmu_notifier_seq; + smp_rmb(); + + is_io = 0; + pfn = 0; + page = NULL; + pte_size = PAGE_SIZE; + writing = (dsisr & DSISR_ISSTORE) != 0; + /* If writing != 0, then the HPTE must allow writing, if we get here */ + write_ok = writing; + hva = gfn_to_hva_memslot(memslot, gfn); + npages = get_user_pages_fast(hva, 1, writing, pages); + if (npages < 1) { + /* Check if it's an I/O mapping */ + down_read(¤t->mm->mmap_sem); + vma = find_vma(current->mm, hva); + if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end && + (vma->vm_flags & VM_PFNMAP)) { + pfn = vma->vm_pgoff + + ((hva - vma->vm_start) >> PAGE_SHIFT); + pte_size = psize; + is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot)); + write_ok = vma->vm_flags & VM_WRITE; + } + up_read(¤t->mm->mmap_sem); + if (!pfn) + return -EFAULT; + } else { + page = pages[0]; + if (PageHuge(page)) { + page = compound_head(page); + pte_size <<= compound_order(page); + } + /* if the guest wants write access, see if that is OK */ + if (!writing && hpte_is_writable(r)) { + pte_t *ptep, pte; + + /* + * We need to protect against page table destruction + * while looking up and updating the pte. + */ + rcu_read_lock_sched(); + ptep = find_linux_pte_or_hugepte(current->mm->pgd, + hva, NULL); + if (ptep && pte_present(*ptep)) { + pte = kvmppc_read_update_linux_pte(ptep, 1); + if (pte_write(pte)) + write_ok = 1; + } + rcu_read_unlock_sched(); + } + pfn = page_to_pfn(page); + } + + ret = -EFAULT; + if (psize > pte_size) + goto out_put; + + /* Check WIMG vs. the actual page we're accessing */ + if (!hpte_cache_flags_ok(r, is_io)) { + if (is_io) + return -EFAULT; + /* + * Allow guest to map emulated device memory as + * uncacheable, but actually make it cacheable. + */ + r = (r & ~(HPTE_R_W|HPTE_R_I|HPTE_R_G)) | HPTE_R_M; + } + + /* Set the HPTE to point to pfn */ + r = (r & ~(HPTE_R_PP0 - pte_size)) | (pfn << PAGE_SHIFT); + if (hpte_is_writable(r) && !write_ok) + r = hpte_make_readonly(r); + ret = RESUME_GUEST; + preempt_disable(); + while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) + cpu_relax(); + if ((hptep[0] & ~HPTE_V_HVLOCK) != hpte[0] || hptep[1] != hpte[1] || + rev->guest_rpte != hpte[2]) + /* HPTE has been changed under us; let the guest retry */ + goto out_unlock; + hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; + + rmap = &memslot->rmap[gfn - memslot->base_gfn]; + lock_rmap(rmap); + + /* Check if we might have been invalidated; let the guest retry if so */ + ret = RESUME_GUEST; + if (mmu_notifier_retry(vcpu, mmu_seq)) { + unlock_rmap(rmap); + goto out_unlock; + } + + /* Only set R/C in real HPTE if set in both *rmap and guest_rpte */ + rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT; + r &= rcbits | ~(HPTE_R_R | HPTE_R_C); + + if (hptep[0] & HPTE_V_VALID) { + /* HPTE was previously valid, so we need to invalidate it */ + unlock_rmap(rmap); + hptep[0] |= HPTE_V_ABSENT; + kvmppc_invalidate_hpte(kvm, hptep, index); + /* don't lose previous R and C bits */ + r |= hptep[1] & (HPTE_R_R | HPTE_R_C); + } else { + kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0); + } + + hptep[1] = r; + eieio(); + hptep[0] = hpte[0]; + asm volatile("ptesync" : : : "memory"); + preempt_enable(); + if (page && hpte_is_writable(r)) + SetPageDirty(page); + + out_put: + if (page) { + /* + * We drop pages[0] here, not page because page might + * have been set to the head page of a compound, but + * we have to drop the reference on the correct tail + * page to match the get inside gup() + */ + put_page(pages[0]); + } + return ret; + + out_unlock: + hptep[0] &= ~HPTE_V_HVLOCK; + preempt_enable(); + goto out_put; +} + +static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, + int (*handler)(struct kvm *kvm, unsigned long *rmapp, + unsigned long gfn)) +{ + int ret; + int retval = 0; + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + + slots = kvm_memslots(kvm); + kvm_for_each_memslot(memslot, slots) { + unsigned long start = memslot->userspace_addr; + unsigned long end; + + end = start + (memslot->npages << PAGE_SHIFT); + if (hva >= start && hva < end) { + gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; + + ret = handler(kvm, &memslot->rmap[gfn_offset], + memslot->base_gfn + gfn_offset); + retval |= ret; + } + } + + return retval; +} + +static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, + unsigned long gfn) +{ + struct revmap_entry *rev = kvm->arch.revmap; + unsigned long h, i, j; + unsigned long *hptep; + unsigned long ptel, psize, rcbits; + + for (;;) { + lock_rmap(rmapp); + if (!(*rmapp & KVMPPC_RMAP_PRESENT)) { + unlock_rmap(rmapp); + break; + } + + /* + * To avoid an ABBA deadlock with the HPTE lock bit, + * we can't spin on the HPTE lock while holding the + * rmap chain lock. + */ + i = *rmapp & KVMPPC_RMAP_INDEX; + hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); + if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { + /* unlock rmap before spinning on the HPTE lock */ + unlock_rmap(rmapp); + while (hptep[0] & HPTE_V_HVLOCK) + cpu_relax(); + continue; + } + j = rev[i].forw; + if (j == i) { + /* chain is now empty */ + *rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX); + } else { + /* remove i from chain */ + h = rev[i].back; + rev[h].forw = j; + rev[j].back = h; + rev[i].forw = rev[i].back = i; + *rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j; + } + + /* Now check and modify the HPTE */ + ptel = rev[i].guest_rpte; + psize = hpte_page_size(hptep[0], ptel); + if ((hptep[0] & HPTE_V_VALID) && + hpte_rpn(ptel, psize) == gfn) { + hptep[0] |= HPTE_V_ABSENT; + kvmppc_invalidate_hpte(kvm, hptep, i); + /* Harvest R and C */ + rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C); + *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; + rev[i].guest_rpte = ptel | rcbits; + } + unlock_rmap(rmapp); + hptep[0] &= ~HPTE_V_HVLOCK; + } + return 0; +} + +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) +{ + if (kvm->arch.using_mmu_notifiers) + kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); + return 0; +} + +static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, + unsigned long gfn) +{ + struct revmap_entry *rev = kvm->arch.revmap; + unsigned long head, i, j; + unsigned long *hptep; + int ret = 0; + + retry: + lock_rmap(rmapp); + if (*rmapp & KVMPPC_RMAP_REFERENCED) { + *rmapp &= ~KVMPPC_RMAP_REFERENCED; + ret = 1; + } + if (!(*rmapp & KVMPPC_RMAP_PRESENT)) { + unlock_rmap(rmapp); + return ret; + } + + i = head = *rmapp & KVMPPC_RMAP_INDEX; + do { + hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); + j = rev[i].forw; + + /* If this HPTE isn't referenced, ignore it */ + if (!(hptep[1] & HPTE_R_R)) + continue; + + if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { + /* unlock rmap before spinning on the HPTE lock */ + unlock_rmap(rmapp); + while (hptep[0] & HPTE_V_HVLOCK) + cpu_relax(); + goto retry; + } + + /* Now check and modify the HPTE */ + if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) { + kvmppc_clear_ref_hpte(kvm, hptep, i); + rev[i].guest_rpte |= HPTE_R_R; + ret = 1; + } + hptep[0] &= ~HPTE_V_HVLOCK; + } while ((i = j) != head); + + unlock_rmap(rmapp); + return ret; +} + +int kvm_age_hva(struct kvm *kvm, unsigned long hva) +{ + if (!kvm->arch.using_mmu_notifiers) + return 0; + return kvm_handle_hva(kvm, hva, kvm_age_rmapp); +} + +static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, + unsigned long gfn) +{ + struct revmap_entry *rev = kvm->arch.revmap; + unsigned long head, i, j; + unsigned long *hp; + int ret = 1; + + if (*rmapp & KVMPPC_RMAP_REFERENCED) + return 1; + + lock_rmap(rmapp); + if (*rmapp & KVMPPC_RMAP_REFERENCED) + goto out; + + if (*rmapp & KVMPPC_RMAP_PRESENT) { + i = head = *rmapp & KVMPPC_RMAP_INDEX; + do { + hp = (unsigned long *)(kvm->arch.hpt_virt + (i << 4)); + j = rev[i].forw; + if (hp[1] & HPTE_R_R) + goto out; + } while ((i = j) != head); + } + ret = 0; + + out: + unlock_rmap(rmapp); + return ret; +} + +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +{ + if (!kvm->arch.using_mmu_notifiers) + return 0; + return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp); +} + +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +{ + if (!kvm->arch.using_mmu_notifiers) + return; + kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); +} + +static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp) +{ + struct revmap_entry *rev = kvm->arch.revmap; + unsigned long head, i, j; + unsigned long *hptep; + int ret = 0; + + retry: + lock_rmap(rmapp); + if (*rmapp & KVMPPC_RMAP_CHANGED) { + *rmapp &= ~KVMPPC_RMAP_CHANGED; + ret = 1; + } + if (!(*rmapp & KVMPPC_RMAP_PRESENT)) { + unlock_rmap(rmapp); + return ret; + } + + i = head = *rmapp & KVMPPC_RMAP_INDEX; + do { + hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); + j = rev[i].forw; + + if (!(hptep[1] & HPTE_R_C)) + continue; + + if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { + /* unlock rmap before spinning on the HPTE lock */ + unlock_rmap(rmapp); + while (hptep[0] & HPTE_V_HVLOCK) + cpu_relax(); + goto retry; + } + + /* Now check and modify the HPTE */ + if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_C)) { + /* need to make it temporarily absent to clear C */ + hptep[0] |= HPTE_V_ABSENT; + kvmppc_invalidate_hpte(kvm, hptep, i); + hptep[1] &= ~HPTE_R_C; + eieio(); + hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; + rev[i].guest_rpte |= HPTE_R_C; + ret = 1; + } + hptep[0] &= ~HPTE_V_HVLOCK; + } while ((i = j) != head); + + unlock_rmap(rmapp); + return ret; +} + +long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) +{ + unsigned long i; + unsigned long *rmapp, *map; + + preempt_disable(); + rmapp = memslot->rmap; + map = memslot->dirty_bitmap; + for (i = 0; i < memslot->npages; ++i) { + if (kvm_test_clear_dirty(kvm, rmapp)) + __set_bit_le(i, map); + ++rmapp; + } + preempt_enable(); + return 0; +} + +void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, + unsigned long *nb_ret) +{ + struct kvm_memory_slot *memslot; + unsigned long gfn = gpa >> PAGE_SHIFT; + struct page *page, *pages[1]; + int npages; + unsigned long hva, psize, offset; + unsigned long pa; + unsigned long *physp; + + memslot = gfn_to_memslot(kvm, gfn); + if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) + return NULL; + if (!kvm->arch.using_mmu_notifiers) { + physp = kvm->arch.slot_phys[memslot->id]; + if (!physp) + return NULL; + physp += gfn - memslot->base_gfn; + pa = *physp; + if (!pa) { + if (kvmppc_get_guest_page(kvm, gfn, memslot, + PAGE_SIZE) < 0) + return NULL; + pa = *physp; + } + page = pfn_to_page(pa >> PAGE_SHIFT); + get_page(page); + } else { + hva = gfn_to_hva_memslot(memslot, gfn); + npages = get_user_pages_fast(hva, 1, 1, pages); + if (npages < 1) + return NULL; + page = pages[0]; + } + psize = PAGE_SIZE; + if (PageHuge(page)) { + page = compound_head(page); + psize <<= compound_order(page); + } + offset = gpa & (psize - 1); + if (nb_ret) + *nb_ret = psize - offset; + return page_address(page) + offset; +} + +void kvmppc_unpin_guest_page(struct kvm *kvm, void *va) +{ + struct page *page = virt_to_page(va); + + put_page(page); +} + +void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) +{ + struct kvmppc_mmu *mmu = &vcpu->arch.mmu; + + if (cpu_has_feature(CPU_FTR_ARCH_206)) + vcpu->arch.slb_nr = 32; /* POWER7 */ + else + vcpu->arch.slb_nr = 64; + + mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate; + mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr; + + vcpu->arch.hflags |= BOOK3S_HFLAG_SLB; +} diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S new file mode 100644 index 00000000..f2e6e48e --- /dev/null +++ b/arch/powerpc/kvm/book3s_64_slb.S @@ -0,0 +1,167 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright SUSE Linux Products GmbH 2009 + * + * Authors: Alexander Graf <agraf@suse.de> + */ + +#define SHADOW_SLB_ESID(num) (SLBSHADOW_SAVEAREA + (num * 0x10)) +#define SHADOW_SLB_VSID(num) (SLBSHADOW_SAVEAREA + (num * 0x10) + 0x8) +#define UNBOLT_SLB_ENTRY(num) \ + ld r9, SHADOW_SLB_ESID(num)(r12); \ + /* Invalid? Skip. */; \ + rldicl. r0, r9, 37, 63; \ + beq slb_entry_skip_ ## num; \ + xoris r9, r9, SLB_ESID_V@h; \ + std r9, SHADOW_SLB_ESID(num)(r12); \ + slb_entry_skip_ ## num: + +#define REBOLT_SLB_ENTRY(num) \ + ld r10, SHADOW_SLB_ESID(num)(r11); \ + cmpdi r10, 0; \ + beq slb_exit_skip_ ## num; \ + oris r10, r10, SLB_ESID_V@h; \ + ld r9, SHADOW_SLB_VSID(num)(r11); \ + slbmte r9, r10; \ + std r10, SHADOW_SLB_ESID(num)(r11); \ +slb_exit_skip_ ## num: + +/****************************************************************************** + * * + * Entry code * + * * + *****************************************************************************/ + +.macro LOAD_GUEST_SEGMENTS + + /* Required state: + * + * MSR = ~IR|DR + * R13 = PACA + * R1 = host R1 + * R2 = host R2 + * R3 = shadow vcpu + * all other volatile GPRS = free except R4, R6 + * SVCPU[CR] = guest CR + * SVCPU[XER] = guest XER + * SVCPU[CTR] = guest CTR + * SVCPU[LR] = guest LR + */ + + /* Remove LPAR shadow entries */ + +#if SLB_NUM_BOLTED == 3 + + ld r12, PACA_SLBSHADOWPTR(r13) + + /* Save off the first entry so we can slbie it later */ + ld r10, SHADOW_SLB_ESID(0)(r12) + ld r11, SHADOW_SLB_VSID(0)(r12) + + /* Remove bolted entries */ + UNBOLT_SLB_ENTRY(0) + UNBOLT_SLB_ENTRY(1) + UNBOLT_SLB_ENTRY(2) + +#else +#error unknown number of bolted entries +#endif + + /* Flush SLB */ + + slbia + + /* r0 = esid & ESID_MASK */ + rldicr r10, r10, 0, 35 + /* r0 |= CLASS_BIT(VSID) */ + rldic r12, r11, 56 - 36, 36 + or r10, r10, r12 + slbie r10 + + isync + + /* Fill SLB with our shadow */ + + lbz r12, SVCPU_SLB_MAX(r3) + mulli r12, r12, 16 + addi r12, r12, SVCPU_SLB + add r12, r12, r3 + + /* for (r11 = kvm_slb; r11 < kvm_slb + kvm_slb_size; r11+=slb_entry) */ + li r11, SVCPU_SLB + add r11, r11, r3 + +slb_loop_enter: + + ld r10, 0(r11) + + rldicl. r0, r10, 37, 63 + beq slb_loop_enter_skip + + ld r9, 8(r11) + slbmte r9, r10 + +slb_loop_enter_skip: + addi r11, r11, 16 + cmpd cr0, r11, r12 + blt slb_loop_enter + +slb_do_enter: + +.endm + +/****************************************************************************** + * * + * Exit code * + * * + *****************************************************************************/ + +.macro LOAD_HOST_SEGMENTS + + /* Register usage at this point: + * + * R1 = host R1 + * R2 = host R2 + * R12 = exit handler id + * R13 = shadow vcpu - SHADOW_VCPU_OFF [=PACA on PPC64] + * SVCPU.* = guest * + * SVCPU[CR] = guest CR + * SVCPU[XER] = guest XER + * SVCPU[CTR] = guest CTR + * SVCPU[LR] = guest LR + * + */ + + /* Restore bolted entries from the shadow and fix it along the way */ + + /* We don't store anything in entry 0, so we don't need to take care of it */ + slbia + isync + +#if SLB_NUM_BOLTED == 3 + + ld r11, PACA_SLBSHADOWPTR(r13) + + REBOLT_SLB_ENTRY(0) + REBOLT_SLB_ENTRY(1) + REBOLT_SLB_ENTRY(2) + +#else +#error unknown number of bolted entries +#endif + +slb_do_exit: + +.endm diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c new file mode 100644 index 00000000..ea0f8c53 --- /dev/null +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -0,0 +1,73 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> + * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com> + */ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/highmem.h> +#include <linux/gfp.h> +#include <linux/slab.h> +#include <linux/hugetlb.h> +#include <linux/list.h> + +#include <asm/tlbflush.h> +#include <asm/kvm_ppc.h> +#include <asm/kvm_book3s.h> +#include <asm/mmu-hash64.h> +#include <asm/hvcall.h> +#include <asm/synch.h> +#include <asm/ppc-opcode.h> +#include <asm/kvm_host.h> +#include <asm/udbg.h> + +#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) + +long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, + unsigned long ioba, unsigned long tce) +{ + struct kvm *kvm = vcpu->kvm; + struct kvmppc_spapr_tce_table *stt; + + /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */ + /* liobn, ioba, tce); */ + + list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { + if (stt->liobn == liobn) { + unsigned long idx = ioba >> SPAPR_TCE_SHIFT; + struct page *page; + u64 *tbl; + + /* udbg_printf("H_PUT_TCE: liobn 0x%lx => stt=%p window_size=0x%x\n", */ + /* liobn, stt, stt->window_size); */ + if (ioba >= stt->window_size) + return H_PARAMETER; + + page = stt->pages[idx / TCES_PER_PAGE]; + tbl = (u64 *)page_address(page); + + /* FIXME: Need to validate the TCE itself */ + /* udbg_printf("tce @ %p\n", &tbl[idx % TCES_PER_PAGE]); */ + tbl[idx % TCES_PER_PAGE] = tce; + return H_SUCCESS; + } + } + + /* Didn't find the liobn, punt it to userspace */ + return H_TOO_HARD; +} diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c new file mode 100644 index 00000000..135663a3 --- /dev/null +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -0,0 +1,592 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright SUSE Linux Products GmbH 2009 + * + * Authors: Alexander Graf <agraf@suse.de> + */ + +#include <asm/kvm_ppc.h> +#include <asm/disassemble.h> +#include <asm/kvm_book3s.h> +#include <asm/reg.h> +#include <asm/switch_to.h> + +#define OP_19_XOP_RFID 18 +#define OP_19_XOP_RFI 50 + +#define OP_31_XOP_MFMSR 83 +#define OP_31_XOP_MTMSR 146 +#define OP_31_XOP_MTMSRD 178 +#define OP_31_XOP_MTSR 210 +#define OP_31_XOP_MTSRIN 242 +#define OP_31_XOP_TLBIEL 274 +#define OP_31_XOP_TLBIE 306 +#define OP_31_XOP_SLBMTE 402 +#define OP_31_XOP_SLBIE 434 +#define OP_31_XOP_SLBIA 498 +#define OP_31_XOP_MFSR 595 +#define OP_31_XOP_MFSRIN 659 +#define OP_31_XOP_DCBA 758 +#define OP_31_XOP_SLBMFEV 851 +#define OP_31_XOP_EIOIO 854 +#define OP_31_XOP_SLBMFEE 915 + +/* DCBZ is actually 1014, but we patch it to 1010 so we get a trap */ +#define OP_31_XOP_DCBZ 1010 + +#define OP_LFS 48 +#define OP_LFD 50 +#define OP_STFS 52 +#define OP_STFD 54 + +#define SPRN_GQR0 912 +#define SPRN_GQR1 913 +#define SPRN_GQR2 914 +#define SPRN_GQR3 915 +#define SPRN_GQR4 916 +#define SPRN_GQR5 917 +#define SPRN_GQR6 918 +#define SPRN_GQR7 919 + +/* Book3S_32 defines mfsrin(v) - but that messes up our abstract + * function pointers, so let's just disable the define. */ +#undef mfsrin + +enum priv_level { + PRIV_PROBLEM = 0, + PRIV_SUPER = 1, + PRIV_HYPER = 2, +}; + +static bool spr_allowed(struct kvm_vcpu *vcpu, enum priv_level level) +{ + /* PAPR VMs only access supervisor SPRs */ + if (vcpu->arch.papr_enabled && (level > PRIV_SUPER)) + return false; + + /* Limit user space to its own small SPR set */ + if ((vcpu->arch.shared->msr & MSR_PR) && level > PRIV_PROBLEM) + return false; + + return true; +} + +int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int inst, int *advance) +{ + int emulated = EMULATE_DONE; + + switch (get_op(inst)) { + case 19: + switch (get_xop(inst)) { + case OP_19_XOP_RFID: + case OP_19_XOP_RFI: + kvmppc_set_pc(vcpu, vcpu->arch.shared->srr0); + kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1); + *advance = 0; + break; + + default: + emulated = EMULATE_FAIL; + break; + } + break; + case 31: + switch (get_xop(inst)) { + case OP_31_XOP_MFMSR: + kvmppc_set_gpr(vcpu, get_rt(inst), + vcpu->arch.shared->msr); + break; + case OP_31_XOP_MTMSRD: + { + ulong rs = kvmppc_get_gpr(vcpu, get_rs(inst)); + if (inst & 0x10000) { + vcpu->arch.shared->msr &= ~(MSR_RI | MSR_EE); + vcpu->arch.shared->msr |= rs & (MSR_RI | MSR_EE); + } else + kvmppc_set_msr(vcpu, rs); + break; + } + case OP_31_XOP_MTMSR: + kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, get_rs(inst))); + break; + case OP_31_XOP_MFSR: + { + int srnum; + + srnum = kvmppc_get_field(inst, 12 + 32, 15 + 32); + if (vcpu->arch.mmu.mfsrin) { + u32 sr; + sr = vcpu->arch.mmu.mfsrin(vcpu, srnum); + kvmppc_set_gpr(vcpu, get_rt(inst), sr); + } + break; + } + case OP_31_XOP_MFSRIN: + { + int srnum; + + srnum = (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf; + if (vcpu->arch.mmu.mfsrin) { + u32 sr; + sr = vcpu->arch.mmu.mfsrin(vcpu, srnum); + kvmppc_set_gpr(vcpu, get_rt(inst), sr); + } + break; + } + case OP_31_XOP_MTSR: + vcpu->arch.mmu.mtsrin(vcpu, + (inst >> 16) & 0xf, + kvmppc_get_gpr(vcpu, get_rs(inst))); + break; + case OP_31_XOP_MTSRIN: + vcpu->arch.mmu.mtsrin(vcpu, + (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf, + kvmppc_get_gpr(vcpu, get_rs(inst))); + break; + case OP_31_XOP_TLBIE: + case OP_31_XOP_TLBIEL: + { + bool large = (inst & 0x00200000) ? true : false; + ulong addr = kvmppc_get_gpr(vcpu, get_rb(inst)); + vcpu->arch.mmu.tlbie(vcpu, addr, large); + break; + } + case OP_31_XOP_EIOIO: + break; + case OP_31_XOP_SLBMTE: + if (!vcpu->arch.mmu.slbmte) + return EMULATE_FAIL; + + vcpu->arch.mmu.slbmte(vcpu, + kvmppc_get_gpr(vcpu, get_rs(inst)), + kvmppc_get_gpr(vcpu, get_rb(inst))); + break; + case OP_31_XOP_SLBIE: + if (!vcpu->arch.mmu.slbie) + return EMULATE_FAIL; + + vcpu->arch.mmu.slbie(vcpu, + kvmppc_get_gpr(vcpu, get_rb(inst))); + break; + case OP_31_XOP_SLBIA: + if (!vcpu->arch.mmu.slbia) + return EMULATE_FAIL; + + vcpu->arch.mmu.slbia(vcpu); + break; + case OP_31_XOP_SLBMFEE: + if (!vcpu->arch.mmu.slbmfee) { + emulated = EMULATE_FAIL; + } else { + ulong t, rb; + + rb = kvmppc_get_gpr(vcpu, get_rb(inst)); + t = vcpu->arch.mmu.slbmfee(vcpu, rb); + kvmppc_set_gpr(vcpu, get_rt(inst), t); + } + break; + case OP_31_XOP_SLBMFEV: + if (!vcpu->arch.mmu.slbmfev) { + emulated = EMULATE_FAIL; + } else { + ulong t, rb; + + rb = kvmppc_get_gpr(vcpu, get_rb(inst)); + t = vcpu->arch.mmu.slbmfev(vcpu, rb); + kvmppc_set_gpr(vcpu, get_rt(inst), t); + } + break; + case OP_31_XOP_DCBA: + /* Gets treated as NOP */ + break; + case OP_31_XOP_DCBZ: + { + ulong rb = kvmppc_get_gpr(vcpu, get_rb(inst)); + ulong ra = 0; + ulong addr, vaddr; + u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + u32 dsisr; + int r; + + if (get_ra(inst)) + ra = kvmppc_get_gpr(vcpu, get_ra(inst)); + + addr = (ra + rb) & ~31ULL; + if (!(vcpu->arch.shared->msr & MSR_SF)) + addr &= 0xffffffff; + vaddr = addr; + + r = kvmppc_st(vcpu, &addr, 32, zeros, true); + if ((r == -ENOENT) || (r == -EPERM)) { + struct kvmppc_book3s_shadow_vcpu *svcpu; + + svcpu = svcpu_get(vcpu); + *advance = 0; + vcpu->arch.shared->dar = vaddr; + svcpu->fault_dar = vaddr; + + dsisr = DSISR_ISSTORE; + if (r == -ENOENT) + dsisr |= DSISR_NOHPTE; + else if (r == -EPERM) + dsisr |= DSISR_PROTFAULT; + + vcpu->arch.shared->dsisr = dsisr; + svcpu->fault_dsisr = dsisr; + svcpu_put(svcpu); + + kvmppc_book3s_queue_irqprio(vcpu, + BOOK3S_INTERRUPT_DATA_STORAGE); + } + + break; + } + default: + emulated = EMULATE_FAIL; + } + break; + default: + emulated = EMULATE_FAIL; + } + + if (emulated == EMULATE_FAIL) + emulated = kvmppc_emulate_paired_single(run, vcpu); + + return emulated; +} + +void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper, + u32 val) +{ + if (upper) { + /* Upper BAT */ + u32 bl = (val >> 2) & 0x7ff; + bat->bepi_mask = (~bl << 17); + bat->bepi = val & 0xfffe0000; + bat->vs = (val & 2) ? 1 : 0; + bat->vp = (val & 1) ? 1 : 0; + bat->raw = (bat->raw & 0xffffffff00000000ULL) | val; + } else { + /* Lower BAT */ + bat->brpn = val & 0xfffe0000; + bat->wimg = (val >> 3) & 0xf; + bat->pp = val & 3; + bat->raw = (bat->raw & 0x00000000ffffffffULL) | ((u64)val << 32); + } +} + +static struct kvmppc_bat *kvmppc_find_bat(struct kvm_vcpu *vcpu, int sprn) +{ + struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); + struct kvmppc_bat *bat; + + switch (sprn) { + case SPRN_IBAT0U ... SPRN_IBAT3L: + bat = &vcpu_book3s->ibat[(sprn - SPRN_IBAT0U) / 2]; + break; + case SPRN_IBAT4U ... SPRN_IBAT7L: + bat = &vcpu_book3s->ibat[4 + ((sprn - SPRN_IBAT4U) / 2)]; + break; + case SPRN_DBAT0U ... SPRN_DBAT3L: + bat = &vcpu_book3s->dbat[(sprn - SPRN_DBAT0U) / 2]; + break; + case SPRN_DBAT4U ... SPRN_DBAT7L: + bat = &vcpu_book3s->dbat[4 + ((sprn - SPRN_DBAT4U) / 2)]; + break; + default: + BUG(); + } + + return bat; +} + +int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) +{ + int emulated = EMULATE_DONE; + ulong spr_val = kvmppc_get_gpr(vcpu, rs); + + switch (sprn) { + case SPRN_SDR1: + if (!spr_allowed(vcpu, PRIV_HYPER)) + goto unprivileged; + to_book3s(vcpu)->sdr1 = spr_val; + break; + case SPRN_DSISR: + vcpu->arch.shared->dsisr = spr_val; + break; + case SPRN_DAR: + vcpu->arch.shared->dar = spr_val; + break; + case SPRN_HIOR: + to_book3s(vcpu)->hior = spr_val; + break; + case SPRN_IBAT0U ... SPRN_IBAT3L: + case SPRN_IBAT4U ... SPRN_IBAT7L: + case SPRN_DBAT0U ... SPRN_DBAT3L: + case SPRN_DBAT4U ... SPRN_DBAT7L: + { + struct kvmppc_bat *bat = kvmppc_find_bat(vcpu, sprn); + + kvmppc_set_bat(vcpu, bat, !(sprn % 2), (u32)spr_val); + /* BAT writes happen so rarely that we're ok to flush + * everything here */ + kvmppc_mmu_pte_flush(vcpu, 0, 0); + kvmppc_mmu_flush_segments(vcpu); + break; + } + case SPRN_HID0: + to_book3s(vcpu)->hid[0] = spr_val; + break; + case SPRN_HID1: + to_book3s(vcpu)->hid[1] = spr_val; + break; + case SPRN_HID2: + to_book3s(vcpu)->hid[2] = spr_val; + break; + case SPRN_HID2_GEKKO: + to_book3s(vcpu)->hid[2] = spr_val; + /* HID2.PSE controls paired single on gekko */ + switch (vcpu->arch.pvr) { + case 0x00080200: /* lonestar 2.0 */ + case 0x00088202: /* lonestar 2.2 */ + case 0x70000100: /* gekko 1.0 */ + case 0x00080100: /* gekko 2.0 */ + case 0x00083203: /* gekko 2.3a */ + case 0x00083213: /* gekko 2.3b */ + case 0x00083204: /* gekko 2.4 */ + case 0x00083214: /* gekko 2.4e (8SE) - retail HW2 */ + case 0x00087200: /* broadway */ + if (vcpu->arch.hflags & BOOK3S_HFLAG_NATIVE_PS) { + /* Native paired singles */ + } else if (spr_val & (1 << 29)) { /* HID2.PSE */ + vcpu->arch.hflags |= BOOK3S_HFLAG_PAIRED_SINGLE; + kvmppc_giveup_ext(vcpu, MSR_FP); + } else { + vcpu->arch.hflags &= ~BOOK3S_HFLAG_PAIRED_SINGLE; + } + break; + } + break; + case SPRN_HID4: + case SPRN_HID4_GEKKO: + to_book3s(vcpu)->hid[4] = spr_val; + break; + case SPRN_HID5: + to_book3s(vcpu)->hid[5] = spr_val; + /* guest HID5 set can change is_dcbz32 */ + if (vcpu->arch.mmu.is_dcbz32(vcpu) && + (mfmsr() & MSR_HV)) + vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; + break; + case SPRN_GQR0: + case SPRN_GQR1: + case SPRN_GQR2: + case SPRN_GQR3: + case SPRN_GQR4: + case SPRN_GQR5: + case SPRN_GQR6: + case SPRN_GQR7: + to_book3s(vcpu)->gqr[sprn - SPRN_GQR0] = spr_val; + break; + case SPRN_ICTC: + case SPRN_THRM1: + case SPRN_THRM2: + case SPRN_THRM3: + case SPRN_CTRLF: + case SPRN_CTRLT: + case SPRN_L2CR: + case SPRN_MMCR0_GEKKO: + case SPRN_MMCR1_GEKKO: + case SPRN_PMC1_GEKKO: + case SPRN_PMC2_GEKKO: + case SPRN_PMC3_GEKKO: + case SPRN_PMC4_GEKKO: + case SPRN_WPAR_GEKKO: + break; +unprivileged: + default: + printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn); +#ifndef DEBUG_SPR + emulated = EMULATE_FAIL; +#endif + break; + } + + return emulated; +} + +int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) +{ + int emulated = EMULATE_DONE; + + switch (sprn) { + case SPRN_IBAT0U ... SPRN_IBAT3L: + case SPRN_IBAT4U ... SPRN_IBAT7L: + case SPRN_DBAT0U ... SPRN_DBAT3L: + case SPRN_DBAT4U ... SPRN_DBAT7L: + { + struct kvmppc_bat *bat = kvmppc_find_bat(vcpu, sprn); + + if (sprn % 2) + kvmppc_set_gpr(vcpu, rt, bat->raw >> 32); + else + kvmppc_set_gpr(vcpu, rt, bat->raw); + + break; + } + case SPRN_SDR1: + if (!spr_allowed(vcpu, PRIV_HYPER)) + goto unprivileged; + kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1); + break; + case SPRN_DSISR: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dsisr); + break; + case SPRN_DAR: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar); + break; + case SPRN_HIOR: + kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hior); + break; + case SPRN_HID0: + kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[0]); + break; + case SPRN_HID1: + kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[1]); + break; + case SPRN_HID2: + case SPRN_HID2_GEKKO: + kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[2]); + break; + case SPRN_HID4: + case SPRN_HID4_GEKKO: + kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[4]); + break; + case SPRN_HID5: + kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]); + break; + case SPRN_CFAR: + case SPRN_PURR: + kvmppc_set_gpr(vcpu, rt, 0); + break; + case SPRN_GQR0: + case SPRN_GQR1: + case SPRN_GQR2: + case SPRN_GQR3: + case SPRN_GQR4: + case SPRN_GQR5: + case SPRN_GQR6: + case SPRN_GQR7: + kvmppc_set_gpr(vcpu, rt, + to_book3s(vcpu)->gqr[sprn - SPRN_GQR0]); + break; + case SPRN_THRM1: + case SPRN_THRM2: + case SPRN_THRM3: + case SPRN_CTRLF: + case SPRN_CTRLT: + case SPRN_L2CR: + case SPRN_MMCR0_GEKKO: + case SPRN_MMCR1_GEKKO: + case SPRN_PMC1_GEKKO: + case SPRN_PMC2_GEKKO: + case SPRN_PMC3_GEKKO: + case SPRN_PMC4_GEKKO: + case SPRN_WPAR_GEKKO: + kvmppc_set_gpr(vcpu, rt, 0); + break; + default: +unprivileged: + printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn); +#ifndef DEBUG_SPR + emulated = EMULATE_FAIL; +#endif + break; + } + + return emulated; +} + +u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst) +{ + u32 dsisr = 0; + + /* + * This is what the spec says about DSISR bits (not mentioned = 0): + * + * 12:13 [DS] Set to bits 30:31 + * 15:16 [X] Set to bits 29:30 + * 17 [X] Set to bit 25 + * [D/DS] Set to bit 5 + * 18:21 [X] Set to bits 21:24 + * [D/DS] Set to bits 1:4 + * 22:26 Set to bits 6:10 (RT/RS/FRT/FRS) + * 27:31 Set to bits 11:15 (RA) + */ + + switch (get_op(inst)) { + /* D-form */ + case OP_LFS: + case OP_LFD: + case OP_STFD: + case OP_STFS: + dsisr |= (inst >> 12) & 0x4000; /* bit 17 */ + dsisr |= (inst >> 17) & 0x3c00; /* bits 18:21 */ + break; + /* X-form */ + case 31: + dsisr |= (inst << 14) & 0x18000; /* bits 15:16 */ + dsisr |= (inst << 8) & 0x04000; /* bit 17 */ + dsisr |= (inst << 3) & 0x03c00; /* bits 18:21 */ + break; + default: + printk(KERN_INFO "KVM: Unaligned instruction 0x%x\n", inst); + break; + } + + dsisr |= (inst >> 16) & 0x03ff; /* bits 22:31 */ + + return dsisr; +} + +ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst) +{ + ulong dar = 0; + ulong ra; + + switch (get_op(inst)) { + case OP_LFS: + case OP_LFD: + case OP_STFD: + case OP_STFS: + ra = get_ra(inst); + if (ra) + dar = kvmppc_get_gpr(vcpu, ra); + dar += (s32)((s16)inst); + break; + case 31: + ra = get_ra(inst); + if (ra) + dar = kvmppc_get_gpr(vcpu, ra); + dar += kvmppc_get_gpr(vcpu, get_rb(inst)); + break; + default: + printk(KERN_INFO "KVM: Unaligned instruction 0x%x\n", inst); + break; + } + + return dar; +} diff --git a/arch/powerpc/kvm/book3s_exports.c b/arch/powerpc/kvm/book3s_exports.c new file mode 100644 index 00000000..a150817d --- /dev/null +++ b/arch/powerpc/kvm/book3s_exports.c @@ -0,0 +1,35 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright SUSE Linux Products GmbH 2009 + * + * Authors: Alexander Graf <agraf@suse.de> + */ + +#include <linux/export.h> +#include <asm/kvm_book3s.h> + +#ifdef CONFIG_KVM_BOOK3S_64_HV +EXPORT_SYMBOL_GPL(kvmppc_hv_entry_trampoline); +#else +EXPORT_SYMBOL_GPL(kvmppc_entry_trampoline); +EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu); +#ifdef CONFIG_ALTIVEC +EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec); +#endif +#ifdef CONFIG_VSX +EXPORT_SYMBOL_GPL(kvmppc_load_up_vsx); +#endif +#endif + diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c new file mode 100644 index 00000000..108d1f58 --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv.c @@ -0,0 +1,1437 @@ +/* + * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> + * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved. + * + * Authors: + * Paul Mackerras <paulus@au1.ibm.com> + * Alexander Graf <agraf@suse.de> + * Kevin Wolf <mail@kevin-wolf.de> + * + * Description: KVM functions specific to running on Book 3S + * processors in hypervisor mode (specifically POWER7 and later). + * + * This file is derived from arch/powerpc/kvm/book3s.c, + * by Alexander Graf <agraf@suse.de>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include <linux/kvm_host.h> +#include <linux/err.h> +#include <linux/slab.h> +#include <linux/preempt.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/export.h> +#include <linux/fs.h> +#include <linux/anon_inodes.h> +#include <linux/cpumask.h> +#include <linux/spinlock.h> +#include <linux/page-flags.h> + +#include <asm/reg.h> +#include <asm/cputable.h> +#include <asm/cacheflush.h> +#include <asm/tlbflush.h> +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/kvm_ppc.h> +#include <asm/kvm_book3s.h> +#include <asm/mmu_context.h> +#include <asm/lppaca.h> +#include <asm/processor.h> +#include <asm/cputhreads.h> +#include <asm/page.h> +#include <asm/hvcall.h> +#include <asm/switch_to.h> +#include <linux/gfp.h> +#include <linux/vmalloc.h> +#include <linux/highmem.h> +#include <linux/hugetlb.h> + +/* #define EXIT_DEBUG */ +/* #define EXIT_DEBUG_SIMPLE */ +/* #define EXIT_DEBUG_INT */ + +static void kvmppc_end_cede(struct kvm_vcpu *vcpu); +static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu); + +void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + local_paca->kvm_hstate.kvm_vcpu = vcpu; + local_paca->kvm_hstate.kvm_vcore = vcpu->arch.vcore; +} + +void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) +{ +} + +void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) +{ + vcpu->arch.shregs.msr = msr; + kvmppc_end_cede(vcpu); +} + +void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) +{ + vcpu->arch.pvr = pvr; +} + +void kvmppc_dump_regs(struct kvm_vcpu *vcpu) +{ + int r; + + pr_err("vcpu %p (%d):\n", vcpu, vcpu->vcpu_id); + pr_err("pc = %.16lx msr = %.16llx trap = %x\n", + vcpu->arch.pc, vcpu->arch.shregs.msr, vcpu->arch.trap); + for (r = 0; r < 16; ++r) + pr_err("r%2d = %.16lx r%d = %.16lx\n", + r, kvmppc_get_gpr(vcpu, r), + r+16, kvmppc_get_gpr(vcpu, r+16)); + pr_err("ctr = %.16lx lr = %.16lx\n", + vcpu->arch.ctr, vcpu->arch.lr); + pr_err("srr0 = %.16llx srr1 = %.16llx\n", + vcpu->arch.shregs.srr0, vcpu->arch.shregs.srr1); + pr_err("sprg0 = %.16llx sprg1 = %.16llx\n", + vcpu->arch.shregs.sprg0, vcpu->arch.shregs.sprg1); + pr_err("sprg2 = %.16llx sprg3 = %.16llx\n", + vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3); + pr_err("cr = %.8x xer = %.16lx dsisr = %.8x\n", + vcpu->arch.cr, vcpu->arch.xer, vcpu->arch.shregs.dsisr); + pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar); + pr_err("fault dar = %.16lx dsisr = %.8x\n", + vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); + pr_err("SLB (%d entries):\n", vcpu->arch.slb_max); + for (r = 0; r < vcpu->arch.slb_max; ++r) + pr_err(" ESID = %.16llx VSID = %.16llx\n", + vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv); + pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n", + vcpu->kvm->arch.lpcr, vcpu->kvm->arch.sdr1, + vcpu->arch.last_inst); +} + +struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id) +{ + int r; + struct kvm_vcpu *v, *ret = NULL; + + mutex_lock(&kvm->lock); + kvm_for_each_vcpu(r, v, kvm) { + if (v->vcpu_id == id) { + ret = v; + break; + } + } + mutex_unlock(&kvm->lock); + return ret; +} + +static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa) +{ + vpa->shared_proc = 1; + vpa->yield_count = 1; +} + +static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, + unsigned long flags, + unsigned long vcpuid, unsigned long vpa) +{ + struct kvm *kvm = vcpu->kvm; + unsigned long len, nb; + void *va; + struct kvm_vcpu *tvcpu; + int err = H_PARAMETER; + + tvcpu = kvmppc_find_vcpu(kvm, vcpuid); + if (!tvcpu) + return H_PARAMETER; + + flags >>= 63 - 18; + flags &= 7; + if (flags == 0 || flags == 4) + return H_PARAMETER; + if (flags < 4) { + if (vpa & 0x7f) + return H_PARAMETER; + if (flags >= 2 && !tvcpu->arch.vpa) + return H_RESOURCE; + /* registering new area; convert logical addr to real */ + va = kvmppc_pin_guest_page(kvm, vpa, &nb); + if (va == NULL) + return H_PARAMETER; + if (flags <= 1) + len = *(unsigned short *)(va + 4); + else + len = *(unsigned int *)(va + 4); + if (len > nb) + goto out_unpin; + switch (flags) { + case 1: /* register VPA */ + if (len < 640) + goto out_unpin; + if (tvcpu->arch.vpa) + kvmppc_unpin_guest_page(kvm, vcpu->arch.vpa); + tvcpu->arch.vpa = va; + init_vpa(vcpu, va); + break; + case 2: /* register DTL */ + if (len < 48) + goto out_unpin; + len -= len % 48; + if (tvcpu->arch.dtl) + kvmppc_unpin_guest_page(kvm, vcpu->arch.dtl); + tvcpu->arch.dtl = va; + tvcpu->arch.dtl_end = va + len; + break; + case 3: /* register SLB shadow buffer */ + if (len < 16) + goto out_unpin; + if (tvcpu->arch.slb_shadow) + kvmppc_unpin_guest_page(kvm, vcpu->arch.slb_shadow); + tvcpu->arch.slb_shadow = va; + break; + } + } else { + switch (flags) { + case 5: /* unregister VPA */ + if (tvcpu->arch.slb_shadow || tvcpu->arch.dtl) + return H_RESOURCE; + if (!tvcpu->arch.vpa) + break; + kvmppc_unpin_guest_page(kvm, tvcpu->arch.vpa); + tvcpu->arch.vpa = NULL; + break; + case 6: /* unregister DTL */ + if (!tvcpu->arch.dtl) + break; + kvmppc_unpin_guest_page(kvm, tvcpu->arch.dtl); + tvcpu->arch.dtl = NULL; + break; + case 7: /* unregister SLB shadow buffer */ + if (!tvcpu->arch.slb_shadow) + break; + kvmppc_unpin_guest_page(kvm, tvcpu->arch.slb_shadow); + tvcpu->arch.slb_shadow = NULL; + break; + } + } + return H_SUCCESS; + + out_unpin: + kvmppc_unpin_guest_page(kvm, va); + return err; +} + +int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) +{ + unsigned long req = kvmppc_get_gpr(vcpu, 3); + unsigned long target, ret = H_SUCCESS; + struct kvm_vcpu *tvcpu; + + switch (req) { + case H_ENTER: + ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4), + kvmppc_get_gpr(vcpu, 5), + kvmppc_get_gpr(vcpu, 6), + kvmppc_get_gpr(vcpu, 7)); + break; + case H_CEDE: + break; + case H_PROD: + target = kvmppc_get_gpr(vcpu, 4); + tvcpu = kvmppc_find_vcpu(vcpu->kvm, target); + if (!tvcpu) { + ret = H_PARAMETER; + break; + } + tvcpu->arch.prodded = 1; + smp_mb(); + if (vcpu->arch.ceded) { + if (waitqueue_active(&vcpu->wq)) { + wake_up_interruptible(&vcpu->wq); + vcpu->stat.halt_wakeup++; + } + } + break; + case H_CONFER: + break; + case H_REGISTER_VPA: + ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4), + kvmppc_get_gpr(vcpu, 5), + kvmppc_get_gpr(vcpu, 6)); + break; + default: + return RESUME_HOST; + } + kvmppc_set_gpr(vcpu, 3, ret); + vcpu->arch.hcall_needed = 0; + return RESUME_GUEST; +} + +static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, + struct task_struct *tsk) +{ + int r = RESUME_HOST; + + vcpu->stat.sum_exits++; + + run->exit_reason = KVM_EXIT_UNKNOWN; + run->ready_for_interrupt_injection = 1; + switch (vcpu->arch.trap) { + /* We're good on these - the host merely wanted to get our attention */ + case BOOK3S_INTERRUPT_HV_DECREMENTER: + vcpu->stat.dec_exits++; + r = RESUME_GUEST; + break; + case BOOK3S_INTERRUPT_EXTERNAL: + vcpu->stat.ext_intr_exits++; + r = RESUME_GUEST; + break; + case BOOK3S_INTERRUPT_PERFMON: + r = RESUME_GUEST; + break; + case BOOK3S_INTERRUPT_PROGRAM: + { + ulong flags; + /* + * Normally program interrupts are delivered directly + * to the guest by the hardware, but we can get here + * as a result of a hypervisor emulation interrupt + * (e40) getting turned into a 700 by BML RTAS. + */ + flags = vcpu->arch.shregs.msr & 0x1f0000ull; + kvmppc_core_queue_program(vcpu, flags); + r = RESUME_GUEST; + break; + } + case BOOK3S_INTERRUPT_SYSCALL: + { + /* hcall - punt to userspace */ + int i; + + if (vcpu->arch.shregs.msr & MSR_PR) { + /* sc 1 from userspace - reflect to guest syscall */ + kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_SYSCALL); + r = RESUME_GUEST; + break; + } + run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3); + for (i = 0; i < 9; ++i) + run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i); + run->exit_reason = KVM_EXIT_PAPR_HCALL; + vcpu->arch.hcall_needed = 1; + r = RESUME_HOST; + break; + } + /* + * We get these next two if the guest accesses a page which it thinks + * it has mapped but which is not actually present, either because + * it is for an emulated I/O device or because the corresonding + * host page has been paged out. Any other HDSI/HISI interrupts + * have been handled already. + */ + case BOOK3S_INTERRUPT_H_DATA_STORAGE: + r = kvmppc_book3s_hv_page_fault(run, vcpu, + vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); + break; + case BOOK3S_INTERRUPT_H_INST_STORAGE: + r = kvmppc_book3s_hv_page_fault(run, vcpu, + kvmppc_get_pc(vcpu), 0); + break; + /* + * This occurs if the guest executes an illegal instruction. + * We just generate a program interrupt to the guest, since + * we don't emulate any guest instructions at this stage. + */ + case BOOK3S_INTERRUPT_H_EMUL_ASSIST: + kvmppc_core_queue_program(vcpu, 0x80000); + r = RESUME_GUEST; + break; + default: + kvmppc_dump_regs(vcpu); + printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n", + vcpu->arch.trap, kvmppc_get_pc(vcpu), + vcpu->arch.shregs.msr); + r = RESUME_HOST; + BUG(); + break; + } + + return r; +} + +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + int i; + + sregs->pvr = vcpu->arch.pvr; + + memset(sregs, 0, sizeof(struct kvm_sregs)); + for (i = 0; i < vcpu->arch.slb_max; i++) { + sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige; + sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv; + } + + return 0; +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + int i, j; + + kvmppc_set_pvr(vcpu, sregs->pvr); + + j = 0; + for (i = 0; i < vcpu->arch.slb_nr; i++) { + if (sregs->u.s.ppc64.slb[i].slbe & SLB_ESID_V) { + vcpu->arch.slb[j].orige = sregs->u.s.ppc64.slb[i].slbe; + vcpu->arch.slb[j].origv = sregs->u.s.ppc64.slb[i].slbv; + ++j; + } + } + vcpu->arch.slb_max = j; + + return 0; +} + +int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + int r = -EINVAL; + + switch (reg->id) { + case KVM_REG_PPC_HIOR: + r = put_user(0, (u64 __user *)reg->addr); + break; + default: + break; + } + + return r; +} + +int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + int r = -EINVAL; + + switch (reg->id) { + case KVM_REG_PPC_HIOR: + { + u64 hior; + /* Only allow this to be set to zero */ + r = get_user(hior, (u64 __user *)reg->addr); + if (!r && (hior != 0)) + r = -EINVAL; + break; + } + default: + break; + } + + return r; +} + +int kvmppc_core_check_processor_compat(void) +{ + if (cpu_has_feature(CPU_FTR_HVMODE)) + return 0; + return -EIO; +} + +struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +{ + struct kvm_vcpu *vcpu; + int err = -EINVAL; + int core; + struct kvmppc_vcore *vcore; + + core = id / threads_per_core; + if (core >= KVM_MAX_VCORES) + goto out; + + err = -ENOMEM; + vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + if (!vcpu) + goto out; + + err = kvm_vcpu_init(vcpu, kvm, id); + if (err) + goto free_vcpu; + + vcpu->arch.shared = &vcpu->arch.shregs; + vcpu->arch.last_cpu = -1; + vcpu->arch.mmcr[0] = MMCR0_FC; + vcpu->arch.ctrl = CTRL_RUNLATCH; + /* default to host PVR, since we can't spoof it */ + vcpu->arch.pvr = mfspr(SPRN_PVR); + kvmppc_set_pvr(vcpu, vcpu->arch.pvr); + + kvmppc_mmu_book3s_hv_init(vcpu); + + /* + * We consider the vcpu stopped until we see the first run ioctl for it. + */ + vcpu->arch.state = KVMPPC_VCPU_STOPPED; + + init_waitqueue_head(&vcpu->arch.cpu_run); + + mutex_lock(&kvm->lock); + vcore = kvm->arch.vcores[core]; + if (!vcore) { + vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL); + if (vcore) { + INIT_LIST_HEAD(&vcore->runnable_threads); + spin_lock_init(&vcore->lock); + init_waitqueue_head(&vcore->wq); + } + kvm->arch.vcores[core] = vcore; + } + mutex_unlock(&kvm->lock); + + if (!vcore) + goto free_vcpu; + + spin_lock(&vcore->lock); + ++vcore->num_threads; + spin_unlock(&vcore->lock); + vcpu->arch.vcore = vcore; + + vcpu->arch.cpu_type = KVM_CPU_3S_64; + kvmppc_sanity_check(vcpu); + + return vcpu; + +free_vcpu: + kmem_cache_free(kvm_vcpu_cache, vcpu); +out: + return ERR_PTR(err); +} + +void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.dtl) + kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl); + if (vcpu->arch.slb_shadow) + kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow); + if (vcpu->arch.vpa) + kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa); + kvm_vcpu_uninit(vcpu); + kmem_cache_free(kvm_vcpu_cache, vcpu); +} + +static void kvmppc_set_timer(struct kvm_vcpu *vcpu) +{ + unsigned long dec_nsec, now; + + now = get_tb(); + if (now > vcpu->arch.dec_expires) { + /* decrementer has already gone negative */ + kvmppc_core_queue_dec(vcpu); + kvmppc_core_prepare_to_enter(vcpu); + return; + } + dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC + / tb_ticks_per_sec; + hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec), + HRTIMER_MODE_REL); + vcpu->arch.timer_running = 1; +} + +static void kvmppc_end_cede(struct kvm_vcpu *vcpu) +{ + vcpu->arch.ceded = 0; + if (vcpu->arch.timer_running) { + hrtimer_try_to_cancel(&vcpu->arch.dec_timer); + vcpu->arch.timer_running = 0; + } +} + +extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); +extern void xics_wake_cpu(int cpu); + +static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, + struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu *v; + + if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE) + return; + vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; + --vc->n_runnable; + ++vc->n_busy; + /* decrement the physical thread id of each following vcpu */ + v = vcpu; + list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list) + --v->arch.ptid; + list_del(&vcpu->arch.run_list); +} + +static void kvmppc_start_thread(struct kvm_vcpu *vcpu) +{ + int cpu; + struct paca_struct *tpaca; + struct kvmppc_vcore *vc = vcpu->arch.vcore; + + if (vcpu->arch.timer_running) { + hrtimer_try_to_cancel(&vcpu->arch.dec_timer); + vcpu->arch.timer_running = 0; + } + cpu = vc->pcpu + vcpu->arch.ptid; + tpaca = &paca[cpu]; + tpaca->kvm_hstate.kvm_vcpu = vcpu; + tpaca->kvm_hstate.kvm_vcore = vc; + tpaca->kvm_hstate.napping = 0; + vcpu->cpu = vc->pcpu; + smp_wmb(); +#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP) + if (vcpu->arch.ptid) { + tpaca->cpu_start = 0x80; + wmb(); + xics_wake_cpu(cpu); + ++vc->n_woken; + } +#endif +} + +static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc) +{ + int i; + + HMT_low(); + i = 0; + while (vc->nap_count < vc->n_woken) { + if (++i >= 1000000) { + pr_err("kvmppc_wait_for_nap timeout %d %d\n", + vc->nap_count, vc->n_woken); + break; + } + cpu_relax(); + } + HMT_medium(); +} + +/* + * Check that we are on thread 0 and that any other threads in + * this core are off-line. + */ +static int on_primary_thread(void) +{ + int cpu = smp_processor_id(); + int thr = cpu_thread_in_core(cpu); + + if (thr) + return 0; + while (++thr < threads_per_core) + if (cpu_online(cpu + thr)) + return 0; + return 1; +} + +/* + * Run a set of guest threads on a physical core. + * Called with vc->lock held. + */ +static int kvmppc_run_core(struct kvmppc_vcore *vc) +{ + struct kvm_vcpu *vcpu, *vcpu0, *vnext; + long ret; + u64 now; + int ptid; + + /* don't start if any threads have a signal pending */ + list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) + if (signal_pending(vcpu->arch.run_task)) + return 0; + + /* + * Make sure we are running on thread 0, and that + * secondary threads are offline. + * XXX we should also block attempts to bring any + * secondary threads online. + */ + if (threads_per_core > 1 && !on_primary_thread()) { + list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) + vcpu->arch.ret = -EBUSY; + goto out; + } + + /* + * Assign physical thread IDs, first to non-ceded vcpus + * and then to ceded ones. + */ + ptid = 0; + vcpu0 = NULL; + list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { + if (!vcpu->arch.ceded) { + if (!ptid) + vcpu0 = vcpu; + vcpu->arch.ptid = ptid++; + } + } + if (!vcpu0) + return 0; /* nothing to run */ + list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) + if (vcpu->arch.ceded) + vcpu->arch.ptid = ptid++; + + vc->n_woken = 0; + vc->nap_count = 0; + vc->entry_exit_count = 0; + vc->vcore_state = VCORE_RUNNING; + vc->in_guest = 0; + vc->pcpu = smp_processor_id(); + vc->napping_threads = 0; + list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) + kvmppc_start_thread(vcpu); + + preempt_disable(); + spin_unlock(&vc->lock); + + kvm_guest_enter(); + __kvmppc_vcore_entry(NULL, vcpu0); + + spin_lock(&vc->lock); + /* disable sending of IPIs on virtual external irqs */ + list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) + vcpu->cpu = -1; + /* wait for secondary threads to finish writing their state to memory */ + if (vc->nap_count < vc->n_woken) + kvmppc_wait_for_nap(vc); + /* prevent other vcpu threads from doing kvmppc_start_thread() now */ + vc->vcore_state = VCORE_EXITING; + spin_unlock(&vc->lock); + + /* make sure updates to secondary vcpu structs are visible now */ + smp_mb(); + kvm_guest_exit(); + + preempt_enable(); + kvm_resched(vcpu); + + now = get_tb(); + list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { + /* cancel pending dec exception if dec is positive */ + if (now < vcpu->arch.dec_expires && + kvmppc_core_pending_dec(vcpu)) + kvmppc_core_dequeue_dec(vcpu); + + ret = RESUME_GUEST; + if (vcpu->arch.trap) + ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu, + vcpu->arch.run_task); + + vcpu->arch.ret = ret; + vcpu->arch.trap = 0; + + if (vcpu->arch.ceded) { + if (ret != RESUME_GUEST) + kvmppc_end_cede(vcpu); + else + kvmppc_set_timer(vcpu); + } + } + + spin_lock(&vc->lock); + out: + vc->vcore_state = VCORE_INACTIVE; + list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, + arch.run_list) { + if (vcpu->arch.ret != RESUME_GUEST) { + kvmppc_remove_runnable(vc, vcpu); + wake_up(&vcpu->arch.cpu_run); + } + } + + return 1; +} + +/* + * Wait for some other vcpu thread to execute us, and + * wake us up when we need to handle something in the host. + */ +static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state) +{ + DEFINE_WAIT(wait); + + prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state); + if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) + schedule(); + finish_wait(&vcpu->arch.cpu_run, &wait); +} + +/* + * All the vcpus in this vcore are idle, so wait for a decrementer + * or external interrupt to one of the vcpus. vc->lock is held. + */ +static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) +{ + DEFINE_WAIT(wait); + struct kvm_vcpu *v; + int all_idle = 1; + + prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE); + vc->vcore_state = VCORE_SLEEPING; + spin_unlock(&vc->lock); + list_for_each_entry(v, &vc->runnable_threads, arch.run_list) { + if (!v->arch.ceded || v->arch.pending_exceptions) { + all_idle = 0; + break; + } + } + if (all_idle) + schedule(); + finish_wait(&vc->wq, &wait); + spin_lock(&vc->lock); + vc->vcore_state = VCORE_INACTIVE; +} + +static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +{ + int n_ceded; + int prev_state; + struct kvmppc_vcore *vc; + struct kvm_vcpu *v, *vn; + + kvm_run->exit_reason = 0; + vcpu->arch.ret = RESUME_GUEST; + vcpu->arch.trap = 0; + + /* + * Synchronize with other threads in this virtual core + */ + vc = vcpu->arch.vcore; + spin_lock(&vc->lock); + vcpu->arch.ceded = 0; + vcpu->arch.run_task = current; + vcpu->arch.kvm_run = kvm_run; + prev_state = vcpu->arch.state; + vcpu->arch.state = KVMPPC_VCPU_RUNNABLE; + list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads); + ++vc->n_runnable; + + /* + * This happens the first time this is called for a vcpu. + * If the vcore is already running, we may be able to start + * this thread straight away and have it join in. + */ + if (prev_state == KVMPPC_VCPU_STOPPED) { + if (vc->vcore_state == VCORE_RUNNING && + VCORE_EXIT_COUNT(vc) == 0) { + vcpu->arch.ptid = vc->n_runnable - 1; + kvmppc_start_thread(vcpu); + } + + } else if (prev_state == KVMPPC_VCPU_BUSY_IN_HOST) + --vc->n_busy; + + while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE && + !signal_pending(current)) { + if (vc->n_busy || vc->vcore_state != VCORE_INACTIVE) { + spin_unlock(&vc->lock); + kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE); + spin_lock(&vc->lock); + continue; + } + n_ceded = 0; + list_for_each_entry(v, &vc->runnable_threads, arch.run_list) + n_ceded += v->arch.ceded; + if (n_ceded == vc->n_runnable) + kvmppc_vcore_blocked(vc); + else + kvmppc_run_core(vc); + + list_for_each_entry_safe(v, vn, &vc->runnable_threads, + arch.run_list) { + kvmppc_core_prepare_to_enter(v); + if (signal_pending(v->arch.run_task)) { + kvmppc_remove_runnable(vc, v); + v->stat.signal_exits++; + v->arch.kvm_run->exit_reason = KVM_EXIT_INTR; + v->arch.ret = -EINTR; + wake_up(&v->arch.cpu_run); + } + } + } + + if (signal_pending(current)) { + if (vc->vcore_state == VCORE_RUNNING || + vc->vcore_state == VCORE_EXITING) { + spin_unlock(&vc->lock); + kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE); + spin_lock(&vc->lock); + } + if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) { + kvmppc_remove_runnable(vc, vcpu); + vcpu->stat.signal_exits++; + kvm_run->exit_reason = KVM_EXIT_INTR; + vcpu->arch.ret = -EINTR; + } + } + + spin_unlock(&vc->lock); + return vcpu->arch.ret; +} + +int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) +{ + int r; + + if (!vcpu->arch.sane) { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return -EINVAL; + } + + kvmppc_core_prepare_to_enter(vcpu); + + /* No need to go into the guest when all we'll do is come back out */ + if (signal_pending(current)) { + run->exit_reason = KVM_EXIT_INTR; + return -EINTR; + } + + /* On the first time here, set up VRMA or RMA */ + if (!vcpu->kvm->arch.rma_setup_done) { + r = kvmppc_hv_setup_rma(vcpu); + if (r) + return r; + } + + flush_fp_to_thread(current); + flush_altivec_to_thread(current); + flush_vsx_to_thread(current); + vcpu->arch.wqp = &vcpu->arch.vcore->wq; + vcpu->arch.pgdir = current->mm->pgd; + + do { + r = kvmppc_run_vcpu(run, vcpu); + + if (run->exit_reason == KVM_EXIT_PAPR_HCALL && + !(vcpu->arch.shregs.msr & MSR_PR)) { + r = kvmppc_pseries_do_hcall(vcpu); + kvmppc_core_prepare_to_enter(vcpu); + } + } while (r == RESUME_GUEST); + return r; +} + +static long kvmppc_stt_npages(unsigned long window_size) +{ + return ALIGN((window_size >> SPAPR_TCE_SHIFT) + * sizeof(u64), PAGE_SIZE) / PAGE_SIZE; +} + +static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt) +{ + struct kvm *kvm = stt->kvm; + int i; + + mutex_lock(&kvm->lock); + list_del(&stt->list); + for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++) + __free_page(stt->pages[i]); + kfree(stt); + mutex_unlock(&kvm->lock); + + kvm_put_kvm(kvm); +} + +static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data; + struct page *page; + + if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size)) + return VM_FAULT_SIGBUS; + + page = stt->pages[vmf->pgoff]; + get_page(page); + vmf->page = page; + return 0; +} + +static const struct vm_operations_struct kvm_spapr_tce_vm_ops = { + .fault = kvm_spapr_tce_fault, +}; + +static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma) +{ + vma->vm_ops = &kvm_spapr_tce_vm_ops; + return 0; +} + +static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) +{ + struct kvmppc_spapr_tce_table *stt = filp->private_data; + + release_spapr_tce_table(stt); + return 0; +} + +static struct file_operations kvm_spapr_tce_fops = { + .mmap = kvm_spapr_tce_mmap, + .release = kvm_spapr_tce_release, +}; + +long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, + struct kvm_create_spapr_tce *args) +{ + struct kvmppc_spapr_tce_table *stt = NULL; + long npages; + int ret = -ENOMEM; + int i; + + /* Check this LIOBN hasn't been previously allocated */ + list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { + if (stt->liobn == args->liobn) + return -EBUSY; + } + + npages = kvmppc_stt_npages(args->window_size); + + stt = kzalloc(sizeof(*stt) + npages* sizeof(struct page *), + GFP_KERNEL); + if (!stt) + goto fail; + + stt->liobn = args->liobn; + stt->window_size = args->window_size; + stt->kvm = kvm; + + for (i = 0; i < npages; i++) { + stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!stt->pages[i]) + goto fail; + } + + kvm_get_kvm(kvm); + + mutex_lock(&kvm->lock); + list_add(&stt->list, &kvm->arch.spapr_tce_tables); + + mutex_unlock(&kvm->lock); + + return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, + stt, O_RDWR); + +fail: + if (stt) { + for (i = 0; i < npages; i++) + if (stt->pages[i]) + __free_page(stt->pages[i]); + + kfree(stt); + } + return ret; +} + +/* Work out RMLS (real mode limit selector) field value for a given RMA size. + Assumes POWER7 or PPC970. */ +static inline int lpcr_rmls(unsigned long rma_size) +{ + switch (rma_size) { + case 32ul << 20: /* 32 MB */ + if (cpu_has_feature(CPU_FTR_ARCH_206)) + return 8; /* only supported on POWER7 */ + return -1; + case 64ul << 20: /* 64 MB */ + return 3; + case 128ul << 20: /* 128 MB */ + return 7; + case 256ul << 20: /* 256 MB */ + return 4; + case 1ul << 30: /* 1 GB */ + return 2; + case 16ul << 30: /* 16 GB */ + return 1; + case 256ul << 30: /* 256 GB */ + return 0; + default: + return -1; + } +} + +static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct kvmppc_linear_info *ri = vma->vm_file->private_data; + struct page *page; + + if (vmf->pgoff >= ri->npages) + return VM_FAULT_SIGBUS; + + page = pfn_to_page(ri->base_pfn + vmf->pgoff); + get_page(page); + vmf->page = page; + return 0; +} + +static const struct vm_operations_struct kvm_rma_vm_ops = { + .fault = kvm_rma_fault, +}; + +static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma) +{ + vma->vm_flags |= VM_RESERVED; + vma->vm_ops = &kvm_rma_vm_ops; + return 0; +} + +static int kvm_rma_release(struct inode *inode, struct file *filp) +{ + struct kvmppc_linear_info *ri = filp->private_data; + + kvm_release_rma(ri); + return 0; +} + +static struct file_operations kvm_rma_fops = { + .mmap = kvm_rma_mmap, + .release = kvm_rma_release, +}; + +long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) +{ + struct kvmppc_linear_info *ri; + long fd; + + ri = kvm_alloc_rma(); + if (!ri) + return -ENOMEM; + + fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR); + if (fd < 0) + kvm_release_rma(ri); + + ret->rma_size = ri->npages << PAGE_SHIFT; + return fd; +} + +/* + * Get (and clear) the dirty memory log for a memory slot. + */ +int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) +{ + struct kvm_memory_slot *memslot; + int r; + unsigned long n; + + mutex_lock(&kvm->slots_lock); + + r = -EINVAL; + if (log->slot >= KVM_MEMORY_SLOTS) + goto out; + + memslot = id_to_memslot(kvm->memslots, log->slot); + r = -ENOENT; + if (!memslot->dirty_bitmap) + goto out; + + n = kvm_dirty_bitmap_bytes(memslot); + memset(memslot->dirty_bitmap, 0, n); + + r = kvmppc_hv_get_dirty_log(kvm, memslot); + if (r) + goto out; + + r = -EFAULT; + if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) + goto out; + + r = 0; +out: + mutex_unlock(&kvm->slots_lock); + return r; +} + +static unsigned long slb_pgsize_encoding(unsigned long psize) +{ + unsigned long senc = 0; + + if (psize > 0x1000) { + senc = SLB_VSID_L; + if (psize == 0x10000) + senc |= SLB_VSID_LP_01; + } + return senc; +} + +int kvmppc_core_prepare_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem) +{ + unsigned long npages; + unsigned long *phys; + + /* Allocate a slot_phys array */ + phys = kvm->arch.slot_phys[mem->slot]; + if (!kvm->arch.using_mmu_notifiers && !phys) { + npages = mem->memory_size >> PAGE_SHIFT; + phys = vzalloc(npages * sizeof(unsigned long)); + if (!phys) + return -ENOMEM; + kvm->arch.slot_phys[mem->slot] = phys; + kvm->arch.slot_npages[mem->slot] = npages; + } + + return 0; +} + +static void unpin_slot(struct kvm *kvm, int slot_id) +{ + unsigned long *physp; + unsigned long j, npages, pfn; + struct page *page; + + physp = kvm->arch.slot_phys[slot_id]; + npages = kvm->arch.slot_npages[slot_id]; + if (physp) { + spin_lock(&kvm->arch.slot_phys_lock); + for (j = 0; j < npages; j++) { + if (!(physp[j] & KVMPPC_GOT_PAGE)) + continue; + pfn = physp[j] >> PAGE_SHIFT; + page = pfn_to_page(pfn); + SetPageDirty(page); + put_page(page); + } + kvm->arch.slot_phys[slot_id] = NULL; + spin_unlock(&kvm->arch.slot_phys_lock); + vfree(physp); + } +} + +void kvmppc_core_commit_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem) +{ +} + +static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu) +{ + int err = 0; + struct kvm *kvm = vcpu->kvm; + struct kvmppc_linear_info *ri = NULL; + unsigned long hva; + struct kvm_memory_slot *memslot; + struct vm_area_struct *vma; + unsigned long lpcr, senc; + unsigned long psize, porder; + unsigned long rma_size; + unsigned long rmls; + unsigned long *physp; + unsigned long i, npages; + + mutex_lock(&kvm->lock); + if (kvm->arch.rma_setup_done) + goto out; /* another vcpu beat us to it */ + + /* Look up the memslot for guest physical address 0 */ + memslot = gfn_to_memslot(kvm, 0); + + /* We must have some memory at 0 by now */ + err = -EINVAL; + if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) + goto out; + + /* Look up the VMA for the start of this memory slot */ + hva = memslot->userspace_addr; + down_read(¤t->mm->mmap_sem); + vma = find_vma(current->mm, hva); + if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO)) + goto up_out; + + psize = vma_kernel_pagesize(vma); + porder = __ilog2(psize); + + /* Is this one of our preallocated RMAs? */ + if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops && + hva == vma->vm_start) + ri = vma->vm_file->private_data; + + up_read(¤t->mm->mmap_sem); + + if (!ri) { + /* On POWER7, use VRMA; on PPC970, give up */ + err = -EPERM; + if (cpu_has_feature(CPU_FTR_ARCH_201)) { + pr_err("KVM: CPU requires an RMO\n"); + goto out; + } + + /* We can handle 4k, 64k or 16M pages in the VRMA */ + err = -EINVAL; + if (!(psize == 0x1000 || psize == 0x10000 || + psize == 0x1000000)) + goto out; + + /* Update VRMASD field in the LPCR */ + senc = slb_pgsize_encoding(psize); + kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | + (VRMA_VSID << SLB_VSID_SHIFT_1T); + lpcr = kvm->arch.lpcr & ~LPCR_VRMASD; + lpcr |= senc << (LPCR_VRMASD_SH - 4); + kvm->arch.lpcr = lpcr; + + /* Create HPTEs in the hash page table for the VRMA */ + kvmppc_map_vrma(vcpu, memslot, porder); + + } else { + /* Set up to use an RMO region */ + rma_size = ri->npages; + if (rma_size > memslot->npages) + rma_size = memslot->npages; + rma_size <<= PAGE_SHIFT; + rmls = lpcr_rmls(rma_size); + err = -EINVAL; + if (rmls < 0) { + pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size); + goto out; + } + atomic_inc(&ri->use_count); + kvm->arch.rma = ri; + + /* Update LPCR and RMOR */ + lpcr = kvm->arch.lpcr; + if (cpu_has_feature(CPU_FTR_ARCH_201)) { + /* PPC970; insert RMLS value (split field) in HID4 */ + lpcr &= ~((1ul << HID4_RMLS0_SH) | + (3ul << HID4_RMLS2_SH)); + lpcr |= ((rmls >> 2) << HID4_RMLS0_SH) | + ((rmls & 3) << HID4_RMLS2_SH); + /* RMOR is also in HID4 */ + lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff) + << HID4_RMOR_SH; + } else { + /* POWER7 */ + lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L); + lpcr |= rmls << LPCR_RMLS_SH; + kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT; + } + kvm->arch.lpcr = lpcr; + pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n", + ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); + + /* Initialize phys addrs of pages in RMO */ + npages = ri->npages; + porder = __ilog2(npages); + physp = kvm->arch.slot_phys[memslot->id]; + spin_lock(&kvm->arch.slot_phys_lock); + for (i = 0; i < npages; ++i) + physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) + porder; + spin_unlock(&kvm->arch.slot_phys_lock); + } + + /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */ + smp_wmb(); + kvm->arch.rma_setup_done = 1; + err = 0; + out: + mutex_unlock(&kvm->lock); + return err; + + up_out: + up_read(¤t->mm->mmap_sem); + goto out; +} + +int kvmppc_core_init_vm(struct kvm *kvm) +{ + long r; + unsigned long lpcr; + + /* Allocate hashed page table */ + r = kvmppc_alloc_hpt(kvm); + if (r) + return r; + + INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); + + kvm->arch.rma = NULL; + + kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); + + if (cpu_has_feature(CPU_FTR_ARCH_201)) { + /* PPC970; HID4 is effectively the LPCR */ + unsigned long lpid = kvm->arch.lpid; + kvm->arch.host_lpid = 0; + kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4); + lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH)); + lpcr |= ((lpid >> 4) << HID4_LPID1_SH) | + ((lpid & 0xf) << HID4_LPID5_SH); + } else { + /* POWER7; init LPCR for virtual RMA mode */ + kvm->arch.host_lpid = mfspr(SPRN_LPID); + kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR); + lpcr &= LPCR_PECE | LPCR_LPES; + lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE | + LPCR_VPM0 | LPCR_VPM1; + kvm->arch.vrma_slb_v = SLB_VSID_B_1T | + (VRMA_VSID << SLB_VSID_SHIFT_1T); + } + kvm->arch.lpcr = lpcr; + + kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206); + spin_lock_init(&kvm->arch.slot_phys_lock); + return 0; +} + +void kvmppc_core_destroy_vm(struct kvm *kvm) +{ + unsigned long i; + + if (!kvm->arch.using_mmu_notifiers) + for (i = 0; i < KVM_MEM_SLOTS_NUM; i++) + unpin_slot(kvm, i); + + if (kvm->arch.rma) { + kvm_release_rma(kvm->arch.rma); + kvm->arch.rma = NULL; + } + + kvmppc_free_hpt(kvm); + WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); +} + +/* These are stubs for now */ +void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) +{ +} + +/* We don't need to emulate any privileged instructions or dcbz */ +int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int inst, int *advance) +{ + return EMULATE_FAIL; +} + +int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) +{ + return EMULATE_FAIL; +} + +int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) +{ + return EMULATE_FAIL; +} + +static int kvmppc_book3s_hv_init(void) +{ + int r; + + r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); + + if (r) + return r; + + r = kvmppc_mmu_hv_init(); + + return r; +} + +static void kvmppc_book3s_hv_exit(void) +{ + kvm_exit(); +} + +module_init(kvmppc_book3s_hv_init); +module_exit(kvmppc_book3s_hv_exit); diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c new file mode 100644 index 00000000..e1b60f56 --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -0,0 +1,230 @@ +/* + * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include <linux/kvm_host.h> +#include <linux/preempt.h> +#include <linux/export.h> +#include <linux/sched.h> +#include <linux/spinlock.h> +#include <linux/bootmem.h> +#include <linux/init.h> + +#include <asm/cputable.h> +#include <asm/kvm_ppc.h> +#include <asm/kvm_book3s.h> + +#define KVM_LINEAR_RMA 0 +#define KVM_LINEAR_HPT 1 + +static void __init kvm_linear_init_one(ulong size, int count, int type); +static struct kvmppc_linear_info *kvm_alloc_linear(int type); +static void kvm_release_linear(struct kvmppc_linear_info *ri); + +/*************** RMA *************/ + +/* + * This maintains a list of RMAs (real mode areas) for KVM guests to use. + * Each RMA has to be physically contiguous and of a size that the + * hardware supports. PPC970 and POWER7 support 64MB, 128MB and 256MB, + * and other larger sizes. Since we are unlikely to be allocate that + * much physically contiguous memory after the system is up and running, + * we preallocate a set of RMAs in early boot for KVM to use. + */ +static unsigned long kvm_rma_size = 64 << 20; /* 64MB */ +static unsigned long kvm_rma_count; + +/* Work out RMLS (real mode limit selector) field value for a given RMA size. + Assumes POWER7 or PPC970. */ +static inline int lpcr_rmls(unsigned long rma_size) +{ + switch (rma_size) { + case 32ul << 20: /* 32 MB */ + if (cpu_has_feature(CPU_FTR_ARCH_206)) + return 8; /* only supported on POWER7 */ + return -1; + case 64ul << 20: /* 64 MB */ + return 3; + case 128ul << 20: /* 128 MB */ + return 7; + case 256ul << 20: /* 256 MB */ + return 4; + case 1ul << 30: /* 1 GB */ + return 2; + case 16ul << 30: /* 16 GB */ + return 1; + case 256ul << 30: /* 256 GB */ + return 0; + default: + return -1; + } +} + +static int __init early_parse_rma_size(char *p) +{ + if (!p) + return 1; + + kvm_rma_size = memparse(p, &p); + + return 0; +} +early_param("kvm_rma_size", early_parse_rma_size); + +static int __init early_parse_rma_count(char *p) +{ + if (!p) + return 1; + + kvm_rma_count = simple_strtoul(p, NULL, 0); + + return 0; +} +early_param("kvm_rma_count", early_parse_rma_count); + +struct kvmppc_linear_info *kvm_alloc_rma(void) +{ + return kvm_alloc_linear(KVM_LINEAR_RMA); +} +EXPORT_SYMBOL_GPL(kvm_alloc_rma); + +void kvm_release_rma(struct kvmppc_linear_info *ri) +{ + kvm_release_linear(ri); +} +EXPORT_SYMBOL_GPL(kvm_release_rma); + +/*************** HPT *************/ + +/* + * This maintains a list of big linear HPT tables that contain the GVA->HPA + * memory mappings. If we don't reserve those early on, we might not be able + * to get a big (usually 16MB) linear memory region from the kernel anymore. + */ + +static unsigned long kvm_hpt_count; + +static int __init early_parse_hpt_count(char *p) +{ + if (!p) + return 1; + + kvm_hpt_count = simple_strtoul(p, NULL, 0); + + return 0; +} +early_param("kvm_hpt_count", early_parse_hpt_count); + +struct kvmppc_linear_info *kvm_alloc_hpt(void) +{ + return kvm_alloc_linear(KVM_LINEAR_HPT); +} +EXPORT_SYMBOL_GPL(kvm_alloc_hpt); + +void kvm_release_hpt(struct kvmppc_linear_info *li) +{ + kvm_release_linear(li); +} +EXPORT_SYMBOL_GPL(kvm_release_hpt); + +/*************** generic *************/ + +static LIST_HEAD(free_linears); +static DEFINE_SPINLOCK(linear_lock); + +static void __init kvm_linear_init_one(ulong size, int count, int type) +{ + unsigned long i; + unsigned long j, npages; + void *linear; + struct page *pg; + const char *typestr; + struct kvmppc_linear_info *linear_info; + + if (!count) + return; + + typestr = (type == KVM_LINEAR_RMA) ? "RMA" : "HPT"; + + npages = size >> PAGE_SHIFT; + linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info)); + for (i = 0; i < count; ++i) { + linear = alloc_bootmem_align(size, size); + pr_info("Allocated KVM %s at %p (%ld MB)\n", typestr, linear, + size >> 20); + linear_info[i].base_virt = linear; + linear_info[i].base_pfn = __pa(linear) >> PAGE_SHIFT; + linear_info[i].npages = npages; + linear_info[i].type = type; + list_add_tail(&linear_info[i].list, &free_linears); + atomic_set(&linear_info[i].use_count, 0); + + pg = pfn_to_page(linear_info[i].base_pfn); + for (j = 0; j < npages; ++j) { + atomic_inc(&pg->_count); + ++pg; + } + } +} + +static struct kvmppc_linear_info *kvm_alloc_linear(int type) +{ + struct kvmppc_linear_info *ri, *ret; + + ret = NULL; + spin_lock(&linear_lock); + list_for_each_entry(ri, &free_linears, list) { + if (ri->type != type) + continue; + + list_del(&ri->list); + atomic_inc(&ri->use_count); + memset(ri->base_virt, 0, ri->npages << PAGE_SHIFT); + ret = ri; + break; + } + spin_unlock(&linear_lock); + return ret; +} + +static void kvm_release_linear(struct kvmppc_linear_info *ri) +{ + if (atomic_dec_and_test(&ri->use_count)) { + spin_lock(&linear_lock); + list_add_tail(&ri->list, &free_linears); + spin_unlock(&linear_lock); + + } +} + +/* + * Called at boot time while the bootmem allocator is active, + * to allocate contiguous physical memory for the hash page + * tables for guests. + */ +void __init kvm_linear_init(void) +{ + /* HPT */ + kvm_linear_init_one(1 << HPT_ORDER, kvm_hpt_count, KVM_LINEAR_HPT); + + /* RMA */ + /* Only do this on PPC970 in HV mode */ + if (!cpu_has_feature(CPU_FTR_HVMODE) || + !cpu_has_feature(CPU_FTR_ARCH_201)) + return; + + if (!kvm_rma_size || !kvm_rma_count) + return; + + /* Check that the requested size is one supported in hardware */ + if (lpcr_rmls(kvm_rma_size) < 0) { + pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size); + return; + } + + kvm_linear_init_one(kvm_rma_size, kvm_rma_count, KVM_LINEAR_RMA); +} diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S new file mode 100644 index 00000000..d3fb4df0 --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -0,0 +1,170 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> + * + * Derived from book3s_interrupts.S, which is: + * Copyright SUSE Linux Products GmbH 2009 + * + * Authors: Alexander Graf <agraf@suse.de> + */ + +#include <asm/ppc_asm.h> +#include <asm/kvm_asm.h> +#include <asm/reg.h> +#include <asm/page.h> +#include <asm/asm-offsets.h> +#include <asm/exception-64s.h> +#include <asm/ppc-opcode.h> + +/***************************************************************************** + * * + * Guest entry / exit code that is in kernel module memory (vmalloc) * + * * + ****************************************************************************/ + +/* Registers: + * r4: vcpu pointer + */ +_GLOBAL(__kvmppc_vcore_entry) + + /* Write correct stack frame */ + mflr r0 + std r0,PPC_LR_STKOFF(r1) + + /* Save host state to the stack */ + stdu r1, -SWITCH_FRAME_SIZE(r1) + + /* Save non-volatile registers (r14 - r31) and CR */ + SAVE_NVGPRS(r1) + mfcr r3 + std r3, _CCR(r1) + + /* Save host DSCR */ +BEGIN_FTR_SECTION + mfspr r3, SPRN_DSCR + std r3, HSTATE_DSCR(r13) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + + /* Save host DABR */ + mfspr r3, SPRN_DABR + std r3, HSTATE_DABR(r13) + + /* Hard-disable interrupts */ + mfmsr r10 + std r10, HSTATE_HOST_MSR(r13) + rldicl r10,r10,48,1 + rotldi r10,r10,16 + mtmsrd r10,1 + + /* Save host PMU registers and load guest PMU registers */ + /* R4 is live here (vcpu pointer) but not r3 or r5 */ + li r3, 1 + sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ + mfspr r7, SPRN_MMCR0 /* save MMCR0 */ + mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */ + isync + ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */ + lbz r5, LPPACA_PMCINUSE(r3) + cmpwi r5, 0 + beq 31f /* skip if not */ + mfspr r5, SPRN_MMCR1 + mfspr r6, SPRN_MMCRA + std r7, HSTATE_MMCR(r13) + std r5, HSTATE_MMCR + 8(r13) + std r6, HSTATE_MMCR + 16(r13) + mfspr r3, SPRN_PMC1 + mfspr r5, SPRN_PMC2 + mfspr r6, SPRN_PMC3 + mfspr r7, SPRN_PMC4 + mfspr r8, SPRN_PMC5 + mfspr r9, SPRN_PMC6 +BEGIN_FTR_SECTION + mfspr r10, SPRN_PMC7 + mfspr r11, SPRN_PMC8 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) + stw r3, HSTATE_PMC(r13) + stw r5, HSTATE_PMC + 4(r13) + stw r6, HSTATE_PMC + 8(r13) + stw r7, HSTATE_PMC + 12(r13) + stw r8, HSTATE_PMC + 16(r13) + stw r9, HSTATE_PMC + 20(r13) +BEGIN_FTR_SECTION + stw r10, HSTATE_PMC + 24(r13) + stw r11, HSTATE_PMC + 28(r13) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) +31: + + /* + * Put whatever is in the decrementer into the + * hypervisor decrementer. + */ + mfspr r8,SPRN_DEC + mftb r7 + mtspr SPRN_HDEC,r8 + extsw r8,r8 + add r8,r8,r7 + std r8,HSTATE_DECEXP(r13) + + /* + * On PPC970, if the guest vcpu has an external interrupt pending, + * send ourselves an IPI so as to interrupt the guest once it + * enables interrupts. (It must have interrupts disabled, + * otherwise we would already have delivered the interrupt.) + */ +BEGIN_FTR_SECTION + ld r0, VCPU_PENDING_EXC(r4) + li r7, (1 << BOOK3S_IRQPRIO_EXTERNAL) + oris r7, r7, (1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h + and. r0, r0, r7 + beq 32f + mr r31, r4 + lhz r3, PACAPACAINDEX(r13) + bl smp_send_reschedule + nop + mr r4, r31 +32: +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) + + /* Jump to partition switch code */ + bl .kvmppc_hv_entry_trampoline + nop + +/* + * We return here in virtual mode after the guest exits + * with something that we can't handle in real mode. + * Interrupts are enabled again at this point. + */ + +.global kvmppc_handler_highmem +kvmppc_handler_highmem: + + /* + * Register usage at this point: + * + * R1 = host R1 + * R2 = host R2 + * R12 = exit handler id + * R13 = PACA + */ + + /* Restore non-volatile host registers (r14 - r31) and CR */ + REST_NVGPRS(r1) + ld r4, _CCR(r1) + mtcr r4 + + addi r1, r1, SWITCH_FRAME_SIZE + ld r0, PPC_LR_STKOFF(r1) + mtlr r0 + blr diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c new file mode 100644 index 00000000..cec4dadd --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -0,0 +1,817 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * Copyright 2010-2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> + */ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/hugetlb.h> +#include <linux/module.h> + +#include <asm/tlbflush.h> +#include <asm/kvm_ppc.h> +#include <asm/kvm_book3s.h> +#include <asm/mmu-hash64.h> +#include <asm/hvcall.h> +#include <asm/synch.h> +#include <asm/ppc-opcode.h> + +/* Translate address of a vmalloc'd thing to a linear map address */ +static void *real_vmalloc_addr(void *x) +{ + unsigned long addr = (unsigned long) x; + pte_t *p; + + p = find_linux_pte(swapper_pg_dir, addr); + if (!p || !pte_present(*p)) + return NULL; + /* assume we don't have huge pages in vmalloc space... */ + addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK); + return __va(addr); +} + +/* + * Add this HPTE into the chain for the real page. + * Must be called with the chain locked; it unlocks the chain. + */ +void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, + unsigned long *rmap, long pte_index, int realmode) +{ + struct revmap_entry *head, *tail; + unsigned long i; + + if (*rmap & KVMPPC_RMAP_PRESENT) { + i = *rmap & KVMPPC_RMAP_INDEX; + head = &kvm->arch.revmap[i]; + if (realmode) + head = real_vmalloc_addr(head); + tail = &kvm->arch.revmap[head->back]; + if (realmode) + tail = real_vmalloc_addr(tail); + rev->forw = i; + rev->back = head->back; + tail->forw = pte_index; + head->back = pte_index; + } else { + rev->forw = rev->back = pte_index; + i = pte_index; + } + smp_wmb(); + *rmap = i | KVMPPC_RMAP_REFERENCED | KVMPPC_RMAP_PRESENT; /* unlock */ +} +EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain); + +/* Remove this HPTE from the chain for a real page */ +static void remove_revmap_chain(struct kvm *kvm, long pte_index, + struct revmap_entry *rev, + unsigned long hpte_v, unsigned long hpte_r) +{ + struct revmap_entry *next, *prev; + unsigned long gfn, ptel, head; + struct kvm_memory_slot *memslot; + unsigned long *rmap; + unsigned long rcbits; + + rcbits = hpte_r & (HPTE_R_R | HPTE_R_C); + ptel = rev->guest_rpte |= rcbits; + gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel)); + memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn); + if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) + return; + + rmap = real_vmalloc_addr(&memslot->rmap[gfn - memslot->base_gfn]); + lock_rmap(rmap); + + head = *rmap & KVMPPC_RMAP_INDEX; + next = real_vmalloc_addr(&kvm->arch.revmap[rev->forw]); + prev = real_vmalloc_addr(&kvm->arch.revmap[rev->back]); + next->back = rev->back; + prev->forw = rev->forw; + if (head == pte_index) { + head = rev->forw; + if (head == pte_index) + *rmap &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX); + else + *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head; + } + *rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT; + unlock_rmap(rmap); +} + +static pte_t lookup_linux_pte(struct kvm_vcpu *vcpu, unsigned long hva, + int writing, unsigned long *pte_sizep) +{ + pte_t *ptep; + unsigned long ps = *pte_sizep; + unsigned int shift; + + ptep = find_linux_pte_or_hugepte(vcpu->arch.pgdir, hva, &shift); + if (!ptep) + return __pte(0); + if (shift) + *pte_sizep = 1ul << shift; + else + *pte_sizep = PAGE_SIZE; + if (ps > *pte_sizep) + return __pte(0); + if (!pte_present(*ptep)) + return __pte(0); + return kvmppc_read_update_linux_pte(ptep, writing); +} + +static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v) +{ + asm volatile(PPC_RELEASE_BARRIER "" : : : "memory"); + hpte[0] = hpte_v; +} + +long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, + long pte_index, unsigned long pteh, unsigned long ptel) +{ + struct kvm *kvm = vcpu->kvm; + unsigned long i, pa, gpa, gfn, psize; + unsigned long slot_fn, hva; + unsigned long *hpte; + struct revmap_entry *rev; + unsigned long g_ptel = ptel; + struct kvm_memory_slot *memslot; + unsigned long *physp, pte_size; + unsigned long is_io; + unsigned long *rmap; + pte_t pte; + unsigned int writing; + unsigned long mmu_seq; + unsigned long rcbits; + bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING; + + psize = hpte_page_size(pteh, ptel); + if (!psize) + return H_PARAMETER; + writing = hpte_is_writable(ptel); + pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID); + + /* used later to detect if we might have been invalidated */ + mmu_seq = kvm->mmu_notifier_seq; + smp_rmb(); + + /* Find the memslot (if any) for this address */ + gpa = (ptel & HPTE_R_RPN) & ~(psize - 1); + gfn = gpa >> PAGE_SHIFT; + memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn); + pa = 0; + is_io = ~0ul; + rmap = NULL; + if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) { + /* PPC970 can't do emulated MMIO */ + if (!cpu_has_feature(CPU_FTR_ARCH_206)) + return H_PARAMETER; + /* Emulated MMIO - mark this with key=31 */ + pteh |= HPTE_V_ABSENT; + ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO; + goto do_insert; + } + + /* Check if the requested page fits entirely in the memslot. */ + if (!slot_is_aligned(memslot, psize)) + return H_PARAMETER; + slot_fn = gfn - memslot->base_gfn; + rmap = &memslot->rmap[slot_fn]; + + if (!kvm->arch.using_mmu_notifiers) { + physp = kvm->arch.slot_phys[memslot->id]; + if (!physp) + return H_PARAMETER; + physp += slot_fn; + if (realmode) + physp = real_vmalloc_addr(physp); + pa = *physp; + if (!pa) + return H_TOO_HARD; + is_io = pa & (HPTE_R_I | HPTE_R_W); + pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK); + pa &= PAGE_MASK; + } else { + /* Translate to host virtual address */ + hva = gfn_to_hva_memslot(memslot, gfn); + + /* Look up the Linux PTE for the backing page */ + pte_size = psize; + pte = lookup_linux_pte(vcpu, hva, writing, &pte_size); + if (pte_present(pte)) { + if (writing && !pte_write(pte)) + /* make the actual HPTE be read-only */ + ptel = hpte_make_readonly(ptel); + is_io = hpte_cache_bits(pte_val(pte)); + pa = pte_pfn(pte) << PAGE_SHIFT; + } + } + if (pte_size < psize) + return H_PARAMETER; + if (pa && pte_size > psize) + pa |= gpa & (pte_size - 1); + + ptel &= ~(HPTE_R_PP0 - psize); + ptel |= pa; + + if (pa) + pteh |= HPTE_V_VALID; + else + pteh |= HPTE_V_ABSENT; + + /* Check WIMG */ + if (is_io != ~0ul && !hpte_cache_flags_ok(ptel, is_io)) { + if (is_io) + return H_PARAMETER; + /* + * Allow guest to map emulated device memory as + * uncacheable, but actually make it cacheable. + */ + ptel &= ~(HPTE_R_W|HPTE_R_I|HPTE_R_G); + ptel |= HPTE_R_M; + } + + /* Find and lock the HPTEG slot to use */ + do_insert: + if (pte_index >= HPT_NPTE) + return H_PARAMETER; + if (likely((flags & H_EXACT) == 0)) { + pte_index &= ~7UL; + hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); + for (i = 0; i < 8; ++i) { + if ((*hpte & HPTE_V_VALID) == 0 && + try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | + HPTE_V_ABSENT)) + break; + hpte += 2; + } + if (i == 8) { + /* + * Since try_lock_hpte doesn't retry (not even stdcx. + * failures), it could be that there is a free slot + * but we transiently failed to lock it. Try again, + * actually locking each slot and checking it. + */ + hpte -= 16; + for (i = 0; i < 8; ++i) { + while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) + cpu_relax(); + if (!(*hpte & (HPTE_V_VALID | HPTE_V_ABSENT))) + break; + *hpte &= ~HPTE_V_HVLOCK; + hpte += 2; + } + if (i == 8) + return H_PTEG_FULL; + } + pte_index += i; + } else { + hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); + if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | + HPTE_V_ABSENT)) { + /* Lock the slot and check again */ + while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) + cpu_relax(); + if (*hpte & (HPTE_V_VALID | HPTE_V_ABSENT)) { + *hpte &= ~HPTE_V_HVLOCK; + return H_PTEG_FULL; + } + } + } + + /* Save away the guest's idea of the second HPTE dword */ + rev = &kvm->arch.revmap[pte_index]; + if (realmode) + rev = real_vmalloc_addr(rev); + if (rev) + rev->guest_rpte = g_ptel; + + /* Link HPTE into reverse-map chain */ + if (pteh & HPTE_V_VALID) { + if (realmode) + rmap = real_vmalloc_addr(rmap); + lock_rmap(rmap); + /* Check for pending invalidations under the rmap chain lock */ + if (kvm->arch.using_mmu_notifiers && + mmu_notifier_retry(vcpu, mmu_seq)) { + /* inval in progress, write a non-present HPTE */ + pteh |= HPTE_V_ABSENT; + pteh &= ~HPTE_V_VALID; + unlock_rmap(rmap); + } else { + kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index, + realmode); + /* Only set R/C in real HPTE if already set in *rmap */ + rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT; + ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C); + } + } + + hpte[1] = ptel; + + /* Write the first HPTE dword, unlocking the HPTE and making it valid */ + eieio(); + hpte[0] = pteh; + asm volatile("ptesync" : : : "memory"); + + vcpu->arch.gpr[4] = pte_index; + return H_SUCCESS; +} +EXPORT_SYMBOL_GPL(kvmppc_h_enter); + +#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token)) + +static inline int try_lock_tlbie(unsigned int *lock) +{ + unsigned int tmp, old; + unsigned int token = LOCK_TOKEN; + + asm volatile("1:lwarx %1,0,%2\n" + " cmpwi cr0,%1,0\n" + " bne 2f\n" + " stwcx. %3,0,%2\n" + " bne- 1b\n" + " isync\n" + "2:" + : "=&r" (tmp), "=&r" (old) + : "r" (lock), "r" (token) + : "cc", "memory"); + return old == 0; +} + +long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags, + unsigned long pte_index, unsigned long avpn, + unsigned long va) +{ + struct kvm *kvm = vcpu->kvm; + unsigned long *hpte; + unsigned long v, r, rb; + struct revmap_entry *rev; + + if (pte_index >= HPT_NPTE) + return H_PARAMETER; + hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); + while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) + cpu_relax(); + if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || + ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn) || + ((flags & H_ANDCOND) && (hpte[0] & avpn) != 0)) { + hpte[0] &= ~HPTE_V_HVLOCK; + return H_NOT_FOUND; + } + + rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); + v = hpte[0] & ~HPTE_V_HVLOCK; + if (v & HPTE_V_VALID) { + hpte[0] &= ~HPTE_V_VALID; + rb = compute_tlbie_rb(v, hpte[1], pte_index); + if (!(flags & H_LOCAL) && atomic_read(&kvm->online_vcpus) > 1) { + while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) + cpu_relax(); + asm volatile("ptesync" : : : "memory"); + asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" + : : "r" (rb), "r" (kvm->arch.lpid)); + asm volatile("ptesync" : : : "memory"); + kvm->arch.tlbie_lock = 0; + } else { + asm volatile("ptesync" : : : "memory"); + asm volatile("tlbiel %0" : : "r" (rb)); + asm volatile("ptesync" : : : "memory"); + } + /* Read PTE low word after tlbie to get final R/C values */ + remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]); + } + r = rev->guest_rpte; + unlock_hpte(hpte, 0); + + vcpu->arch.gpr[4] = v; + vcpu->arch.gpr[5] = r; + return H_SUCCESS; +} + +long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + unsigned long *args = &vcpu->arch.gpr[4]; + unsigned long *hp, *hptes[4], tlbrb[4]; + long int i, j, k, n, found, indexes[4]; + unsigned long flags, req, pte_index, rcbits; + long int local = 0; + long int ret = H_SUCCESS; + struct revmap_entry *rev, *revs[4]; + + if (atomic_read(&kvm->online_vcpus) == 1) + local = 1; + for (i = 0; i < 4 && ret == H_SUCCESS; ) { + n = 0; + for (; i < 4; ++i) { + j = i * 2; + pte_index = args[j]; + flags = pte_index >> 56; + pte_index &= ((1ul << 56) - 1); + req = flags >> 6; + flags &= 3; + if (req == 3) { /* no more requests */ + i = 4; + break; + } + if (req != 1 || flags == 3 || pte_index >= HPT_NPTE) { + /* parameter error */ + args[j] = ((0xa0 | flags) << 56) + pte_index; + ret = H_PARAMETER; + break; + } + hp = (unsigned long *) + (kvm->arch.hpt_virt + (pte_index << 4)); + /* to avoid deadlock, don't spin except for first */ + if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) { + if (n) + break; + while (!try_lock_hpte(hp, HPTE_V_HVLOCK)) + cpu_relax(); + } + found = 0; + if (hp[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) { + switch (flags & 3) { + case 0: /* absolute */ + found = 1; + break; + case 1: /* andcond */ + if (!(hp[0] & args[j + 1])) + found = 1; + break; + case 2: /* AVPN */ + if ((hp[0] & ~0x7fUL) == args[j + 1]) + found = 1; + break; + } + } + if (!found) { + hp[0] &= ~HPTE_V_HVLOCK; + args[j] = ((0x90 | flags) << 56) + pte_index; + continue; + } + + args[j] = ((0x80 | flags) << 56) + pte_index; + rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); + + if (!(hp[0] & HPTE_V_VALID)) { + /* insert R and C bits from PTE */ + rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C); + args[j] |= rcbits << (56 - 5); + hp[0] = 0; + continue; + } + + hp[0] &= ~HPTE_V_VALID; /* leave it locked */ + tlbrb[n] = compute_tlbie_rb(hp[0], hp[1], pte_index); + indexes[n] = j; + hptes[n] = hp; + revs[n] = rev; + ++n; + } + + if (!n) + break; + + /* Now that we've collected a batch, do the tlbies */ + if (!local) { + while(!try_lock_tlbie(&kvm->arch.tlbie_lock)) + cpu_relax(); + asm volatile("ptesync" : : : "memory"); + for (k = 0; k < n; ++k) + asm volatile(PPC_TLBIE(%1,%0) : : + "r" (tlbrb[k]), + "r" (kvm->arch.lpid)); + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); + kvm->arch.tlbie_lock = 0; + } else { + asm volatile("ptesync" : : : "memory"); + for (k = 0; k < n; ++k) + asm volatile("tlbiel %0" : : "r" (tlbrb[k])); + asm volatile("ptesync" : : : "memory"); + } + + /* Read PTE low words after tlbie to get final R/C values */ + for (k = 0; k < n; ++k) { + j = indexes[k]; + pte_index = args[j] & ((1ul << 56) - 1); + hp = hptes[k]; + rev = revs[k]; + remove_revmap_chain(kvm, pte_index, rev, hp[0], hp[1]); + rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C); + args[j] |= rcbits << (56 - 5); + hp[0] = 0; + } + } + + return ret; +} + +long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, + unsigned long pte_index, unsigned long avpn, + unsigned long va) +{ + struct kvm *kvm = vcpu->kvm; + unsigned long *hpte; + struct revmap_entry *rev; + unsigned long v, r, rb, mask, bits; + + if (pte_index >= HPT_NPTE) + return H_PARAMETER; + + hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); + while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) + cpu_relax(); + if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || + ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn)) { + hpte[0] &= ~HPTE_V_HVLOCK; + return H_NOT_FOUND; + } + + if (atomic_read(&kvm->online_vcpus) == 1) + flags |= H_LOCAL; + v = hpte[0]; + bits = (flags << 55) & HPTE_R_PP0; + bits |= (flags << 48) & HPTE_R_KEY_HI; + bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO); + + /* Update guest view of 2nd HPTE dword */ + mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N | + HPTE_R_KEY_HI | HPTE_R_KEY_LO; + rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); + if (rev) { + r = (rev->guest_rpte & ~mask) | bits; + rev->guest_rpte = r; + } + r = (hpte[1] & ~mask) | bits; + + /* Update HPTE */ + if (v & HPTE_V_VALID) { + rb = compute_tlbie_rb(v, r, pte_index); + hpte[0] = v & ~HPTE_V_VALID; + if (!(flags & H_LOCAL)) { + while(!try_lock_tlbie(&kvm->arch.tlbie_lock)) + cpu_relax(); + asm volatile("ptesync" : : : "memory"); + asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" + : : "r" (rb), "r" (kvm->arch.lpid)); + asm volatile("ptesync" : : : "memory"); + kvm->arch.tlbie_lock = 0; + } else { + asm volatile("ptesync" : : : "memory"); + asm volatile("tlbiel %0" : : "r" (rb)); + asm volatile("ptesync" : : : "memory"); + } + } + hpte[1] = r; + eieio(); + hpte[0] = v & ~HPTE_V_HVLOCK; + asm volatile("ptesync" : : : "memory"); + return H_SUCCESS; +} + +long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, + unsigned long pte_index) +{ + struct kvm *kvm = vcpu->kvm; + unsigned long *hpte, v, r; + int i, n = 1; + struct revmap_entry *rev = NULL; + + if (pte_index >= HPT_NPTE) + return H_PARAMETER; + if (flags & H_READ_4) { + pte_index &= ~3; + n = 4; + } + rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); + for (i = 0; i < n; ++i, ++pte_index) { + hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); + v = hpte[0] & ~HPTE_V_HVLOCK; + r = hpte[1]; + if (v & HPTE_V_ABSENT) { + v &= ~HPTE_V_ABSENT; + v |= HPTE_V_VALID; + } + if (v & HPTE_V_VALID) + r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C)); + vcpu->arch.gpr[4 + i * 2] = v; + vcpu->arch.gpr[5 + i * 2] = r; + } + return H_SUCCESS; +} + +void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep, + unsigned long pte_index) +{ + unsigned long rb; + + hptep[0] &= ~HPTE_V_VALID; + rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index); + while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) + cpu_relax(); + asm volatile("ptesync" : : : "memory"); + asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" + : : "r" (rb), "r" (kvm->arch.lpid)); + asm volatile("ptesync" : : : "memory"); + kvm->arch.tlbie_lock = 0; +} +EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte); + +void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep, + unsigned long pte_index) +{ + unsigned long rb; + unsigned char rbyte; + + rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index); + rbyte = (hptep[1] & ~HPTE_R_R) >> 8; + /* modify only the second-last byte, which contains the ref bit */ + *((char *)hptep + 14) = rbyte; + while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) + cpu_relax(); + asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" + : : "r" (rb), "r" (kvm->arch.lpid)); + asm volatile("ptesync" : : : "memory"); + kvm->arch.tlbie_lock = 0; +} +EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte); + +static int slb_base_page_shift[4] = { + 24, /* 16M */ + 16, /* 64k */ + 34, /* 16G */ + 20, /* 1M, unsupported */ +}; + +long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, + unsigned long valid) +{ + unsigned int i; + unsigned int pshift; + unsigned long somask; + unsigned long vsid, hash; + unsigned long avpn; + unsigned long *hpte; + unsigned long mask, val; + unsigned long v, r; + + /* Get page shift, work out hash and AVPN etc. */ + mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY; + val = 0; + pshift = 12; + if (slb_v & SLB_VSID_L) { + mask |= HPTE_V_LARGE; + val |= HPTE_V_LARGE; + pshift = slb_base_page_shift[(slb_v & SLB_VSID_LP) >> 4]; + } + if (slb_v & SLB_VSID_B_1T) { + somask = (1UL << 40) - 1; + vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T; + vsid ^= vsid << 25; + } else { + somask = (1UL << 28) - 1; + vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT; + } + hash = (vsid ^ ((eaddr & somask) >> pshift)) & HPT_HASH_MASK; + avpn = slb_v & ~(somask >> 16); /* also includes B */ + avpn |= (eaddr & somask) >> 16; + + if (pshift >= 24) + avpn &= ~((1UL << (pshift - 16)) - 1); + else + avpn &= ~0x7fUL; + val |= avpn; + + for (;;) { + hpte = (unsigned long *)(kvm->arch.hpt_virt + (hash << 7)); + + for (i = 0; i < 16; i += 2) { + /* Read the PTE racily */ + v = hpte[i] & ~HPTE_V_HVLOCK; + + /* Check valid/absent, hash, segment size and AVPN */ + if (!(v & valid) || (v & mask) != val) + continue; + + /* Lock the PTE and read it under the lock */ + while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK)) + cpu_relax(); + v = hpte[i] & ~HPTE_V_HVLOCK; + r = hpte[i+1]; + + /* + * Check the HPTE again, including large page size + * Since we don't currently allow any MPSS (mixed + * page-size segment) page sizes, it is sufficient + * to check against the actual page size. + */ + if ((v & valid) && (v & mask) == val && + hpte_page_size(v, r) == (1ul << pshift)) + /* Return with the HPTE still locked */ + return (hash << 3) + (i >> 1); + + /* Unlock and move on */ + hpte[i] = v; + } + + if (val & HPTE_V_SECONDARY) + break; + val |= HPTE_V_SECONDARY; + hash = hash ^ HPT_HASH_MASK; + } + return -1; +} +EXPORT_SYMBOL(kvmppc_hv_find_lock_hpte); + +/* + * Called in real mode to check whether an HPTE not found fault + * is due to accessing a paged-out page or an emulated MMIO page, + * or if a protection fault is due to accessing a page that the + * guest wanted read/write access to but which we made read-only. + * Returns a possibly modified status (DSISR) value if not + * (i.e. pass the interrupt to the guest), + * -1 to pass the fault up to host kernel mode code, -2 to do that + * and also load the instruction word (for MMIO emulation), + * or 0 if we should make the guest retry the access. + */ +long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, + unsigned long slb_v, unsigned int status, bool data) +{ + struct kvm *kvm = vcpu->kvm; + long int index; + unsigned long v, r, gr; + unsigned long *hpte; + unsigned long valid; + struct revmap_entry *rev; + unsigned long pp, key; + + /* For protection fault, expect to find a valid HPTE */ + valid = HPTE_V_VALID; + if (status & DSISR_NOHPTE) + valid |= HPTE_V_ABSENT; + + index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid); + if (index < 0) { + if (status & DSISR_NOHPTE) + return status; /* there really was no HPTE */ + return 0; /* for prot fault, HPTE disappeared */ + } + hpte = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); + v = hpte[0] & ~HPTE_V_HVLOCK; + r = hpte[1]; + rev = real_vmalloc_addr(&kvm->arch.revmap[index]); + gr = rev->guest_rpte; + + unlock_hpte(hpte, v); + + /* For not found, if the HPTE is valid by now, retry the instruction */ + if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID)) + return 0; + + /* Check access permissions to the page */ + pp = gr & (HPTE_R_PP0 | HPTE_R_PP); + key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS; + status &= ~DSISR_NOHPTE; /* DSISR_NOHPTE == SRR1_ISI_NOPT */ + if (!data) { + if (gr & (HPTE_R_N | HPTE_R_G)) + return status | SRR1_ISI_N_OR_G; + if (!hpte_read_permission(pp, slb_v & key)) + return status | SRR1_ISI_PROT; + } else if (status & DSISR_ISSTORE) { + /* check write permission */ + if (!hpte_write_permission(pp, slb_v & key)) + return status | DSISR_PROTFAULT; + } else { + if (!hpte_read_permission(pp, slb_v & key)) + return status | DSISR_PROTFAULT; + } + + /* Check storage key, if applicable */ + if (data && (vcpu->arch.shregs.msr & MSR_DR)) { + unsigned int perm = hpte_get_skey_perm(gr, vcpu->arch.amr); + if (status & DSISR_ISSTORE) + perm >>= 1; + if (perm & 1) + return status | DSISR_KEYFAULT; + } + + /* Save HPTE info for virtual-mode handler */ + vcpu->arch.pgfault_addr = addr; + vcpu->arch.pgfault_index = index; + vcpu->arch.pgfault_hpte[0] = v; + vcpu->arch.pgfault_hpte[1] = r; + + /* Check the storage key to see if it is possibly emulated MMIO */ + if (data && (vcpu->arch.shregs.msr & MSR_IR) && + (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) == + (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) + return -2; /* MMIO emulation - load instr word */ + + return -1; /* send fault up to host kernel mode */ +} diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S new file mode 100644 index 00000000..24b23a43 --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -0,0 +1,1712 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> + * + * Derived from book3s_rmhandlers.S and other files, which are: + * + * Copyright SUSE Linux Products GmbH 2009 + * + * Authors: Alexander Graf <agraf@suse.de> + */ + +#include <asm/ppc_asm.h> +#include <asm/kvm_asm.h> +#include <asm/reg.h> +#include <asm/mmu.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include <asm/hvcall.h> +#include <asm/asm-offsets.h> +#include <asm/exception-64s.h> + +/***************************************************************************** + * * + * Real Mode handlers that need to be in the linear mapping * + * * + ****************************************************************************/ + + .globl kvmppc_skip_interrupt +kvmppc_skip_interrupt: + mfspr r13,SPRN_SRR0 + addi r13,r13,4 + mtspr SPRN_SRR0,r13 + GET_SCRATCH0(r13) + rfid + b . + + .globl kvmppc_skip_Hinterrupt +kvmppc_skip_Hinterrupt: + mfspr r13,SPRN_HSRR0 + addi r13,r13,4 + mtspr SPRN_HSRR0,r13 + GET_SCRATCH0(r13) + hrfid + b . + +/* + * Call kvmppc_hv_entry in real mode. + * Must be called with interrupts hard-disabled. + * + * Input Registers: + * + * LR = return address to continue at after eventually re-enabling MMU + */ +_GLOBAL(kvmppc_hv_entry_trampoline) + mfmsr r10 + LOAD_REG_ADDR(r5, kvmppc_hv_entry) + li r0,MSR_RI + andc r0,r10,r0 + li r6,MSR_IR | MSR_DR + andc r6,r10,r6 + mtmsrd r0,1 /* clear RI in MSR */ + mtsrr0 r5 + mtsrr1 r6 + RFI + +#define ULONG_SIZE 8 +#define VCPU_GPR(n) (VCPU_GPRS + (n * ULONG_SIZE)) + +/****************************************************************************** + * * + * Entry code * + * * + *****************************************************************************/ + +#define XICS_XIRR 4 +#define XICS_QIRR 0xc + +/* + * We come in here when wakened from nap mode on a secondary hw thread. + * Relocation is off and most register values are lost. + * r13 points to the PACA. + */ + .globl kvm_start_guest +kvm_start_guest: + ld r1,PACAEMERGSP(r13) + subi r1,r1,STACK_FRAME_OVERHEAD + ld r2,PACATOC(r13) + + /* were we napping due to cede? */ + lbz r0,HSTATE_NAPPING(r13) + cmpwi r0,0 + bne kvm_end_cede + + /* get vcpu pointer */ + ld r4, HSTATE_KVM_VCPU(r13) + + /* We got here with an IPI; clear it */ + ld r5, HSTATE_XICS_PHYS(r13) + li r0, 0xff + li r6, XICS_QIRR + li r7, XICS_XIRR + lwzcix r8, r5, r7 /* ack the interrupt */ + sync + stbcix r0, r5, r6 /* clear it */ + stwcix r8, r5, r7 /* EOI it */ + + /* NV GPR values from power7_idle() will no longer be valid */ + stb r0, PACA_NAPSTATELOST(r13) + +.global kvmppc_hv_entry +kvmppc_hv_entry: + + /* Required state: + * + * R4 = vcpu pointer + * MSR = ~IR|DR + * R13 = PACA + * R1 = host R1 + * all other volatile GPRS = free + */ + mflr r0 + std r0, HSTATE_VMHANDLER(r13) + + ld r14, VCPU_GPR(r14)(r4) + ld r15, VCPU_GPR(r15)(r4) + ld r16, VCPU_GPR(r16)(r4) + ld r17, VCPU_GPR(r17)(r4) + ld r18, VCPU_GPR(r18)(r4) + ld r19, VCPU_GPR(r19)(r4) + ld r20, VCPU_GPR(r20)(r4) + ld r21, VCPU_GPR(r21)(r4) + ld r22, VCPU_GPR(r22)(r4) + ld r23, VCPU_GPR(r23)(r4) + ld r24, VCPU_GPR(r24)(r4) + ld r25, VCPU_GPR(r25)(r4) + ld r26, VCPU_GPR(r26)(r4) + ld r27, VCPU_GPR(r27)(r4) + ld r28, VCPU_GPR(r28)(r4) + ld r29, VCPU_GPR(r29)(r4) + ld r30, VCPU_GPR(r30)(r4) + ld r31, VCPU_GPR(r31)(r4) + + /* Load guest PMU registers */ + /* R4 is live here (vcpu pointer) */ + li r3, 1 + sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ + mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */ + isync + lwz r3, VCPU_PMC(r4) /* always load up guest PMU registers */ + lwz r5, VCPU_PMC + 4(r4) /* to prevent information leak */ + lwz r6, VCPU_PMC + 8(r4) + lwz r7, VCPU_PMC + 12(r4) + lwz r8, VCPU_PMC + 16(r4) + lwz r9, VCPU_PMC + 20(r4) +BEGIN_FTR_SECTION + lwz r10, VCPU_PMC + 24(r4) + lwz r11, VCPU_PMC + 28(r4) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) + mtspr SPRN_PMC1, r3 + mtspr SPRN_PMC2, r5 + mtspr SPRN_PMC3, r6 + mtspr SPRN_PMC4, r7 + mtspr SPRN_PMC5, r8 + mtspr SPRN_PMC6, r9 +BEGIN_FTR_SECTION + mtspr SPRN_PMC7, r10 + mtspr SPRN_PMC8, r11 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) + ld r3, VCPU_MMCR(r4) + ld r5, VCPU_MMCR + 8(r4) + ld r6, VCPU_MMCR + 16(r4) + mtspr SPRN_MMCR1, r5 + mtspr SPRN_MMCRA, r6 + mtspr SPRN_MMCR0, r3 + isync + + /* Load up FP, VMX and VSX registers */ + bl kvmppc_load_fp + +BEGIN_FTR_SECTION + /* Switch DSCR to guest value */ + ld r5, VCPU_DSCR(r4) + mtspr SPRN_DSCR, r5 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + + /* + * Set the decrementer to the guest decrementer. + */ + ld r8,VCPU_DEC_EXPIRES(r4) + mftb r7 + subf r3,r7,r8 + mtspr SPRN_DEC,r3 + stw r3,VCPU_DEC(r4) + + ld r5, VCPU_SPRG0(r4) + ld r6, VCPU_SPRG1(r4) + ld r7, VCPU_SPRG2(r4) + ld r8, VCPU_SPRG3(r4) + mtspr SPRN_SPRG0, r5 + mtspr SPRN_SPRG1, r6 + mtspr SPRN_SPRG2, r7 + mtspr SPRN_SPRG3, r8 + + /* Save R1 in the PACA */ + std r1, HSTATE_HOST_R1(r13) + + /* Increment yield count if they have a VPA */ + ld r3, VCPU_VPA(r4) + cmpdi r3, 0 + beq 25f + lwz r5, LPPACA_YIELDCOUNT(r3) + addi r5, r5, 1 + stw r5, LPPACA_YIELDCOUNT(r3) +25: + /* Load up DAR and DSISR */ + ld r5, VCPU_DAR(r4) + lwz r6, VCPU_DSISR(r4) + mtspr SPRN_DAR, r5 + mtspr SPRN_DSISR, r6 + + /* Set partition DABR */ + li r5,3 + ld r6,VCPU_DABR(r4) + mtspr SPRN_DABRX,r5 + mtspr SPRN_DABR,r6 + +BEGIN_FTR_SECTION + /* Restore AMR and UAMOR, set AMOR to all 1s */ + ld r5,VCPU_AMR(r4) + ld r6,VCPU_UAMOR(r4) + li r7,-1 + mtspr SPRN_AMR,r5 + mtspr SPRN_UAMOR,r6 + mtspr SPRN_AMOR,r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + + /* Clear out SLB */ + li r6,0 + slbmte r6,r6 + slbia + ptesync + +BEGIN_FTR_SECTION + b 30f +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) + /* + * POWER7 host -> guest partition switch code. + * We don't have to lock against concurrent tlbies, + * but we do have to coordinate across hardware threads. + */ + /* Increment entry count iff exit count is zero. */ + ld r5,HSTATE_KVM_VCORE(r13) + addi r9,r5,VCORE_ENTRY_EXIT +21: lwarx r3,0,r9 + cmpwi r3,0x100 /* any threads starting to exit? */ + bge secondary_too_late /* if so we're too late to the party */ + addi r3,r3,1 + stwcx. r3,0,r9 + bne 21b + + /* Primary thread switches to guest partition. */ + ld r9,VCPU_KVM(r4) /* pointer to struct kvm */ + lwz r6,VCPU_PTID(r4) + cmpwi r6,0 + bne 20f + ld r6,KVM_SDR1(r9) + lwz r7,KVM_LPID(r9) + li r0,LPID_RSVD /* switch to reserved LPID */ + mtspr SPRN_LPID,r0 + ptesync + mtspr SPRN_SDR1,r6 /* switch to partition page table */ + mtspr SPRN_LPID,r7 + isync + li r0,1 + stb r0,VCORE_IN_GUEST(r5) /* signal secondaries to continue */ + b 10f + + /* Secondary threads wait for primary to have done partition switch */ +20: lbz r0,VCORE_IN_GUEST(r5) + cmpwi r0,0 + beq 20b + + /* Set LPCR and RMOR. */ +10: ld r8,KVM_LPCR(r9) + mtspr SPRN_LPCR,r8 + ld r8,KVM_RMOR(r9) + mtspr SPRN_RMOR,r8 + isync + + /* Check if HDEC expires soon */ + mfspr r3,SPRN_HDEC + cmpwi r3,10 + li r12,BOOK3S_INTERRUPT_HV_DECREMENTER + mr r9,r4 + blt hdec_soon + + /* + * Invalidate the TLB if we could possibly have stale TLB + * entries for this partition on this core due to the use + * of tlbiel. + * XXX maybe only need this on primary thread? + */ + ld r9,VCPU_KVM(r4) /* pointer to struct kvm */ + lwz r5,VCPU_VCPUID(r4) + lhz r6,PACAPACAINDEX(r13) + rldimi r6,r5,0,62 /* XXX map as if threads 1:1 p:v */ + lhz r8,VCPU_LAST_CPU(r4) + sldi r7,r6,1 /* see if this is the same vcpu */ + add r7,r7,r9 /* as last ran on this pcpu */ + lhz r0,KVM_LAST_VCPU(r7) + cmpw r6,r8 /* on the same cpu core as last time? */ + bne 3f + cmpw r0,r5 /* same vcpu as this core last ran? */ + beq 1f +3: sth r6,VCPU_LAST_CPU(r4) /* if not, invalidate partition TLB */ + sth r5,KVM_LAST_VCPU(r7) + li r6,128 + mtctr r6 + li r7,0x800 /* IS field = 0b10 */ + ptesync +2: tlbiel r7 + addi r7,r7,0x1000 + bdnz 2b + ptesync +1: + + /* Save purr/spurr */ + mfspr r5,SPRN_PURR + mfspr r6,SPRN_SPURR + std r5,HSTATE_PURR(r13) + std r6,HSTATE_SPURR(r13) + ld r7,VCPU_PURR(r4) + ld r8,VCPU_SPURR(r4) + mtspr SPRN_PURR,r7 + mtspr SPRN_SPURR,r8 + b 31f + + /* + * PPC970 host -> guest partition switch code. + * We have to lock against concurrent tlbies, + * using native_tlbie_lock to lock against host tlbies + * and kvm->arch.tlbie_lock to lock against guest tlbies. + * We also have to invalidate the TLB since its + * entries aren't tagged with the LPID. + */ +30: ld r9,VCPU_KVM(r4) /* pointer to struct kvm */ + + /* first take native_tlbie_lock */ + .section ".toc","aw" +toc_tlbie_lock: + .tc native_tlbie_lock[TC],native_tlbie_lock + .previous + ld r3,toc_tlbie_lock@toc(2) + lwz r8,PACA_LOCK_TOKEN(r13) +24: lwarx r0,0,r3 + cmpwi r0,0 + bne 24b + stwcx. r8,0,r3 + bne 24b + isync + + ld r7,KVM_LPCR(r9) /* use kvm->arch.lpcr to store HID4 */ + li r0,0x18f + rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */ + or r0,r7,r0 + ptesync + sync + mtspr SPRN_HID4,r0 /* switch to reserved LPID */ + isync + li r0,0 + stw r0,0(r3) /* drop native_tlbie_lock */ + + /* invalidate the whole TLB */ + li r0,256 + mtctr r0 + li r6,0 +25: tlbiel r6 + addi r6,r6,0x1000 + bdnz 25b + ptesync + + /* Take the guest's tlbie_lock */ + addi r3,r9,KVM_TLBIE_LOCK +24: lwarx r0,0,r3 + cmpwi r0,0 + bne 24b + stwcx. r8,0,r3 + bne 24b + isync + ld r6,KVM_SDR1(r9) + mtspr SPRN_SDR1,r6 /* switch to partition page table */ + + /* Set up HID4 with the guest's LPID etc. */ + sync + mtspr SPRN_HID4,r7 + isync + + /* drop the guest's tlbie_lock */ + li r0,0 + stw r0,0(r3) + + /* Check if HDEC expires soon */ + mfspr r3,SPRN_HDEC + cmpwi r3,10 + li r12,BOOK3S_INTERRUPT_HV_DECREMENTER + mr r9,r4 + blt hdec_soon + + /* Enable HDEC interrupts */ + mfspr r0,SPRN_HID0 + li r3,1 + rldimi r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1 + sync + mtspr SPRN_HID0,r0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + + /* Load up guest SLB entries */ +31: lwz r5,VCPU_SLB_MAX(r4) + cmpwi r5,0 + beq 9f + mtctr r5 + addi r6,r4,VCPU_SLB +1: ld r8,VCPU_SLB_E(r6) + ld r9,VCPU_SLB_V(r6) + slbmte r9,r8 + addi r6,r6,VCPU_SLB_SIZE + bdnz 1b +9: + + /* Restore state of CTRL run bit; assume 1 on entry */ + lwz r5,VCPU_CTRL(r4) + andi. r5,r5,1 + bne 4f + mfspr r6,SPRN_CTRLF + clrrdi r6,r6,1 + mtspr SPRN_CTRLT,r6 +4: + ld r6, VCPU_CTR(r4) + lwz r7, VCPU_XER(r4) + + mtctr r6 + mtxer r7 + +kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */ + ld r6, VCPU_SRR0(r4) + ld r7, VCPU_SRR1(r4) + ld r10, VCPU_PC(r4) + ld r11, VCPU_MSR(r4) /* r11 = vcpu->arch.msr & ~MSR_HV */ + + rldicl r11, r11, 63 - MSR_HV_LG, 1 + rotldi r11, r11, 1 + MSR_HV_LG + ori r11, r11, MSR_ME + + /* Check if we can deliver an external or decrementer interrupt now */ + ld r0,VCPU_PENDING_EXC(r4) + li r8,(1 << BOOK3S_IRQPRIO_EXTERNAL) + oris r8,r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h + and r0,r0,r8 + cmpdi cr1,r0,0 + andi. r0,r11,MSR_EE + beq cr1,11f +BEGIN_FTR_SECTION + mfspr r8,SPRN_LPCR + ori r8,r8,LPCR_MER + mtspr SPRN_LPCR,r8 + isync +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + beq 5f + li r0,BOOK3S_INTERRUPT_EXTERNAL +12: mr r6,r10 + mr r10,r0 + mr r7,r11 + li r11,(MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ + rotldi r11,r11,63 + b 5f +11: beq 5f + mfspr r0,SPRN_DEC + cmpwi r0,0 + li r0,BOOK3S_INTERRUPT_DECREMENTER + blt 12b + + /* Move SRR0 and SRR1 into the respective regs */ +5: mtspr SPRN_SRR0, r6 + mtspr SPRN_SRR1, r7 + li r0,0 + stb r0,VCPU_CEDED(r4) /* cancel cede */ + +fast_guest_return: + mtspr SPRN_HSRR0,r10 + mtspr SPRN_HSRR1,r11 + + /* Activate guest mode, so faults get handled by KVM */ + li r9, KVM_GUEST_MODE_GUEST + stb r9, HSTATE_IN_GUEST(r13) + + /* Enter guest */ + + ld r5, VCPU_LR(r4) + lwz r6, VCPU_CR(r4) + mtlr r5 + mtcr r6 + + ld r0, VCPU_GPR(r0)(r4) + ld r1, VCPU_GPR(r1)(r4) + ld r2, VCPU_GPR(r2)(r4) + ld r3, VCPU_GPR(r3)(r4) + ld r5, VCPU_GPR(r5)(r4) + ld r6, VCPU_GPR(r6)(r4) + ld r7, VCPU_GPR(r7)(r4) + ld r8, VCPU_GPR(r8)(r4) + ld r9, VCPU_GPR(r9)(r4) + ld r10, VCPU_GPR(r10)(r4) + ld r11, VCPU_GPR(r11)(r4) + ld r12, VCPU_GPR(r12)(r4) + ld r13, VCPU_GPR(r13)(r4) + + ld r4, VCPU_GPR(r4)(r4) + + hrfid + b . + +/****************************************************************************** + * * + * Exit code * + * * + *****************************************************************************/ + +/* + * We come here from the first-level interrupt handlers. + */ + .globl kvmppc_interrupt +kvmppc_interrupt: + /* + * Register contents: + * R12 = interrupt vector + * R13 = PACA + * guest CR, R12 saved in shadow VCPU SCRATCH1/0 + * guest R13 saved in SPRN_SCRATCH0 + */ + /* abuse host_r2 as third scratch area; we get r2 from PACATOC(r13) */ + std r9, HSTATE_HOST_R2(r13) + ld r9, HSTATE_KVM_VCPU(r13) + + /* Save registers */ + + std r0, VCPU_GPR(r0)(r9) + std r1, VCPU_GPR(r1)(r9) + std r2, VCPU_GPR(r2)(r9) + std r3, VCPU_GPR(r3)(r9) + std r4, VCPU_GPR(r4)(r9) + std r5, VCPU_GPR(r5)(r9) + std r6, VCPU_GPR(r6)(r9) + std r7, VCPU_GPR(r7)(r9) + std r8, VCPU_GPR(r8)(r9) + ld r0, HSTATE_HOST_R2(r13) + std r0, VCPU_GPR(r9)(r9) + std r10, VCPU_GPR(r10)(r9) + std r11, VCPU_GPR(r11)(r9) + ld r3, HSTATE_SCRATCH0(r13) + lwz r4, HSTATE_SCRATCH1(r13) + std r3, VCPU_GPR(r12)(r9) + stw r4, VCPU_CR(r9) + + /* Restore R1/R2 so we can handle faults */ + ld r1, HSTATE_HOST_R1(r13) + ld r2, PACATOC(r13) + + mfspr r10, SPRN_SRR0 + mfspr r11, SPRN_SRR1 + std r10, VCPU_SRR0(r9) + std r11, VCPU_SRR1(r9) + andi. r0, r12, 2 /* need to read HSRR0/1? */ + beq 1f + mfspr r10, SPRN_HSRR0 + mfspr r11, SPRN_HSRR1 + clrrdi r12, r12, 2 +1: std r10, VCPU_PC(r9) + std r11, VCPU_MSR(r9) + + GET_SCRATCH0(r3) + mflr r4 + std r3, VCPU_GPR(r13)(r9) + std r4, VCPU_LR(r9) + + /* Unset guest mode */ + li r0, KVM_GUEST_MODE_NONE + stb r0, HSTATE_IN_GUEST(r13) + + stw r12,VCPU_TRAP(r9) + + /* Save HEIR (HV emulation assist reg) in last_inst + if this is an HEI (HV emulation interrupt, e40) */ + li r3,KVM_INST_FETCH_FAILED +BEGIN_FTR_SECTION + cmpwi r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST + bne 11f + mfspr r3,SPRN_HEIR +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) +11: stw r3,VCPU_LAST_INST(r9) + + /* these are volatile across C function calls */ + mfctr r3 + mfxer r4 + std r3, VCPU_CTR(r9) + stw r4, VCPU_XER(r9) + +BEGIN_FTR_SECTION + /* If this is a page table miss then see if it's theirs or ours */ + cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE + beq kvmppc_hdsi + cmpwi r12, BOOK3S_INTERRUPT_H_INST_STORAGE + beq kvmppc_hisi +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + + /* See if this is a leftover HDEC interrupt */ + cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER + bne 2f + mfspr r3,SPRN_HDEC + cmpwi r3,0 + bge ignore_hdec +2: + /* See if this is an hcall we can handle in real mode */ + cmpwi r12,BOOK3S_INTERRUPT_SYSCALL + beq hcall_try_real_mode + + /* Check for mediated interrupts (could be done earlier really ...) */ +BEGIN_FTR_SECTION + cmpwi r12,BOOK3S_INTERRUPT_EXTERNAL + bne+ 1f + andi. r0,r11,MSR_EE + beq 1f + mfspr r5,SPRN_LPCR + andi. r0,r5,LPCR_MER + bne bounce_ext_interrupt +1: +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + +nohpte_cont: +hcall_real_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ + /* Save DEC */ + mfspr r5,SPRN_DEC + mftb r6 + extsw r5,r5 + add r5,r5,r6 + std r5,VCPU_DEC_EXPIRES(r9) + + /* Save more register state */ + mfdar r6 + mfdsisr r7 + std r6, VCPU_DAR(r9) + stw r7, VCPU_DSISR(r9) +BEGIN_FTR_SECTION + /* don't overwrite fault_dar/fault_dsisr if HDSI */ + cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE + beq 6f +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + std r6, VCPU_FAULT_DAR(r9) + stw r7, VCPU_FAULT_DSISR(r9) + + /* Save guest CTRL register, set runlatch to 1 */ +6: mfspr r6,SPRN_CTRLF + stw r6,VCPU_CTRL(r9) + andi. r0,r6,1 + bne 4f + ori r6,r6,1 + mtspr SPRN_CTRLT,r6 +4: + /* Read the guest SLB and save it away */ + lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */ + mtctr r0 + li r6,0 + addi r7,r9,VCPU_SLB + li r5,0 +1: slbmfee r8,r6 + andis. r0,r8,SLB_ESID_V@h + beq 2f + add r8,r8,r6 /* put index in */ + slbmfev r3,r6 + std r8,VCPU_SLB_E(r7) + std r3,VCPU_SLB_V(r7) + addi r7,r7,VCPU_SLB_SIZE + addi r5,r5,1 +2: addi r6,r6,1 + bdnz 1b + stw r5,VCPU_SLB_MAX(r9) + + /* + * Save the guest PURR/SPURR + */ +BEGIN_FTR_SECTION + mfspr r5,SPRN_PURR + mfspr r6,SPRN_SPURR + ld r7,VCPU_PURR(r9) + ld r8,VCPU_SPURR(r9) + std r5,VCPU_PURR(r9) + std r6,VCPU_SPURR(r9) + subf r5,r7,r5 + subf r6,r8,r6 + + /* + * Restore host PURR/SPURR and add guest times + * so that the time in the guest gets accounted. + */ + ld r3,HSTATE_PURR(r13) + ld r4,HSTATE_SPURR(r13) + add r3,r3,r5 + add r4,r4,r6 + mtspr SPRN_PURR,r3 + mtspr SPRN_SPURR,r4 +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201) + + /* Clear out SLB */ + li r5,0 + slbmte r5,r5 + slbia + ptesync + +hdec_soon: /* r9 = vcpu, r12 = trap, r13 = paca */ +BEGIN_FTR_SECTION + b 32f +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) + /* + * POWER7 guest -> host partition switch code. + * We don't have to lock against tlbies but we do + * have to coordinate the hardware threads. + */ + /* Increment the threads-exiting-guest count in the 0xff00 + bits of vcore->entry_exit_count */ + lwsync + ld r5,HSTATE_KVM_VCORE(r13) + addi r6,r5,VCORE_ENTRY_EXIT +41: lwarx r3,0,r6 + addi r0,r3,0x100 + stwcx. r0,0,r6 + bne 41b + lwsync + + /* + * At this point we have an interrupt that we have to pass + * up to the kernel or qemu; we can't handle it in real mode. + * Thus we have to do a partition switch, so we have to + * collect the other threads, if we are the first thread + * to take an interrupt. To do this, we set the HDEC to 0, + * which causes an HDEC interrupt in all threads within 2ns + * because the HDEC register is shared between all 4 threads. + * However, we don't need to bother if this is an HDEC + * interrupt, since the other threads will already be on their + * way here in that case. + */ + cmpwi r3,0x100 /* Are we the first here? */ + bge 43f + cmpwi r3,1 /* Are any other threads in the guest? */ + ble 43f + cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER + beq 40f + li r0,0 + mtspr SPRN_HDEC,r0 +40: + /* + * Send an IPI to any napping threads, since an HDEC interrupt + * doesn't wake CPUs up from nap. + */ + lwz r3,VCORE_NAPPING_THREADS(r5) + lwz r4,VCPU_PTID(r9) + li r0,1 + sld r0,r0,r4 + andc. r3,r3,r0 /* no sense IPI'ing ourselves */ + beq 43f + mulli r4,r4,PACA_SIZE /* get paca for thread 0 */ + subf r6,r4,r13 +42: andi. r0,r3,1 + beq 44f + ld r8,HSTATE_XICS_PHYS(r6) /* get thread's XICS reg addr */ + li r0,IPI_PRIORITY + li r7,XICS_QIRR + stbcix r0,r7,r8 /* trigger the IPI */ +44: srdi. r3,r3,1 + addi r6,r6,PACA_SIZE + bne 42b + + /* Secondary threads wait for primary to do partition switch */ +43: ld r4,VCPU_KVM(r9) /* pointer to struct kvm */ + ld r5,HSTATE_KVM_VCORE(r13) + lwz r3,VCPU_PTID(r9) + cmpwi r3,0 + beq 15f + HMT_LOW +13: lbz r3,VCORE_IN_GUEST(r5) + cmpwi r3,0 + bne 13b + HMT_MEDIUM + b 16f + + /* Primary thread waits for all the secondaries to exit guest */ +15: lwz r3,VCORE_ENTRY_EXIT(r5) + srwi r0,r3,8 + clrldi r3,r3,56 + cmpw r3,r0 + bne 15b + isync + + /* Primary thread switches back to host partition */ + ld r6,KVM_HOST_SDR1(r4) + lwz r7,KVM_HOST_LPID(r4) + li r8,LPID_RSVD /* switch to reserved LPID */ + mtspr SPRN_LPID,r8 + ptesync + mtspr SPRN_SDR1,r6 /* switch to partition page table */ + mtspr SPRN_LPID,r7 + isync + li r0,0 + stb r0,VCORE_IN_GUEST(r5) + lis r8,0x7fff /* MAX_INT@h */ + mtspr SPRN_HDEC,r8 + +16: ld r8,KVM_HOST_LPCR(r4) + mtspr SPRN_LPCR,r8 + isync + b 33f + + /* + * PPC970 guest -> host partition switch code. + * We have to lock against concurrent tlbies, and + * we have to flush the whole TLB. + */ +32: ld r4,VCPU_KVM(r9) /* pointer to struct kvm */ + + /* Take the guest's tlbie_lock */ + lwz r8,PACA_LOCK_TOKEN(r13) + addi r3,r4,KVM_TLBIE_LOCK +24: lwarx r0,0,r3 + cmpwi r0,0 + bne 24b + stwcx. r8,0,r3 + bne 24b + isync + + ld r7,KVM_HOST_LPCR(r4) /* use kvm->arch.host_lpcr for HID4 */ + li r0,0x18f + rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */ + or r0,r7,r0 + ptesync + sync + mtspr SPRN_HID4,r0 /* switch to reserved LPID */ + isync + li r0,0 + stw r0,0(r3) /* drop guest tlbie_lock */ + + /* invalidate the whole TLB */ + li r0,256 + mtctr r0 + li r6,0 +25: tlbiel r6 + addi r6,r6,0x1000 + bdnz 25b + ptesync + + /* take native_tlbie_lock */ + ld r3,toc_tlbie_lock@toc(2) +24: lwarx r0,0,r3 + cmpwi r0,0 + bne 24b + stwcx. r8,0,r3 + bne 24b + isync + + ld r6,KVM_HOST_SDR1(r4) + mtspr SPRN_SDR1,r6 /* switch to host page table */ + + /* Set up host HID4 value */ + sync + mtspr SPRN_HID4,r7 + isync + li r0,0 + stw r0,0(r3) /* drop native_tlbie_lock */ + + lis r8,0x7fff /* MAX_INT@h */ + mtspr SPRN_HDEC,r8 + + /* Disable HDEC interrupts */ + mfspr r0,SPRN_HID0 + li r3,0 + rldimi r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1 + sync + mtspr SPRN_HID0,r0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + + /* load host SLB entries */ +33: ld r8,PACA_SLBSHADOWPTR(r13) + + .rept SLB_NUM_BOLTED + ld r5,SLBSHADOW_SAVEAREA(r8) + ld r6,SLBSHADOW_SAVEAREA+8(r8) + andis. r7,r5,SLB_ESID_V@h + beq 1f + slbmte r6,r5 +1: addi r8,r8,16 + .endr + + /* Save and reset AMR and UAMOR before turning on the MMU */ +BEGIN_FTR_SECTION + mfspr r5,SPRN_AMR + mfspr r6,SPRN_UAMOR + std r5,VCPU_AMR(r9) + std r6,VCPU_UAMOR(r9) + li r6,0 + mtspr SPRN_AMR,r6 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + + /* Restore host DABR and DABRX */ + ld r5,HSTATE_DABR(r13) + li r6,7 + mtspr SPRN_DABR,r5 + mtspr SPRN_DABRX,r6 + + /* Switch DSCR back to host value */ +BEGIN_FTR_SECTION + mfspr r8, SPRN_DSCR + ld r7, HSTATE_DSCR(r13) + std r8, VCPU_DSCR(r7) + mtspr SPRN_DSCR, r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + + /* Save non-volatile GPRs */ + std r14, VCPU_GPR(r14)(r9) + std r15, VCPU_GPR(r15)(r9) + std r16, VCPU_GPR(r16)(r9) + std r17, VCPU_GPR(r17)(r9) + std r18, VCPU_GPR(r18)(r9) + std r19, VCPU_GPR(r19)(r9) + std r20, VCPU_GPR(r20)(r9) + std r21, VCPU_GPR(r21)(r9) + std r22, VCPU_GPR(r22)(r9) + std r23, VCPU_GPR(r23)(r9) + std r24, VCPU_GPR(r24)(r9) + std r25, VCPU_GPR(r25)(r9) + std r26, VCPU_GPR(r26)(r9) + std r27, VCPU_GPR(r27)(r9) + std r28, VCPU_GPR(r28)(r9) + std r29, VCPU_GPR(r29)(r9) + std r30, VCPU_GPR(r30)(r9) + std r31, VCPU_GPR(r31)(r9) + + /* Save SPRGs */ + mfspr r3, SPRN_SPRG0 + mfspr r4, SPRN_SPRG1 + mfspr r5, SPRN_SPRG2 + mfspr r6, SPRN_SPRG3 + std r3, VCPU_SPRG0(r9) + std r4, VCPU_SPRG1(r9) + std r5, VCPU_SPRG2(r9) + std r6, VCPU_SPRG3(r9) + + /* Increment yield count if they have a VPA */ + ld r8, VCPU_VPA(r9) /* do they have a VPA? */ + cmpdi r8, 0 + beq 25f + lwz r3, LPPACA_YIELDCOUNT(r8) + addi r3, r3, 1 + stw r3, LPPACA_YIELDCOUNT(r8) +25: + /* Save PMU registers if requested */ + /* r8 and cr0.eq are live here */ + li r3, 1 + sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ + mfspr r4, SPRN_MMCR0 /* save MMCR0 */ + mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */ + isync + beq 21f /* if no VPA, save PMU stuff anyway */ + lbz r7, LPPACA_PMCINUSE(r8) + cmpwi r7, 0 /* did they ask for PMU stuff to be saved? */ + bne 21f + std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */ + b 22f +21: mfspr r5, SPRN_MMCR1 + mfspr r6, SPRN_MMCRA + std r4, VCPU_MMCR(r9) + std r5, VCPU_MMCR + 8(r9) + std r6, VCPU_MMCR + 16(r9) + mfspr r3, SPRN_PMC1 + mfspr r4, SPRN_PMC2 + mfspr r5, SPRN_PMC3 + mfspr r6, SPRN_PMC4 + mfspr r7, SPRN_PMC5 + mfspr r8, SPRN_PMC6 +BEGIN_FTR_SECTION + mfspr r10, SPRN_PMC7 + mfspr r11, SPRN_PMC8 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) + stw r3, VCPU_PMC(r9) + stw r4, VCPU_PMC + 4(r9) + stw r5, VCPU_PMC + 8(r9) + stw r6, VCPU_PMC + 12(r9) + stw r7, VCPU_PMC + 16(r9) + stw r8, VCPU_PMC + 20(r9) +BEGIN_FTR_SECTION + stw r10, VCPU_PMC + 24(r9) + stw r11, VCPU_PMC + 28(r9) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) +22: + /* save FP state */ + mr r3, r9 + bl .kvmppc_save_fp + + /* Secondary threads go off to take a nap on POWER7 */ +BEGIN_FTR_SECTION + lwz r0,VCPU_PTID(r3) + cmpwi r0,0 + bne secondary_nap +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + + /* + * Reload DEC. HDEC interrupts were disabled when + * we reloaded the host's LPCR value. + */ + ld r3, HSTATE_DECEXP(r13) + mftb r4 + subf r4, r4, r3 + mtspr SPRN_DEC, r4 + + /* Reload the host's PMU registers */ + ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */ + lbz r4, LPPACA_PMCINUSE(r3) + cmpwi r4, 0 + beq 23f /* skip if not */ + lwz r3, HSTATE_PMC(r13) + lwz r4, HSTATE_PMC + 4(r13) + lwz r5, HSTATE_PMC + 8(r13) + lwz r6, HSTATE_PMC + 12(r13) + lwz r8, HSTATE_PMC + 16(r13) + lwz r9, HSTATE_PMC + 20(r13) +BEGIN_FTR_SECTION + lwz r10, HSTATE_PMC + 24(r13) + lwz r11, HSTATE_PMC + 28(r13) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) + mtspr SPRN_PMC1, r3 + mtspr SPRN_PMC2, r4 + mtspr SPRN_PMC3, r5 + mtspr SPRN_PMC4, r6 + mtspr SPRN_PMC5, r8 + mtspr SPRN_PMC6, r9 +BEGIN_FTR_SECTION + mtspr SPRN_PMC7, r10 + mtspr SPRN_PMC8, r11 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) + ld r3, HSTATE_MMCR(r13) + ld r4, HSTATE_MMCR + 8(r13) + ld r5, HSTATE_MMCR + 16(r13) + mtspr SPRN_MMCR1, r4 + mtspr SPRN_MMCRA, r5 + mtspr SPRN_MMCR0, r3 + isync +23: + /* + * For external and machine check interrupts, we need + * to call the Linux handler to process the interrupt. + * We do that by jumping to the interrupt vector address + * which we have in r12. The [h]rfid at the end of the + * handler will return to the book3s_hv_interrupts.S code. + * For other interrupts we do the rfid to get back + * to the book3s_interrupts.S code here. + */ + ld r8, HSTATE_VMHANDLER(r13) + ld r7, HSTATE_HOST_MSR(r13) + + cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL + beq 11f + cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK + + /* RFI into the highmem handler, or branch to interrupt handler */ +12: mfmsr r6 + mtctr r12 + li r0, MSR_RI + andc r6, r6, r0 + mtmsrd r6, 1 /* Clear RI in MSR */ + mtsrr0 r8 + mtsrr1 r7 + beqctr + RFI + +11: +BEGIN_FTR_SECTION + b 12b +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) + mtspr SPRN_HSRR0, r8 + mtspr SPRN_HSRR1, r7 + ba 0x500 + +/* + * Check whether an HDSI is an HPTE not found fault or something else. + * If it is an HPTE not found fault that is due to the guest accessing + * a page that they have mapped but which we have paged out, then + * we continue on with the guest exit path. In all other cases, + * reflect the HDSI to the guest as a DSI. + */ +kvmppc_hdsi: + mfspr r4, SPRN_HDAR + mfspr r6, SPRN_HDSISR + /* HPTE not found fault or protection fault? */ + andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h + beq 1f /* if not, send it to the guest */ + andi. r0, r11, MSR_DR /* data relocation enabled? */ + beq 3f + clrrdi r0, r4, 28 + PPC_SLBFEE_DOT(r5, r0) /* if so, look up SLB */ + bne 1f /* if no SLB entry found */ +4: std r4, VCPU_FAULT_DAR(r9) + stw r6, VCPU_FAULT_DSISR(r9) + + /* Search the hash table. */ + mr r3, r9 /* vcpu pointer */ + li r7, 1 /* data fault */ + bl .kvmppc_hpte_hv_fault + ld r9, HSTATE_KVM_VCPU(r13) + ld r10, VCPU_PC(r9) + ld r11, VCPU_MSR(r9) + li r12, BOOK3S_INTERRUPT_H_DATA_STORAGE + cmpdi r3, 0 /* retry the instruction */ + beq 6f + cmpdi r3, -1 /* handle in kernel mode */ + beq nohpte_cont + cmpdi r3, -2 /* MMIO emulation; need instr word */ + beq 2f + + /* Synthesize a DSI for the guest */ + ld r4, VCPU_FAULT_DAR(r9) + mr r6, r3 +1: mtspr SPRN_DAR, r4 + mtspr SPRN_DSISR, r6 + mtspr SPRN_SRR0, r10 + mtspr SPRN_SRR1, r11 + li r10, BOOK3S_INTERRUPT_DATA_STORAGE + li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ + rotldi r11, r11, 63 +6: ld r7, VCPU_CTR(r9) + lwz r8, VCPU_XER(r9) + mtctr r7 + mtxer r8 + mr r4, r9 + b fast_guest_return + +3: ld r5, VCPU_KVM(r9) /* not relocated, use VRMA */ + ld r5, KVM_VRMA_SLB_V(r5) + b 4b + + /* If this is for emulated MMIO, load the instruction word */ +2: li r8, KVM_INST_FETCH_FAILED /* In case lwz faults */ + + /* Set guest mode to 'jump over instruction' so if lwz faults + * we'll just continue at the next IP. */ + li r0, KVM_GUEST_MODE_SKIP + stb r0, HSTATE_IN_GUEST(r13) + + /* Do the access with MSR:DR enabled */ + mfmsr r3 + ori r4, r3, MSR_DR /* Enable paging for data */ + mtmsrd r4 + lwz r8, 0(r10) + mtmsrd r3 + + /* Store the result */ + stw r8, VCPU_LAST_INST(r9) + + /* Unset guest mode. */ + li r0, KVM_GUEST_MODE_NONE + stb r0, HSTATE_IN_GUEST(r13) + b nohpte_cont + +/* + * Similarly for an HISI, reflect it to the guest as an ISI unless + * it is an HPTE not found fault for a page that we have paged out. + */ +kvmppc_hisi: + andis. r0, r11, SRR1_ISI_NOPT@h + beq 1f + andi. r0, r11, MSR_IR /* instruction relocation enabled? */ + beq 3f + clrrdi r0, r10, 28 + PPC_SLBFEE_DOT(r5, r0) /* if so, look up SLB */ + bne 1f /* if no SLB entry found */ +4: + /* Search the hash table. */ + mr r3, r9 /* vcpu pointer */ + mr r4, r10 + mr r6, r11 + li r7, 0 /* instruction fault */ + bl .kvmppc_hpte_hv_fault + ld r9, HSTATE_KVM_VCPU(r13) + ld r10, VCPU_PC(r9) + ld r11, VCPU_MSR(r9) + li r12, BOOK3S_INTERRUPT_H_INST_STORAGE + cmpdi r3, 0 /* retry the instruction */ + beq 6f + cmpdi r3, -1 /* handle in kernel mode */ + beq nohpte_cont + + /* Synthesize an ISI for the guest */ + mr r11, r3 +1: mtspr SPRN_SRR0, r10 + mtspr SPRN_SRR1, r11 + li r10, BOOK3S_INTERRUPT_INST_STORAGE + li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ + rotldi r11, r11, 63 +6: ld r7, VCPU_CTR(r9) + lwz r8, VCPU_XER(r9) + mtctr r7 + mtxer r8 + mr r4, r9 + b fast_guest_return + +3: ld r6, VCPU_KVM(r9) /* not relocated, use VRMA */ + ld r5, KVM_VRMA_SLB_V(r6) + b 4b + +/* + * Try to handle an hcall in real mode. + * Returns to the guest if we handle it, or continues on up to + * the kernel if we can't (i.e. if we don't have a handler for + * it, or if the handler returns H_TOO_HARD). + */ + .globl hcall_try_real_mode +hcall_try_real_mode: + ld r3,VCPU_GPR(r3)(r9) + andi. r0,r11,MSR_PR + bne hcall_real_cont + clrrdi r3,r3,2 + cmpldi r3,hcall_real_table_end - hcall_real_table + bge hcall_real_cont + LOAD_REG_ADDR(r4, hcall_real_table) + lwzx r3,r3,r4 + cmpwi r3,0 + beq hcall_real_cont + add r3,r3,r4 + mtctr r3 + mr r3,r9 /* get vcpu pointer */ + ld r4,VCPU_GPR(r4)(r9) + bctrl + cmpdi r3,H_TOO_HARD + beq hcall_real_fallback + ld r4,HSTATE_KVM_VCPU(r13) + std r3,VCPU_GPR(r3)(r4) + ld r10,VCPU_PC(r4) + ld r11,VCPU_MSR(r4) + b fast_guest_return + + /* We've attempted a real mode hcall, but it's punted it back + * to userspace. We need to restore some clobbered volatiles + * before resuming the pass-it-to-qemu path */ +hcall_real_fallback: + li r12,BOOK3S_INTERRUPT_SYSCALL + ld r9, HSTATE_KVM_VCPU(r13) + + b hcall_real_cont + + .globl hcall_real_table +hcall_real_table: + .long 0 /* 0 - unused */ + .long .kvmppc_h_remove - hcall_real_table + .long .kvmppc_h_enter - hcall_real_table + .long .kvmppc_h_read - hcall_real_table + .long 0 /* 0x10 - H_CLEAR_MOD */ + .long 0 /* 0x14 - H_CLEAR_REF */ + .long .kvmppc_h_protect - hcall_real_table + .long 0 /* 0x1c - H_GET_TCE */ + .long .kvmppc_h_put_tce - hcall_real_table + .long 0 /* 0x24 - H_SET_SPRG0 */ + .long .kvmppc_h_set_dabr - hcall_real_table + .long 0 /* 0x2c */ + .long 0 /* 0x30 */ + .long 0 /* 0x34 */ + .long 0 /* 0x38 */ + .long 0 /* 0x3c */ + .long 0 /* 0x40 */ + .long 0 /* 0x44 */ + .long 0 /* 0x48 */ + .long 0 /* 0x4c */ + .long 0 /* 0x50 */ + .long 0 /* 0x54 */ + .long 0 /* 0x58 */ + .long 0 /* 0x5c */ + .long 0 /* 0x60 */ + .long 0 /* 0x64 */ + .long 0 /* 0x68 */ + .long 0 /* 0x6c */ + .long 0 /* 0x70 */ + .long 0 /* 0x74 */ + .long 0 /* 0x78 */ + .long 0 /* 0x7c */ + .long 0 /* 0x80 */ + .long 0 /* 0x84 */ + .long 0 /* 0x88 */ + .long 0 /* 0x8c */ + .long 0 /* 0x90 */ + .long 0 /* 0x94 */ + .long 0 /* 0x98 */ + .long 0 /* 0x9c */ + .long 0 /* 0xa0 */ + .long 0 /* 0xa4 */ + .long 0 /* 0xa8 */ + .long 0 /* 0xac */ + .long 0 /* 0xb0 */ + .long 0 /* 0xb4 */ + .long 0 /* 0xb8 */ + .long 0 /* 0xbc */ + .long 0 /* 0xc0 */ + .long 0 /* 0xc4 */ + .long 0 /* 0xc8 */ + .long 0 /* 0xcc */ + .long 0 /* 0xd0 */ + .long 0 /* 0xd4 */ + .long 0 /* 0xd8 */ + .long 0 /* 0xdc */ + .long .kvmppc_h_cede - hcall_real_table + .long 0 /* 0xe4 */ + .long 0 /* 0xe8 */ + .long 0 /* 0xec */ + .long 0 /* 0xf0 */ + .long 0 /* 0xf4 */ + .long 0 /* 0xf8 */ + .long 0 /* 0xfc */ + .long 0 /* 0x100 */ + .long 0 /* 0x104 */ + .long 0 /* 0x108 */ + .long 0 /* 0x10c */ + .long 0 /* 0x110 */ + .long 0 /* 0x114 */ + .long 0 /* 0x118 */ + .long 0 /* 0x11c */ + .long 0 /* 0x120 */ + .long .kvmppc_h_bulk_remove - hcall_real_table +hcall_real_table_end: + +ignore_hdec: + mr r4,r9 + b fast_guest_return + +bounce_ext_interrupt: + mr r4,r9 + mtspr SPRN_SRR0,r10 + mtspr SPRN_SRR1,r11 + li r10,BOOK3S_INTERRUPT_EXTERNAL + li r11,(MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ + rotldi r11,r11,63 + b fast_guest_return + +_GLOBAL(kvmppc_h_set_dabr) + std r4,VCPU_DABR(r3) + mtspr SPRN_DABR,r4 + li r3,0 + blr + +_GLOBAL(kvmppc_h_cede) + ori r11,r11,MSR_EE + std r11,VCPU_MSR(r3) + li r0,1 + stb r0,VCPU_CEDED(r3) + sync /* order setting ceded vs. testing prodded */ + lbz r5,VCPU_PRODDED(r3) + cmpwi r5,0 + bne 1f + li r0,0 /* set trap to 0 to say hcall is handled */ + stw r0,VCPU_TRAP(r3) + li r0,H_SUCCESS + std r0,VCPU_GPR(r3)(r3) +BEGIN_FTR_SECTION + b 2f /* just send it up to host on 970 */ +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) + + /* + * Set our bit in the bitmask of napping threads unless all the + * other threads are already napping, in which case we send this + * up to the host. + */ + ld r5,HSTATE_KVM_VCORE(r13) + lwz r6,VCPU_PTID(r3) + lwz r8,VCORE_ENTRY_EXIT(r5) + clrldi r8,r8,56 + li r0,1 + sld r0,r0,r6 + addi r6,r5,VCORE_NAPPING_THREADS +31: lwarx r4,0,r6 + or r4,r4,r0 + PPC_POPCNTW(r7,r4) + cmpw r7,r8 + bge 2f + stwcx. r4,0,r6 + bne 31b + li r0,1 + stb r0,HSTATE_NAPPING(r13) + /* order napping_threads update vs testing entry_exit_count */ + lwsync + mr r4,r3 + lwz r7,VCORE_ENTRY_EXIT(r5) + cmpwi r7,0x100 + bge 33f /* another thread already exiting */ + +/* + * Although not specifically required by the architecture, POWER7 + * preserves the following registers in nap mode, even if an SMT mode + * switch occurs: SLB entries, PURR, SPURR, AMOR, UAMOR, AMR, SPRG0-3, + * DAR, DSISR, DABR, DABRX, DSCR, PMCx, MMCRx, SIAR, SDAR. + */ + /* Save non-volatile GPRs */ + std r14, VCPU_GPR(r14)(r3) + std r15, VCPU_GPR(r15)(r3) + std r16, VCPU_GPR(r16)(r3) + std r17, VCPU_GPR(r17)(r3) + std r18, VCPU_GPR(r18)(r3) + std r19, VCPU_GPR(r19)(r3) + std r20, VCPU_GPR(r20)(r3) + std r21, VCPU_GPR(r21)(r3) + std r22, VCPU_GPR(r22)(r3) + std r23, VCPU_GPR(r23)(r3) + std r24, VCPU_GPR(r24)(r3) + std r25, VCPU_GPR(r25)(r3) + std r26, VCPU_GPR(r26)(r3) + std r27, VCPU_GPR(r27)(r3) + std r28, VCPU_GPR(r28)(r3) + std r29, VCPU_GPR(r29)(r3) + std r30, VCPU_GPR(r30)(r3) + std r31, VCPU_GPR(r31)(r3) + + /* save FP state */ + bl .kvmppc_save_fp + + /* + * Take a nap until a decrementer or external interrupt occurs, + * with PECE1 (wake on decr) and PECE0 (wake on external) set in LPCR + */ + li r0,0x80 + stb r0,PACAPROCSTART(r13) + mfspr r5,SPRN_LPCR + ori r5,r5,LPCR_PECE0 | LPCR_PECE1 + mtspr SPRN_LPCR,r5 + isync + li r0, 0 + std r0, HSTATE_SCRATCH0(r13) + ptesync + ld r0, HSTATE_SCRATCH0(r13) +1: cmpd r0, r0 + bne 1b + nap + b . + +kvm_end_cede: + /* Woken by external or decrementer interrupt */ + ld r1, HSTATE_HOST_R1(r13) + ld r2, PACATOC(r13) + + /* If we're a secondary thread and we got here by an IPI, ack it */ + ld r4,HSTATE_KVM_VCPU(r13) + lwz r3,VCPU_PTID(r4) + cmpwi r3,0 + beq 27f + mfspr r3,SPRN_SRR1 + rlwinm r3,r3,44-31,0x7 /* extract wake reason field */ + cmpwi r3,4 /* was it an external interrupt? */ + bne 27f + ld r5, HSTATE_XICS_PHYS(r13) + li r0,0xff + li r6,XICS_QIRR + li r7,XICS_XIRR + lwzcix r8,r5,r7 /* ack the interrupt */ + sync + stbcix r0,r5,r6 /* clear it */ + stwcix r8,r5,r7 /* EOI it */ +27: + /* load up FP state */ + bl kvmppc_load_fp + + /* Load NV GPRS */ + ld r14, VCPU_GPR(r14)(r4) + ld r15, VCPU_GPR(r15)(r4) + ld r16, VCPU_GPR(r16)(r4) + ld r17, VCPU_GPR(r17)(r4) + ld r18, VCPU_GPR(r18)(r4) + ld r19, VCPU_GPR(r19)(r4) + ld r20, VCPU_GPR(r20)(r4) + ld r21, VCPU_GPR(r21)(r4) + ld r22, VCPU_GPR(r22)(r4) + ld r23, VCPU_GPR(r23)(r4) + ld r24, VCPU_GPR(r24)(r4) + ld r25, VCPU_GPR(r25)(r4) + ld r26, VCPU_GPR(r26)(r4) + ld r27, VCPU_GPR(r27)(r4) + ld r28, VCPU_GPR(r28)(r4) + ld r29, VCPU_GPR(r29)(r4) + ld r30, VCPU_GPR(r30)(r4) + ld r31, VCPU_GPR(r31)(r4) + + /* clear our bit in vcore->napping_threads */ +33: ld r5,HSTATE_KVM_VCORE(r13) + lwz r3,VCPU_PTID(r4) + li r0,1 + sld r0,r0,r3 + addi r6,r5,VCORE_NAPPING_THREADS +32: lwarx r7,0,r6 + andc r7,r7,r0 + stwcx. r7,0,r6 + bne 32b + li r0,0 + stb r0,HSTATE_NAPPING(r13) + + /* see if any other thread is already exiting */ + lwz r0,VCORE_ENTRY_EXIT(r5) + cmpwi r0,0x100 + blt kvmppc_cede_reentry /* if not go back to guest */ + + /* some threads are exiting, so go to the guest exit path */ + b hcall_real_fallback + + /* cede when already previously prodded case */ +1: li r0,0 + stb r0,VCPU_PRODDED(r3) + sync /* order testing prodded vs. clearing ceded */ + stb r0,VCPU_CEDED(r3) + li r3,H_SUCCESS + blr + + /* we've ceded but we want to give control to the host */ +2: li r3,H_TOO_HARD + blr + +secondary_too_late: + ld r5,HSTATE_KVM_VCORE(r13) + HMT_LOW +13: lbz r3,VCORE_IN_GUEST(r5) + cmpwi r3,0 + bne 13b + HMT_MEDIUM + ld r11,PACA_SLBSHADOWPTR(r13) + + .rept SLB_NUM_BOLTED + ld r5,SLBSHADOW_SAVEAREA(r11) + ld r6,SLBSHADOW_SAVEAREA+8(r11) + andis. r7,r5,SLB_ESID_V@h + beq 1f + slbmte r6,r5 +1: addi r11,r11,16 + .endr + +secondary_nap: + /* Clear any pending IPI - assume we're a secondary thread */ + ld r5, HSTATE_XICS_PHYS(r13) + li r7, XICS_XIRR + lwzcix r3, r5, r7 /* ack any pending interrupt */ + rlwinm. r0, r3, 0, 0xffffff /* any pending? */ + beq 37f + sync + li r0, 0xff + li r6, XICS_QIRR + stbcix r0, r5, r6 /* clear the IPI */ + stwcix r3, r5, r7 /* EOI it */ +37: sync + + /* increment the nap count and then go to nap mode */ + ld r4, HSTATE_KVM_VCORE(r13) + addi r4, r4, VCORE_NAP_COUNT + lwsync /* make previous updates visible */ +51: lwarx r3, 0, r4 + addi r3, r3, 1 + stwcx. r3, 0, r4 + bne 51b + + li r3, LPCR_PECE0 + mfspr r4, SPRN_LPCR + rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 + mtspr SPRN_LPCR, r4 + isync + li r0, 0 + std r0, HSTATE_SCRATCH0(r13) + ptesync + ld r0, HSTATE_SCRATCH0(r13) +1: cmpd r0, r0 + bne 1b + nap + b . + +/* + * Save away FP, VMX and VSX registers. + * r3 = vcpu pointer + */ +_GLOBAL(kvmppc_save_fp) + mfmsr r9 + ori r8,r9,MSR_FP +#ifdef CONFIG_ALTIVEC +BEGIN_FTR_SECTION + oris r8,r8,MSR_VEC@h +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) +#endif +#ifdef CONFIG_VSX +BEGIN_FTR_SECTION + oris r8,r8,MSR_VSX@h +END_FTR_SECTION_IFSET(CPU_FTR_VSX) +#endif + mtmsrd r8 + isync +#ifdef CONFIG_VSX +BEGIN_FTR_SECTION + reg = 0 + .rept 32 + li r6,reg*16+VCPU_VSRS + STXVD2X(reg,r6,r3) + reg = reg + 1 + .endr +FTR_SECTION_ELSE +#endif + reg = 0 + .rept 32 + stfd reg,reg*8+VCPU_FPRS(r3) + reg = reg + 1 + .endr +#ifdef CONFIG_VSX +ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX) +#endif + mffs fr0 + stfd fr0,VCPU_FPSCR(r3) + +#ifdef CONFIG_ALTIVEC +BEGIN_FTR_SECTION + reg = 0 + .rept 32 + li r6,reg*16+VCPU_VRS + stvx reg,r6,r3 + reg = reg + 1 + .endr + mfvscr vr0 + li r6,VCPU_VSCR + stvx vr0,r6,r3 +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) +#endif + mfspr r6,SPRN_VRSAVE + stw r6,VCPU_VRSAVE(r3) + mtmsrd r9 + isync + blr + +/* + * Load up FP, VMX and VSX registers + * r4 = vcpu pointer + */ + .globl kvmppc_load_fp +kvmppc_load_fp: + mfmsr r9 + ori r8,r9,MSR_FP +#ifdef CONFIG_ALTIVEC +BEGIN_FTR_SECTION + oris r8,r8,MSR_VEC@h +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) +#endif +#ifdef CONFIG_VSX +BEGIN_FTR_SECTION + oris r8,r8,MSR_VSX@h +END_FTR_SECTION_IFSET(CPU_FTR_VSX) +#endif + mtmsrd r8 + isync + lfd fr0,VCPU_FPSCR(r4) + MTFSF_L(fr0) +#ifdef CONFIG_VSX +BEGIN_FTR_SECTION + reg = 0 + .rept 32 + li r7,reg*16+VCPU_VSRS + LXVD2X(reg,r7,r4) + reg = reg + 1 + .endr +FTR_SECTION_ELSE +#endif + reg = 0 + .rept 32 + lfd reg,reg*8+VCPU_FPRS(r4) + reg = reg + 1 + .endr +#ifdef CONFIG_VSX +ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX) +#endif + +#ifdef CONFIG_ALTIVEC +BEGIN_FTR_SECTION + li r7,VCPU_VSCR + lvx vr0,r7,r4 + mtvscr vr0 + reg = 0 + .rept 32 + li r7,reg*16+VCPU_VRS + lvx reg,r7,r4 + reg = reg + 1 + .endr +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) +#endif + lwz r7,VCPU_VRSAVE(r4) + mtspr SPRN_VRSAVE,r7 + blr diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S new file mode 100644 index 00000000..3e35383b --- /dev/null +++ b/arch/powerpc/kvm/book3s_interrupts.S @@ -0,0 +1,201 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright SUSE Linux Products GmbH 2009 + * + * Authors: Alexander Graf <agraf@suse.de> + */ + +#include <asm/ppc_asm.h> +#include <asm/kvm_asm.h> +#include <asm/reg.h> +#include <asm/page.h> +#include <asm/asm-offsets.h> +#include <asm/exception-64s.h> + +#if defined(CONFIG_PPC_BOOK3S_64) + +#define ULONG_SIZE 8 +#define FUNC(name) GLUE(.,name) + +#elif defined(CONFIG_PPC_BOOK3S_32) + +#define ULONG_SIZE 4 +#define FUNC(name) name + +#endif /* CONFIG_PPC_BOOK3S_XX */ + + +#define VCPU_GPR(n) (VCPU_GPRS + (n * ULONG_SIZE)) +#define VCPU_LOAD_NVGPRS(vcpu) \ + PPC_LL r14, VCPU_GPR(r14)(vcpu); \ + PPC_LL r15, VCPU_GPR(r15)(vcpu); \ + PPC_LL r16, VCPU_GPR(r16)(vcpu); \ + PPC_LL r17, VCPU_GPR(r17)(vcpu); \ + PPC_LL r18, VCPU_GPR(r18)(vcpu); \ + PPC_LL r19, VCPU_GPR(r19)(vcpu); \ + PPC_LL r20, VCPU_GPR(r20)(vcpu); \ + PPC_LL r21, VCPU_GPR(r21)(vcpu); \ + PPC_LL r22, VCPU_GPR(r22)(vcpu); \ + PPC_LL r23, VCPU_GPR(r23)(vcpu); \ + PPC_LL r24, VCPU_GPR(r24)(vcpu); \ + PPC_LL r25, VCPU_GPR(r25)(vcpu); \ + PPC_LL r26, VCPU_GPR(r26)(vcpu); \ + PPC_LL r27, VCPU_GPR(r27)(vcpu); \ + PPC_LL r28, VCPU_GPR(r28)(vcpu); \ + PPC_LL r29, VCPU_GPR(r29)(vcpu); \ + PPC_LL r30, VCPU_GPR(r30)(vcpu); \ + PPC_LL r31, VCPU_GPR(r31)(vcpu); \ + +/***************************************************************************** + * * + * Guest entry / exit code that is in kernel module memory (highmem) * + * * + ****************************************************************************/ + +/* Registers: + * r3: kvm_run pointer + * r4: vcpu pointer + */ +_GLOBAL(__kvmppc_vcpu_run) + +kvm_start_entry: + /* Write correct stack frame */ + mflr r0 + PPC_STL r0,PPC_LR_STKOFF(r1) + + /* Save host state to the stack */ + PPC_STLU r1, -SWITCH_FRAME_SIZE(r1) + + /* Save r3 (kvm_run) and r4 (vcpu) */ + SAVE_2GPRS(3, r1) + + /* Save non-volatile registers (r14 - r31) */ + SAVE_NVGPRS(r1) + + /* Save CR */ + mfcr r14 + stw r14, _CCR(r1) + + /* Save LR */ + PPC_STL r0, _LINK(r1) + + /* Load non-volatile guest state from the vcpu */ + VCPU_LOAD_NVGPRS(r4) + +kvm_start_lightweight: + +#ifdef CONFIG_PPC_BOOK3S_64 + PPC_LL r3, VCPU_HFLAGS(r4) + rldicl r3, r3, 0, 63 /* r3 &= 1 */ + stb r3, HSTATE_RESTORE_HID5(r13) +#endif /* CONFIG_PPC_BOOK3S_64 */ + + PPC_LL r4, VCPU_SHADOW_MSR(r4) /* get shadow_msr */ + + /* Jump to segment patching handler and into our guest */ + bl FUNC(kvmppc_entry_trampoline) + nop + +/* + * This is the handler in module memory. It gets jumped at from the + * lowmem trampoline code, so it's basically the guest exit code. + * + */ + +.global kvmppc_handler_highmem +kvmppc_handler_highmem: + + /* + * Register usage at this point: + * + * R1 = host R1 + * R2 = host R2 + * R12 = exit handler id + * R13 = PACA + * SVCPU.* = guest * + * + */ + + /* R7 = vcpu */ + PPC_LL r7, GPR4(r1) + + PPC_STL r14, VCPU_GPR(r14)(r7) + PPC_STL r15, VCPU_GPR(r15)(r7) + PPC_STL r16, VCPU_GPR(r16)(r7) + PPC_STL r17, VCPU_GPR(r17)(r7) + PPC_STL r18, VCPU_GPR(r18)(r7) + PPC_STL r19, VCPU_GPR(r19)(r7) + PPC_STL r20, VCPU_GPR(r20)(r7) + PPC_STL r21, VCPU_GPR(r21)(r7) + PPC_STL r22, VCPU_GPR(r22)(r7) + PPC_STL r23, VCPU_GPR(r23)(r7) + PPC_STL r24, VCPU_GPR(r24)(r7) + PPC_STL r25, VCPU_GPR(r25)(r7) + PPC_STL r26, VCPU_GPR(r26)(r7) + PPC_STL r27, VCPU_GPR(r27)(r7) + PPC_STL r28, VCPU_GPR(r28)(r7) + PPC_STL r29, VCPU_GPR(r29)(r7) + PPC_STL r30, VCPU_GPR(r30)(r7) + PPC_STL r31, VCPU_GPR(r31)(r7) + + /* Pass the exit number as 3rd argument to kvmppc_handle_exit */ + mr r5, r12 + + /* Restore r3 (kvm_run) and r4 (vcpu) */ + REST_2GPRS(3, r1) + bl FUNC(kvmppc_handle_exit) + + /* If RESUME_GUEST, get back in the loop */ + cmpwi r3, RESUME_GUEST + beq kvm_loop_lightweight + + cmpwi r3, RESUME_GUEST_NV + beq kvm_loop_heavyweight + +kvm_exit_loop: + + PPC_LL r4, _LINK(r1) + mtlr r4 + + lwz r14, _CCR(r1) + mtcr r14 + + /* Restore non-volatile host registers (r14 - r31) */ + REST_NVGPRS(r1) + + addi r1, r1, SWITCH_FRAME_SIZE + blr + +kvm_loop_heavyweight: + + PPC_LL r4, _LINK(r1) + PPC_STL r4, (PPC_LR_STKOFF + SWITCH_FRAME_SIZE)(r1) + + /* Load vcpu and cpu_run */ + REST_2GPRS(3, r1) + + /* Load non-volatile guest state from the vcpu */ + VCPU_LOAD_NVGPRS(r4) + + /* Jump back into the beginning of this function */ + b kvm_start_lightweight + +kvm_loop_lightweight: + + /* We'll need the vcpu pointer */ + REST_GPR(4, r1) + + /* Jump back into the beginning of this function */ + b kvm_start_lightweight diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c new file mode 100644 index 00000000..41cb0017 --- /dev/null +++ b/arch/powerpc/kvm/book3s_mmu_hpte.c @@ -0,0 +1,352 @@ +/* + * Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved. + * + * Authors: + * Alexander Graf <agraf@suse.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include <linux/kvm_host.h> +#include <linux/hash.h> +#include <linux/slab.h> + +#include <asm/kvm_ppc.h> +#include <asm/kvm_book3s.h> +#include <asm/machdep.h> +#include <asm/mmu_context.h> +#include <asm/hw_irq.h> + +#include "trace.h" + +#define PTE_SIZE 12 + +static struct kmem_cache *hpte_cache; + +static inline u64 kvmppc_mmu_hash_pte(u64 eaddr) +{ + return hash_64(eaddr >> PTE_SIZE, HPTEG_HASH_BITS_PTE); +} + +static inline u64 kvmppc_mmu_hash_pte_long(u64 eaddr) +{ + return hash_64((eaddr & 0x0ffff000) >> PTE_SIZE, + HPTEG_HASH_BITS_PTE_LONG); +} + +static inline u64 kvmppc_mmu_hash_vpte(u64 vpage) +{ + return hash_64(vpage & 0xfffffffffULL, HPTEG_HASH_BITS_VPTE); +} + +static inline u64 kvmppc_mmu_hash_vpte_long(u64 vpage) +{ + return hash_64((vpage & 0xffffff000ULL) >> 12, + HPTEG_HASH_BITS_VPTE_LONG); +} + +void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte) +{ + u64 index; + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); + + trace_kvm_book3s_mmu_map(pte); + + spin_lock(&vcpu3s->mmu_lock); + + /* Add to ePTE list */ + index = kvmppc_mmu_hash_pte(pte->pte.eaddr); + hlist_add_head_rcu(&pte->list_pte, &vcpu3s->hpte_hash_pte[index]); + + /* Add to ePTE_long list */ + index = kvmppc_mmu_hash_pte_long(pte->pte.eaddr); + hlist_add_head_rcu(&pte->list_pte_long, + &vcpu3s->hpte_hash_pte_long[index]); + + /* Add to vPTE list */ + index = kvmppc_mmu_hash_vpte(pte->pte.vpage); + hlist_add_head_rcu(&pte->list_vpte, &vcpu3s->hpte_hash_vpte[index]); + + /* Add to vPTE_long list */ + index = kvmppc_mmu_hash_vpte_long(pte->pte.vpage); + hlist_add_head_rcu(&pte->list_vpte_long, + &vcpu3s->hpte_hash_vpte_long[index]); + + spin_unlock(&vcpu3s->mmu_lock); +} + +static void free_pte_rcu(struct rcu_head *head) +{ + struct hpte_cache *pte = container_of(head, struct hpte_cache, rcu_head); + kmem_cache_free(hpte_cache, pte); +} + +static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) +{ + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); + + trace_kvm_book3s_mmu_invalidate(pte); + + /* Different for 32 and 64 bit */ + kvmppc_mmu_invalidate_pte(vcpu, pte); + + spin_lock(&vcpu3s->mmu_lock); + + /* pte already invalidated in between? */ + if (hlist_unhashed(&pte->list_pte)) { + spin_unlock(&vcpu3s->mmu_lock); + return; + } + + hlist_del_init_rcu(&pte->list_pte); + hlist_del_init_rcu(&pte->list_pte_long); + hlist_del_init_rcu(&pte->list_vpte); + hlist_del_init_rcu(&pte->list_vpte_long); + + if (pte->pte.may_write) + kvm_release_pfn_dirty(pte->pfn); + else + kvm_release_pfn_clean(pte->pfn); + + spin_unlock(&vcpu3s->mmu_lock); + + vcpu3s->hpte_cache_count--; + call_rcu(&pte->rcu_head, free_pte_rcu); +} + +static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); + struct hpte_cache *pte; + struct hlist_node *node; + int i; + + rcu_read_lock(); + + for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) { + struct hlist_head *list = &vcpu3s->hpte_hash_vpte_long[i]; + + hlist_for_each_entry_rcu(pte, node, list, list_vpte_long) + invalidate_pte(vcpu, pte); + } + + rcu_read_unlock(); +} + +static void kvmppc_mmu_pte_flush_page(struct kvm_vcpu *vcpu, ulong guest_ea) +{ + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); + struct hlist_head *list; + struct hlist_node *node; + struct hpte_cache *pte; + + /* Find the list of entries in the map */ + list = &vcpu3s->hpte_hash_pte[kvmppc_mmu_hash_pte(guest_ea)]; + + rcu_read_lock(); + + /* Check the list for matching entries and invalidate */ + hlist_for_each_entry_rcu(pte, node, list, list_pte) + if ((pte->pte.eaddr & ~0xfffUL) == guest_ea) + invalidate_pte(vcpu, pte); + + rcu_read_unlock(); +} + +static void kvmppc_mmu_pte_flush_long(struct kvm_vcpu *vcpu, ulong guest_ea) +{ + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); + struct hlist_head *list; + struct hlist_node *node; + struct hpte_cache *pte; + + /* Find the list of entries in the map */ + list = &vcpu3s->hpte_hash_pte_long[ + kvmppc_mmu_hash_pte_long(guest_ea)]; + + rcu_read_lock(); + + /* Check the list for matching entries and invalidate */ + hlist_for_each_entry_rcu(pte, node, list, list_pte_long) + if ((pte->pte.eaddr & 0x0ffff000UL) == guest_ea) + invalidate_pte(vcpu, pte); + + rcu_read_unlock(); +} + +void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) +{ + trace_kvm_book3s_mmu_flush("", vcpu, guest_ea, ea_mask); + guest_ea &= ea_mask; + + switch (ea_mask) { + case ~0xfffUL: + kvmppc_mmu_pte_flush_page(vcpu, guest_ea); + break; + case 0x0ffff000: + kvmppc_mmu_pte_flush_long(vcpu, guest_ea); + break; + case 0: + /* Doing a complete flush -> start from scratch */ + kvmppc_mmu_pte_flush_all(vcpu); + break; + default: + WARN_ON(1); + break; + } +} + +/* Flush with mask 0xfffffffff */ +static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp) +{ + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); + struct hlist_head *list; + struct hlist_node *node; + struct hpte_cache *pte; + u64 vp_mask = 0xfffffffffULL; + + list = &vcpu3s->hpte_hash_vpte[kvmppc_mmu_hash_vpte(guest_vp)]; + + rcu_read_lock(); + + /* Check the list for matching entries and invalidate */ + hlist_for_each_entry_rcu(pte, node, list, list_vpte) + if ((pte->pte.vpage & vp_mask) == guest_vp) + invalidate_pte(vcpu, pte); + + rcu_read_unlock(); +} + +/* Flush with mask 0xffffff000 */ +static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp) +{ + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); + struct hlist_head *list; + struct hlist_node *node; + struct hpte_cache *pte; + u64 vp_mask = 0xffffff000ULL; + + list = &vcpu3s->hpte_hash_vpte_long[ + kvmppc_mmu_hash_vpte_long(guest_vp)]; + + rcu_read_lock(); + + /* Check the list for matching entries and invalidate */ + hlist_for_each_entry_rcu(pte, node, list, list_vpte_long) + if ((pte->pte.vpage & vp_mask) == guest_vp) + invalidate_pte(vcpu, pte); + + rcu_read_unlock(); +} + +void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask) +{ + trace_kvm_book3s_mmu_flush("v", vcpu, guest_vp, vp_mask); + guest_vp &= vp_mask; + + switch(vp_mask) { + case 0xfffffffffULL: + kvmppc_mmu_pte_vflush_short(vcpu, guest_vp); + break; + case 0xffffff000ULL: + kvmppc_mmu_pte_vflush_long(vcpu, guest_vp); + break; + default: + WARN_ON(1); + return; + } +} + +void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) +{ + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); + struct hlist_node *node; + struct hpte_cache *pte; + int i; + + trace_kvm_book3s_mmu_flush("p", vcpu, pa_start, pa_end); + + rcu_read_lock(); + + for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) { + struct hlist_head *list = &vcpu3s->hpte_hash_vpte_long[i]; + + hlist_for_each_entry_rcu(pte, node, list, list_vpte_long) + if ((pte->pte.raddr >= pa_start) && + (pte->pte.raddr < pa_end)) + invalidate_pte(vcpu, pte); + } + + rcu_read_unlock(); +} + +struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); + struct hpte_cache *pte; + + pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL); + vcpu3s->hpte_cache_count++; + + if (vcpu3s->hpte_cache_count == HPTEG_CACHE_NUM) + kvmppc_mmu_pte_flush_all(vcpu); + + return pte; +} + +void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu) +{ + kvmppc_mmu_pte_flush(vcpu, 0, 0); +} + +static void kvmppc_mmu_hpte_init_hash(struct hlist_head *hash_list, int len) +{ + int i; + + for (i = 0; i < len; i++) + INIT_HLIST_HEAD(&hash_list[i]); +} + +int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); + + /* init hpte lookup hashes */ + kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_pte, + ARRAY_SIZE(vcpu3s->hpte_hash_pte)); + kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_pte_long, + ARRAY_SIZE(vcpu3s->hpte_hash_pte_long)); + kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte, + ARRAY_SIZE(vcpu3s->hpte_hash_vpte)); + kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte_long, + ARRAY_SIZE(vcpu3s->hpte_hash_vpte_long)); + + spin_lock_init(&vcpu3s->mmu_lock); + + return 0; +} + +int kvmppc_mmu_hpte_sysinit(void) +{ + /* init hpte slab cache */ + hpte_cache = kmem_cache_create("kvm-spt", sizeof(struct hpte_cache), + sizeof(struct hpte_cache), 0, NULL); + + return 0; +} + +void kvmppc_mmu_hpte_sysexit(void) +{ + kmem_cache_destroy(hpte_cache); +} diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c new file mode 100644 index 00000000..a59a25a1 --- /dev/null +++ b/arch/powerpc/kvm/book3s_paired_singles.c @@ -0,0 +1,1270 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright Novell Inc 2010 + * + * Authors: Alexander Graf <agraf@suse.de> + */ + +#include <asm/kvm.h> +#include <asm/kvm_ppc.h> +#include <asm/disassemble.h> +#include <asm/kvm_book3s.h> +#include <asm/kvm_fpu.h> +#include <asm/reg.h> +#include <asm/cacheflush.h> +#include <asm/switch_to.h> +#include <linux/vmalloc.h> + +/* #define DEBUG */ + +#ifdef DEBUG +#define dprintk printk +#else +#define dprintk(...) do { } while(0); +#endif + +#define OP_LFS 48 +#define OP_LFSU 49 +#define OP_LFD 50 +#define OP_LFDU 51 +#define OP_STFS 52 +#define OP_STFSU 53 +#define OP_STFD 54 +#define OP_STFDU 55 +#define OP_PSQ_L 56 +#define OP_PSQ_LU 57 +#define OP_PSQ_ST 60 +#define OP_PSQ_STU 61 + +#define OP_31_LFSX 535 +#define OP_31_LFSUX 567 +#define OP_31_LFDX 599 +#define OP_31_LFDUX 631 +#define OP_31_STFSX 663 +#define OP_31_STFSUX 695 +#define OP_31_STFX 727 +#define OP_31_STFUX 759 +#define OP_31_LWIZX 887 +#define OP_31_STFIWX 983 + +#define OP_59_FADDS 21 +#define OP_59_FSUBS 20 +#define OP_59_FSQRTS 22 +#define OP_59_FDIVS 18 +#define OP_59_FRES 24 +#define OP_59_FMULS 25 +#define OP_59_FRSQRTES 26 +#define OP_59_FMSUBS 28 +#define OP_59_FMADDS 29 +#define OP_59_FNMSUBS 30 +#define OP_59_FNMADDS 31 + +#define OP_63_FCMPU 0 +#define OP_63_FCPSGN 8 +#define OP_63_FRSP 12 +#define OP_63_FCTIW 14 +#define OP_63_FCTIWZ 15 +#define OP_63_FDIV 18 +#define OP_63_FADD 21 +#define OP_63_FSQRT 22 +#define OP_63_FSEL 23 +#define OP_63_FRE 24 +#define OP_63_FMUL 25 +#define OP_63_FRSQRTE 26 +#define OP_63_FMSUB 28 +#define OP_63_FMADD 29 +#define OP_63_FNMSUB 30 +#define OP_63_FNMADD 31 +#define OP_63_FCMPO 32 +#define OP_63_MTFSB1 38 // XXX +#define OP_63_FSUB 20 +#define OP_63_FNEG 40 +#define OP_63_MCRFS 64 +#define OP_63_MTFSB0 70 +#define OP_63_FMR 72 +#define OP_63_MTFSFI 134 +#define OP_63_FABS 264 +#define OP_63_MFFS 583 +#define OP_63_MTFSF 711 + +#define OP_4X_PS_CMPU0 0 +#define OP_4X_PSQ_LX 6 +#define OP_4XW_PSQ_STX 7 +#define OP_4A_PS_SUM0 10 +#define OP_4A_PS_SUM1 11 +#define OP_4A_PS_MULS0 12 +#define OP_4A_PS_MULS1 13 +#define OP_4A_PS_MADDS0 14 +#define OP_4A_PS_MADDS1 15 +#define OP_4A_PS_DIV 18 +#define OP_4A_PS_SUB 20 +#define OP_4A_PS_ADD 21 +#define OP_4A_PS_SEL 23 +#define OP_4A_PS_RES 24 +#define OP_4A_PS_MUL 25 +#define OP_4A_PS_RSQRTE 26 +#define OP_4A_PS_MSUB 28 +#define OP_4A_PS_MADD 29 +#define OP_4A_PS_NMSUB 30 +#define OP_4A_PS_NMADD 31 +#define OP_4X_PS_CMPO0 32 +#define OP_4X_PSQ_LUX 38 +#define OP_4XW_PSQ_STUX 39 +#define OP_4X_PS_NEG 40 +#define OP_4X_PS_CMPU1 64 +#define OP_4X_PS_MR 72 +#define OP_4X_PS_CMPO1 96 +#define OP_4X_PS_NABS 136 +#define OP_4X_PS_ABS 264 +#define OP_4X_PS_MERGE00 528 +#define OP_4X_PS_MERGE01 560 +#define OP_4X_PS_MERGE10 592 +#define OP_4X_PS_MERGE11 624 + +#define SCALAR_NONE 0 +#define SCALAR_HIGH (1 << 0) +#define SCALAR_LOW (1 << 1) +#define SCALAR_NO_PS0 (1 << 2) +#define SCALAR_NO_PS1 (1 << 3) + +#define GQR_ST_TYPE_MASK 0x00000007 +#define GQR_ST_TYPE_SHIFT 0 +#define GQR_ST_SCALE_MASK 0x00003f00 +#define GQR_ST_SCALE_SHIFT 8 +#define GQR_LD_TYPE_MASK 0x00070000 +#define GQR_LD_TYPE_SHIFT 16 +#define GQR_LD_SCALE_MASK 0x3f000000 +#define GQR_LD_SCALE_SHIFT 24 + +#define GQR_QUANTIZE_FLOAT 0 +#define GQR_QUANTIZE_U8 4 +#define GQR_QUANTIZE_U16 5 +#define GQR_QUANTIZE_S8 6 +#define GQR_QUANTIZE_S16 7 + +#define FPU_LS_SINGLE 0 +#define FPU_LS_DOUBLE 1 +#define FPU_LS_SINGLE_LOW 2 + +static inline void kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt) +{ + kvm_cvt_df(&vcpu->arch.fpr[rt], &vcpu->arch.qpr[rt]); +} + +static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store) +{ + u64 dsisr; + struct kvm_vcpu_arch_shared *shared = vcpu->arch.shared; + + shared->msr = kvmppc_set_field(shared->msr, 33, 36, 0); + shared->msr = kvmppc_set_field(shared->msr, 42, 47, 0); + shared->dar = eaddr; + /* Page Fault */ + dsisr = kvmppc_set_field(0, 33, 33, 1); + if (is_store) + shared->dsisr = kvmppc_set_field(dsisr, 38, 38, 1); + kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE); +} + +static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu, + int rs, ulong addr, int ls_type) +{ + int emulated = EMULATE_FAIL; + int r; + char tmp[8]; + int len = sizeof(u32); + + if (ls_type == FPU_LS_DOUBLE) + len = sizeof(u64); + + /* read from memory */ + r = kvmppc_ld(vcpu, &addr, len, tmp, true); + vcpu->arch.paddr_accessed = addr; + + if (r < 0) { + kvmppc_inject_pf(vcpu, addr, false); + goto done_load; + } else if (r == EMULATE_DO_MMIO) { + emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FPR | rs, + len, 1); + goto done_load; + } + + emulated = EMULATE_DONE; + + /* put in registers */ + switch (ls_type) { + case FPU_LS_SINGLE: + kvm_cvt_fd((u32*)tmp, &vcpu->arch.fpr[rs]); + vcpu->arch.qpr[rs] = *((u32*)tmp); + break; + case FPU_LS_DOUBLE: + vcpu->arch.fpr[rs] = *((u64*)tmp); + break; + } + + dprintk(KERN_INFO "KVM: FPR_LD [0x%llx] at 0x%lx (%d)\n", *(u64*)tmp, + addr, len); + +done_load: + return emulated; +} + +static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu, + int rs, ulong addr, int ls_type) +{ + int emulated = EMULATE_FAIL; + int r; + char tmp[8]; + u64 val; + int len; + + switch (ls_type) { + case FPU_LS_SINGLE: + kvm_cvt_df(&vcpu->arch.fpr[rs], (u32*)tmp); + val = *((u32*)tmp); + len = sizeof(u32); + break; + case FPU_LS_SINGLE_LOW: + *((u32*)tmp) = vcpu->arch.fpr[rs]; + val = vcpu->arch.fpr[rs] & 0xffffffff; + len = sizeof(u32); + break; + case FPU_LS_DOUBLE: + *((u64*)tmp) = vcpu->arch.fpr[rs]; + val = vcpu->arch.fpr[rs]; + len = sizeof(u64); + break; + default: + val = 0; + len = 0; + } + + r = kvmppc_st(vcpu, &addr, len, tmp, true); + vcpu->arch.paddr_accessed = addr; + if (r < 0) { + kvmppc_inject_pf(vcpu, addr, true); + } else if (r == EMULATE_DO_MMIO) { + emulated = kvmppc_handle_store(run, vcpu, val, len, 1); + } else { + emulated = EMULATE_DONE; + } + + dprintk(KERN_INFO "KVM: FPR_ST [0x%llx] at 0x%lx (%d)\n", + val, addr, len); + + return emulated; +} + +static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu, + int rs, ulong addr, bool w, int i) +{ + int emulated = EMULATE_FAIL; + int r; + float one = 1.0; + u32 tmp[2]; + + /* read from memory */ + if (w) { + r = kvmppc_ld(vcpu, &addr, sizeof(u32), tmp, true); + memcpy(&tmp[1], &one, sizeof(u32)); + } else { + r = kvmppc_ld(vcpu, &addr, sizeof(u32) * 2, tmp, true); + } + vcpu->arch.paddr_accessed = addr; + if (r < 0) { + kvmppc_inject_pf(vcpu, addr, false); + goto done_load; + } else if ((r == EMULATE_DO_MMIO) && w) { + emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FPR | rs, + 4, 1); + vcpu->arch.qpr[rs] = tmp[1]; + goto done_load; + } else if (r == EMULATE_DO_MMIO) { + emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FQPR | rs, + 8, 1); + goto done_load; + } + + emulated = EMULATE_DONE; + + /* put in registers */ + kvm_cvt_fd(&tmp[0], &vcpu->arch.fpr[rs]); + vcpu->arch.qpr[rs] = tmp[1]; + + dprintk(KERN_INFO "KVM: PSQ_LD [0x%x, 0x%x] at 0x%lx (%d)\n", tmp[0], + tmp[1], addr, w ? 4 : 8); + +done_load: + return emulated; +} + +static int kvmppc_emulate_psq_store(struct kvm_run *run, struct kvm_vcpu *vcpu, + int rs, ulong addr, bool w, int i) +{ + int emulated = EMULATE_FAIL; + int r; + u32 tmp[2]; + int len = w ? sizeof(u32) : sizeof(u64); + + kvm_cvt_df(&vcpu->arch.fpr[rs], &tmp[0]); + tmp[1] = vcpu->arch.qpr[rs]; + + r = kvmppc_st(vcpu, &addr, len, tmp, true); + vcpu->arch.paddr_accessed = addr; + if (r < 0) { + kvmppc_inject_pf(vcpu, addr, true); + } else if ((r == EMULATE_DO_MMIO) && w) { + emulated = kvmppc_handle_store(run, vcpu, tmp[0], 4, 1); + } else if (r == EMULATE_DO_MMIO) { + u64 val = ((u64)tmp[0] << 32) | tmp[1]; + emulated = kvmppc_handle_store(run, vcpu, val, 8, 1); + } else { + emulated = EMULATE_DONE; + } + + dprintk(KERN_INFO "KVM: PSQ_ST [0x%x, 0x%x] at 0x%lx (%d)\n", + tmp[0], tmp[1], addr, len); + + return emulated; +} + +/* + * Cuts out inst bits with ordering according to spec. + * That means the leftmost bit is zero. All given bits are included. + */ +static inline u32 inst_get_field(u32 inst, int msb, int lsb) +{ + return kvmppc_get_field(inst, msb + 32, lsb + 32); +} + +/* + * Replaces inst bits with ordering according to spec. + */ +static inline u32 inst_set_field(u32 inst, int msb, int lsb, int value) +{ + return kvmppc_set_field(inst, msb + 32, lsb + 32, value); +} + +bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst) +{ + if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)) + return false; + + switch (get_op(inst)) { + case OP_PSQ_L: + case OP_PSQ_LU: + case OP_PSQ_ST: + case OP_PSQ_STU: + case OP_LFS: + case OP_LFSU: + case OP_LFD: + case OP_LFDU: + case OP_STFS: + case OP_STFSU: + case OP_STFD: + case OP_STFDU: + return true; + case 4: + /* X form */ + switch (inst_get_field(inst, 21, 30)) { + case OP_4X_PS_CMPU0: + case OP_4X_PSQ_LX: + case OP_4X_PS_CMPO0: + case OP_4X_PSQ_LUX: + case OP_4X_PS_NEG: + case OP_4X_PS_CMPU1: + case OP_4X_PS_MR: + case OP_4X_PS_CMPO1: + case OP_4X_PS_NABS: + case OP_4X_PS_ABS: + case OP_4X_PS_MERGE00: + case OP_4X_PS_MERGE01: + case OP_4X_PS_MERGE10: + case OP_4X_PS_MERGE11: + return true; + } + /* XW form */ + switch (inst_get_field(inst, 25, 30)) { + case OP_4XW_PSQ_STX: + case OP_4XW_PSQ_STUX: + return true; + } + /* A form */ + switch (inst_get_field(inst, 26, 30)) { + case OP_4A_PS_SUM1: + case OP_4A_PS_SUM0: + case OP_4A_PS_MULS0: + case OP_4A_PS_MULS1: + case OP_4A_PS_MADDS0: + case OP_4A_PS_MADDS1: + case OP_4A_PS_DIV: + case OP_4A_PS_SUB: + case OP_4A_PS_ADD: + case OP_4A_PS_SEL: + case OP_4A_PS_RES: + case OP_4A_PS_MUL: + case OP_4A_PS_RSQRTE: + case OP_4A_PS_MSUB: + case OP_4A_PS_MADD: + case OP_4A_PS_NMSUB: + case OP_4A_PS_NMADD: + return true; + } + break; + case 59: + switch (inst_get_field(inst, 21, 30)) { + case OP_59_FADDS: + case OP_59_FSUBS: + case OP_59_FDIVS: + case OP_59_FRES: + case OP_59_FRSQRTES: + return true; + } + switch (inst_get_field(inst, 26, 30)) { + case OP_59_FMULS: + case OP_59_FMSUBS: + case OP_59_FMADDS: + case OP_59_FNMSUBS: + case OP_59_FNMADDS: + return true; + } + break; + case 63: + switch (inst_get_field(inst, 21, 30)) { + case OP_63_MTFSB0: + case OP_63_MTFSB1: + case OP_63_MTFSF: + case OP_63_MTFSFI: + case OP_63_MCRFS: + case OP_63_MFFS: + case OP_63_FCMPU: + case OP_63_FCMPO: + case OP_63_FNEG: + case OP_63_FMR: + case OP_63_FABS: + case OP_63_FRSP: + case OP_63_FDIV: + case OP_63_FADD: + case OP_63_FSUB: + case OP_63_FCTIW: + case OP_63_FCTIWZ: + case OP_63_FRSQRTE: + case OP_63_FCPSGN: + return true; + } + switch (inst_get_field(inst, 26, 30)) { + case OP_63_FMUL: + case OP_63_FSEL: + case OP_63_FMSUB: + case OP_63_FMADD: + case OP_63_FNMSUB: + case OP_63_FNMADD: + return true; + } + break; + case 31: + switch (inst_get_field(inst, 21, 30)) { + case OP_31_LFSX: + case OP_31_LFSUX: + case OP_31_LFDX: + case OP_31_LFDUX: + case OP_31_STFSX: + case OP_31_STFSUX: + case OP_31_STFX: + case OP_31_STFUX: + case OP_31_STFIWX: + return true; + } + break; + } + + return false; +} + +static int get_d_signext(u32 inst) +{ + int d = inst & 0x8ff; + + if (d & 0x800) + return -(d & 0x7ff); + + return (d & 0x7ff); +} + +static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, + int reg_out, int reg_in1, int reg_in2, + int reg_in3, int scalar, + void (*func)(u64 *fpscr, + u32 *dst, u32 *src1, + u32 *src2, u32 *src3)) +{ + u32 *qpr = vcpu->arch.qpr; + u64 *fpr = vcpu->arch.fpr; + u32 ps0_out; + u32 ps0_in1, ps0_in2, ps0_in3; + u32 ps1_in1, ps1_in2, ps1_in3; + + /* RC */ + WARN_ON(rc); + + /* PS0 */ + kvm_cvt_df(&fpr[reg_in1], &ps0_in1); + kvm_cvt_df(&fpr[reg_in2], &ps0_in2); + kvm_cvt_df(&fpr[reg_in3], &ps0_in3); + + if (scalar & SCALAR_LOW) + ps0_in2 = qpr[reg_in2]; + + func(&vcpu->arch.fpscr, &ps0_out, &ps0_in1, &ps0_in2, &ps0_in3); + + dprintk(KERN_INFO "PS3 ps0 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n", + ps0_in1, ps0_in2, ps0_in3, ps0_out); + + if (!(scalar & SCALAR_NO_PS0)) + kvm_cvt_fd(&ps0_out, &fpr[reg_out]); + + /* PS1 */ + ps1_in1 = qpr[reg_in1]; + ps1_in2 = qpr[reg_in2]; + ps1_in3 = qpr[reg_in3]; + + if (scalar & SCALAR_HIGH) + ps1_in2 = ps0_in2; + + if (!(scalar & SCALAR_NO_PS1)) + func(&vcpu->arch.fpscr, &qpr[reg_out], &ps1_in1, &ps1_in2, &ps1_in3); + + dprintk(KERN_INFO "PS3 ps1 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n", + ps1_in1, ps1_in2, ps1_in3, qpr[reg_out]); + + return EMULATE_DONE; +} + +static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, + int reg_out, int reg_in1, int reg_in2, + int scalar, + void (*func)(u64 *fpscr, + u32 *dst, u32 *src1, + u32 *src2)) +{ + u32 *qpr = vcpu->arch.qpr; + u64 *fpr = vcpu->arch.fpr; + u32 ps0_out; + u32 ps0_in1, ps0_in2; + u32 ps1_out; + u32 ps1_in1, ps1_in2; + + /* RC */ + WARN_ON(rc); + + /* PS0 */ + kvm_cvt_df(&fpr[reg_in1], &ps0_in1); + + if (scalar & SCALAR_LOW) + ps0_in2 = qpr[reg_in2]; + else + kvm_cvt_df(&fpr[reg_in2], &ps0_in2); + + func(&vcpu->arch.fpscr, &ps0_out, &ps0_in1, &ps0_in2); + + if (!(scalar & SCALAR_NO_PS0)) { + dprintk(KERN_INFO "PS2 ps0 -> f(0x%x, 0x%x) = 0x%x\n", + ps0_in1, ps0_in2, ps0_out); + + kvm_cvt_fd(&ps0_out, &fpr[reg_out]); + } + + /* PS1 */ + ps1_in1 = qpr[reg_in1]; + ps1_in2 = qpr[reg_in2]; + + if (scalar & SCALAR_HIGH) + ps1_in2 = ps0_in2; + + func(&vcpu->arch.fpscr, &ps1_out, &ps1_in1, &ps1_in2); + + if (!(scalar & SCALAR_NO_PS1)) { + qpr[reg_out] = ps1_out; + + dprintk(KERN_INFO "PS2 ps1 -> f(0x%x, 0x%x) = 0x%x\n", + ps1_in1, ps1_in2, qpr[reg_out]); + } + + return EMULATE_DONE; +} + +static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc, + int reg_out, int reg_in, + void (*func)(u64 *t, + u32 *dst, u32 *src1)) +{ + u32 *qpr = vcpu->arch.qpr; + u64 *fpr = vcpu->arch.fpr; + u32 ps0_out, ps0_in; + u32 ps1_in; + + /* RC */ + WARN_ON(rc); + + /* PS0 */ + kvm_cvt_df(&fpr[reg_in], &ps0_in); + func(&vcpu->arch.fpscr, &ps0_out, &ps0_in); + + dprintk(KERN_INFO "PS1 ps0 -> f(0x%x) = 0x%x\n", + ps0_in, ps0_out); + + kvm_cvt_fd(&ps0_out, &fpr[reg_out]); + + /* PS1 */ + ps1_in = qpr[reg_in]; + func(&vcpu->arch.fpscr, &qpr[reg_out], &ps1_in); + + dprintk(KERN_INFO "PS1 ps1 -> f(0x%x) = 0x%x\n", + ps1_in, qpr[reg_out]); + + return EMULATE_DONE; +} + +int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) +{ + u32 inst = kvmppc_get_last_inst(vcpu); + enum emulation_result emulated = EMULATE_DONE; + + int ax_rd = inst_get_field(inst, 6, 10); + int ax_ra = inst_get_field(inst, 11, 15); + int ax_rb = inst_get_field(inst, 16, 20); + int ax_rc = inst_get_field(inst, 21, 25); + short full_d = inst_get_field(inst, 16, 31); + + u64 *fpr_d = &vcpu->arch.fpr[ax_rd]; + u64 *fpr_a = &vcpu->arch.fpr[ax_ra]; + u64 *fpr_b = &vcpu->arch.fpr[ax_rb]; + u64 *fpr_c = &vcpu->arch.fpr[ax_rc]; + + bool rcomp = (inst & 1) ? true : false; + u32 cr = kvmppc_get_cr(vcpu); +#ifdef DEBUG + int i; +#endif + + if (!kvmppc_inst_is_paired_single(vcpu, inst)) + return EMULATE_FAIL; + + if (!(vcpu->arch.shared->msr & MSR_FP)) { + kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL); + return EMULATE_AGAIN; + } + + kvmppc_giveup_ext(vcpu, MSR_FP); + preempt_disable(); + enable_kernel_fp(); + /* Do we need to clear FE0 / FE1 here? Don't think so. */ + +#ifdef DEBUG + for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { + u32 f; + kvm_cvt_df(&vcpu->arch.fpr[i], &f); + dprintk(KERN_INFO "FPR[%d] = 0x%x / 0x%llx QPR[%d] = 0x%x\n", + i, f, vcpu->arch.fpr[i], i, vcpu->arch.qpr[i]); + } +#endif + + switch (get_op(inst)) { + case OP_PSQ_L: + { + ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; + bool w = inst_get_field(inst, 16, 16) ? true : false; + int i = inst_get_field(inst, 17, 19); + + addr += get_d_signext(inst); + emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); + break; + } + case OP_PSQ_LU: + { + ulong addr = kvmppc_get_gpr(vcpu, ax_ra); + bool w = inst_get_field(inst, 16, 16) ? true : false; + int i = inst_get_field(inst, 17, 19); + + addr += get_d_signext(inst); + emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); + + if (emulated == EMULATE_DONE) + kvmppc_set_gpr(vcpu, ax_ra, addr); + break; + } + case OP_PSQ_ST: + { + ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; + bool w = inst_get_field(inst, 16, 16) ? true : false; + int i = inst_get_field(inst, 17, 19); + + addr += get_d_signext(inst); + emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); + break; + } + case OP_PSQ_STU: + { + ulong addr = kvmppc_get_gpr(vcpu, ax_ra); + bool w = inst_get_field(inst, 16, 16) ? true : false; + int i = inst_get_field(inst, 17, 19); + + addr += get_d_signext(inst); + emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); + + if (emulated == EMULATE_DONE) + kvmppc_set_gpr(vcpu, ax_ra, addr); + break; + } + case 4: + /* X form */ + switch (inst_get_field(inst, 21, 30)) { + case OP_4X_PS_CMPU0: + /* XXX */ + emulated = EMULATE_FAIL; + break; + case OP_4X_PSQ_LX: + { + ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; + bool w = inst_get_field(inst, 21, 21) ? true : false; + int i = inst_get_field(inst, 22, 24); + + addr += kvmppc_get_gpr(vcpu, ax_rb); + emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); + break; + } + case OP_4X_PS_CMPO0: + /* XXX */ + emulated = EMULATE_FAIL; + break; + case OP_4X_PSQ_LUX: + { + ulong addr = kvmppc_get_gpr(vcpu, ax_ra); + bool w = inst_get_field(inst, 21, 21) ? true : false; + int i = inst_get_field(inst, 22, 24); + + addr += kvmppc_get_gpr(vcpu, ax_rb); + emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); + + if (emulated == EMULATE_DONE) + kvmppc_set_gpr(vcpu, ax_ra, addr); + break; + } + case OP_4X_PS_NEG: + vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; + vcpu->arch.fpr[ax_rd] ^= 0x8000000000000000ULL; + vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; + vcpu->arch.qpr[ax_rd] ^= 0x80000000; + break; + case OP_4X_PS_CMPU1: + /* XXX */ + emulated = EMULATE_FAIL; + break; + case OP_4X_PS_MR: + WARN_ON(rcomp); + vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; + vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; + break; + case OP_4X_PS_CMPO1: + /* XXX */ + emulated = EMULATE_FAIL; + break; + case OP_4X_PS_NABS: + WARN_ON(rcomp); + vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; + vcpu->arch.fpr[ax_rd] |= 0x8000000000000000ULL; + vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; + vcpu->arch.qpr[ax_rd] |= 0x80000000; + break; + case OP_4X_PS_ABS: + WARN_ON(rcomp); + vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; + vcpu->arch.fpr[ax_rd] &= ~0x8000000000000000ULL; + vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; + vcpu->arch.qpr[ax_rd] &= ~0x80000000; + break; + case OP_4X_PS_MERGE00: + WARN_ON(rcomp); + vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra]; + /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ + kvm_cvt_df(&vcpu->arch.fpr[ax_rb], + &vcpu->arch.qpr[ax_rd]); + break; + case OP_4X_PS_MERGE01: + WARN_ON(rcomp); + vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra]; + vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; + break; + case OP_4X_PS_MERGE10: + WARN_ON(rcomp); + /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ + kvm_cvt_fd(&vcpu->arch.qpr[ax_ra], + &vcpu->arch.fpr[ax_rd]); + /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ + kvm_cvt_df(&vcpu->arch.fpr[ax_rb], + &vcpu->arch.qpr[ax_rd]); + break; + case OP_4X_PS_MERGE11: + WARN_ON(rcomp); + /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ + kvm_cvt_fd(&vcpu->arch.qpr[ax_ra], + &vcpu->arch.fpr[ax_rd]); + vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; + break; + } + /* XW form */ + switch (inst_get_field(inst, 25, 30)) { + case OP_4XW_PSQ_STX: + { + ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; + bool w = inst_get_field(inst, 21, 21) ? true : false; + int i = inst_get_field(inst, 22, 24); + + addr += kvmppc_get_gpr(vcpu, ax_rb); + emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); + break; + } + case OP_4XW_PSQ_STUX: + { + ulong addr = kvmppc_get_gpr(vcpu, ax_ra); + bool w = inst_get_field(inst, 21, 21) ? true : false; + int i = inst_get_field(inst, 22, 24); + + addr += kvmppc_get_gpr(vcpu, ax_rb); + emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); + + if (emulated == EMULATE_DONE) + kvmppc_set_gpr(vcpu, ax_ra, addr); + break; + } + } + /* A form */ + switch (inst_get_field(inst, 26, 30)) { + case OP_4A_PS_SUM1: + emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, + ax_rb, ax_ra, SCALAR_NO_PS0 | SCALAR_HIGH, fps_fadds); + vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rc]; + break; + case OP_4A_PS_SUM0: + emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, + ax_ra, ax_rb, SCALAR_NO_PS1 | SCALAR_LOW, fps_fadds); + vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rc]; + break; + case OP_4A_PS_MULS0: + emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, + ax_ra, ax_rc, SCALAR_HIGH, fps_fmuls); + break; + case OP_4A_PS_MULS1: + emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, + ax_ra, ax_rc, SCALAR_LOW, fps_fmuls); + break; + case OP_4A_PS_MADDS0: + emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, + ax_ra, ax_rc, ax_rb, SCALAR_HIGH, fps_fmadds); + break; + case OP_4A_PS_MADDS1: + emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, + ax_ra, ax_rc, ax_rb, SCALAR_LOW, fps_fmadds); + break; + case OP_4A_PS_DIV: + emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, + ax_ra, ax_rb, SCALAR_NONE, fps_fdivs); + break; + case OP_4A_PS_SUB: + emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, + ax_ra, ax_rb, SCALAR_NONE, fps_fsubs); + break; + case OP_4A_PS_ADD: + emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, + ax_ra, ax_rb, SCALAR_NONE, fps_fadds); + break; + case OP_4A_PS_SEL: + emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, + ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fsel); + break; + case OP_4A_PS_RES: + emulated = kvmppc_ps_one_in(vcpu, rcomp, ax_rd, + ax_rb, fps_fres); + break; + case OP_4A_PS_MUL: + emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, + ax_ra, ax_rc, SCALAR_NONE, fps_fmuls); + break; + case OP_4A_PS_RSQRTE: + emulated = kvmppc_ps_one_in(vcpu, rcomp, ax_rd, + ax_rb, fps_frsqrte); + break; + case OP_4A_PS_MSUB: + emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, + ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fmsubs); + break; + case OP_4A_PS_MADD: + emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, + ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fmadds); + break; + case OP_4A_PS_NMSUB: + emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, + ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fnmsubs); + break; + case OP_4A_PS_NMADD: + emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, + ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fnmadds); + break; + } + break; + + /* Real FPU operations */ + + case OP_LFS: + { + ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; + + emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, + FPU_LS_SINGLE); + break; + } + case OP_LFSU: + { + ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; + + emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, + FPU_LS_SINGLE); + + if (emulated == EMULATE_DONE) + kvmppc_set_gpr(vcpu, ax_ra, addr); + break; + } + case OP_LFD: + { + ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; + + emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, + FPU_LS_DOUBLE); + break; + } + case OP_LFDU: + { + ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; + + emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, + FPU_LS_DOUBLE); + + if (emulated == EMULATE_DONE) + kvmppc_set_gpr(vcpu, ax_ra, addr); + break; + } + case OP_STFS: + { + ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; + + emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, + FPU_LS_SINGLE); + break; + } + case OP_STFSU: + { + ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; + + emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, + FPU_LS_SINGLE); + + if (emulated == EMULATE_DONE) + kvmppc_set_gpr(vcpu, ax_ra, addr); + break; + } + case OP_STFD: + { + ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; + + emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, + FPU_LS_DOUBLE); + break; + } + case OP_STFDU: + { + ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; + + emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, + FPU_LS_DOUBLE); + + if (emulated == EMULATE_DONE) + kvmppc_set_gpr(vcpu, ax_ra, addr); + break; + } + case 31: + switch (inst_get_field(inst, 21, 30)) { + case OP_31_LFSX: + { + ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; + + addr += kvmppc_get_gpr(vcpu, ax_rb); + emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, + addr, FPU_LS_SINGLE); + break; + } + case OP_31_LFSUX: + { + ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + + kvmppc_get_gpr(vcpu, ax_rb); + + emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, + addr, FPU_LS_SINGLE); + + if (emulated == EMULATE_DONE) + kvmppc_set_gpr(vcpu, ax_ra, addr); + break; + } + case OP_31_LFDX: + { + ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + + kvmppc_get_gpr(vcpu, ax_rb); + + emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, + addr, FPU_LS_DOUBLE); + break; + } + case OP_31_LFDUX: + { + ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + + kvmppc_get_gpr(vcpu, ax_rb); + + emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, + addr, FPU_LS_DOUBLE); + + if (emulated == EMULATE_DONE) + kvmppc_set_gpr(vcpu, ax_ra, addr); + break; + } + case OP_31_STFSX: + { + ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + + kvmppc_get_gpr(vcpu, ax_rb); + + emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, + addr, FPU_LS_SINGLE); + break; + } + case OP_31_STFSUX: + { + ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + + kvmppc_get_gpr(vcpu, ax_rb); + + emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, + addr, FPU_LS_SINGLE); + + if (emulated == EMULATE_DONE) + kvmppc_set_gpr(vcpu, ax_ra, addr); + break; + } + case OP_31_STFX: + { + ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + + kvmppc_get_gpr(vcpu, ax_rb); + + emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, + addr, FPU_LS_DOUBLE); + break; + } + case OP_31_STFUX: + { + ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + + kvmppc_get_gpr(vcpu, ax_rb); + + emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, + addr, FPU_LS_DOUBLE); + + if (emulated == EMULATE_DONE) + kvmppc_set_gpr(vcpu, ax_ra, addr); + break; + } + case OP_31_STFIWX: + { + ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + + kvmppc_get_gpr(vcpu, ax_rb); + + emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, + addr, + FPU_LS_SINGLE_LOW); + break; + } + break; + } + break; + case 59: + switch (inst_get_field(inst, 21, 30)) { + case OP_59_FADDS: + fpd_fadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); + kvmppc_sync_qpr(vcpu, ax_rd); + break; + case OP_59_FSUBS: + fpd_fsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); + kvmppc_sync_qpr(vcpu, ax_rd); + break; + case OP_59_FDIVS: + fpd_fdivs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); + kvmppc_sync_qpr(vcpu, ax_rd); + break; + case OP_59_FRES: + fpd_fres(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + kvmppc_sync_qpr(vcpu, ax_rd); + break; + case OP_59_FRSQRTES: + fpd_frsqrtes(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + kvmppc_sync_qpr(vcpu, ax_rd); + break; + } + switch (inst_get_field(inst, 26, 30)) { + case OP_59_FMULS: + fpd_fmuls(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c); + kvmppc_sync_qpr(vcpu, ax_rd); + break; + case OP_59_FMSUBS: + fpd_fmsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + kvmppc_sync_qpr(vcpu, ax_rd); + break; + case OP_59_FMADDS: + fpd_fmadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + kvmppc_sync_qpr(vcpu, ax_rd); + break; + case OP_59_FNMSUBS: + fpd_fnmsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + kvmppc_sync_qpr(vcpu, ax_rd); + break; + case OP_59_FNMADDS: + fpd_fnmadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + kvmppc_sync_qpr(vcpu, ax_rd); + break; + } + break; + case 63: + switch (inst_get_field(inst, 21, 30)) { + case OP_63_MTFSB0: + case OP_63_MTFSB1: + case OP_63_MCRFS: + case OP_63_MTFSFI: + /* XXX need to implement */ + break; + case OP_63_MFFS: + /* XXX missing CR */ + *fpr_d = vcpu->arch.fpscr; + break; + case OP_63_MTFSF: + /* XXX missing fm bits */ + /* XXX missing CR */ + vcpu->arch.fpscr = *fpr_b; + break; + case OP_63_FCMPU: + { + u32 tmp_cr; + u32 cr0_mask = 0xf0000000; + u32 cr_shift = inst_get_field(inst, 6, 8) * 4; + + fpd_fcmpu(&vcpu->arch.fpscr, &tmp_cr, fpr_a, fpr_b); + cr &= ~(cr0_mask >> cr_shift); + cr |= (cr & cr0_mask) >> cr_shift; + break; + } + case OP_63_FCMPO: + { + u32 tmp_cr; + u32 cr0_mask = 0xf0000000; + u32 cr_shift = inst_get_field(inst, 6, 8) * 4; + + fpd_fcmpo(&vcpu->arch.fpscr, &tmp_cr, fpr_a, fpr_b); + cr &= ~(cr0_mask >> cr_shift); + cr |= (cr & cr0_mask) >> cr_shift; + break; + } + case OP_63_FNEG: + fpd_fneg(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + break; + case OP_63_FMR: + *fpr_d = *fpr_b; + break; + case OP_63_FABS: + fpd_fabs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + break; + case OP_63_FCPSGN: + fpd_fcpsgn(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); + break; + case OP_63_FDIV: + fpd_fdiv(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); + break; + case OP_63_FADD: + fpd_fadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); + break; + case OP_63_FSUB: + fpd_fsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); + break; + case OP_63_FCTIW: + fpd_fctiw(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + break; + case OP_63_FCTIWZ: + fpd_fctiwz(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + break; + case OP_63_FRSP: + fpd_frsp(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + kvmppc_sync_qpr(vcpu, ax_rd); + break; + case OP_63_FRSQRTE: + { + double one = 1.0f; + + /* fD = sqrt(fB) */ + fpd_fsqrt(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + /* fD = 1.0f / fD */ + fpd_fdiv(&vcpu->arch.fpscr, &cr, fpr_d, (u64*)&one, fpr_d); + break; + } + } + switch (inst_get_field(inst, 26, 30)) { + case OP_63_FMUL: + fpd_fmul(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c); + break; + case OP_63_FSEL: + fpd_fsel(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + break; + case OP_63_FMSUB: + fpd_fmsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + break; + case OP_63_FMADD: + fpd_fmadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + break; + case OP_63_FNMSUB: + fpd_fnmsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + break; + case OP_63_FNMADD: + fpd_fnmadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + break; + } + break; + } + +#ifdef DEBUG + for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { + u32 f; + kvm_cvt_df(&vcpu->arch.fpr[i], &f); + dprintk(KERN_INFO "FPR[%d] = 0x%x\n", i, f); + } +#endif + + if (rcomp) + kvmppc_set_cr(vcpu, cr); + + preempt_enable(); + + return emulated; +} diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c new file mode 100644 index 00000000..7759053d --- /dev/null +++ b/arch/powerpc/kvm/book3s_pr.c @@ -0,0 +1,1176 @@ +/* + * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved. + * + * Authors: + * Alexander Graf <agraf@suse.de> + * Kevin Wolf <mail@kevin-wolf.de> + * Paul Mackerras <paulus@samba.org> + * + * Description: + * Functions relating to running KVM on Book 3S processors where + * we don't have access to hypervisor mode, and we run the guest + * in problem state (user mode). + * + * This file is derived from arch/powerpc/kvm/44x.c, + * by Hollis Blanchard <hollisb@us.ibm.com>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include <linux/kvm_host.h> +#include <linux/export.h> +#include <linux/err.h> +#include <linux/slab.h> + +#include <asm/reg.h> +#include <asm/cputable.h> +#include <asm/cacheflush.h> +#include <asm/tlbflush.h> +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/kvm_ppc.h> +#include <asm/kvm_book3s.h> +#include <asm/mmu_context.h> +#include <asm/switch_to.h> +#include <linux/gfp.h> +#include <linux/sched.h> +#include <linux/vmalloc.h> +#include <linux/highmem.h> + +#include "trace.h" + +/* #define EXIT_DEBUG */ +/* #define DEBUG_EXT */ + +static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, + ulong msr); + +/* Some compatibility defines */ +#ifdef CONFIG_PPC_BOOK3S_32 +#define MSR_USER32 MSR_USER +#define MSR_USER64 MSR_USER +#define HW_PAGE_SIZE PAGE_SIZE +#define __hard_irq_disable local_irq_disable +#define __hard_irq_enable local_irq_enable +#endif + +void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ +#ifdef CONFIG_PPC_BOOK3S_64 + struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); + memcpy(svcpu->slb, to_book3s(vcpu)->slb_shadow, sizeof(svcpu->slb)); + memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu, + sizeof(get_paca()->shadow_vcpu)); + svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max; + svcpu_put(svcpu); +#endif + +#ifdef CONFIG_PPC_BOOK3S_32 + current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu; +#endif +} + +void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_PPC_BOOK3S_64 + struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); + memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb)); + memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu, + sizeof(get_paca()->shadow_vcpu)); + to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max; + svcpu_put(svcpu); +#endif + + kvmppc_giveup_ext(vcpu, MSR_FP); + kvmppc_giveup_ext(vcpu, MSR_VEC); + kvmppc_giveup_ext(vcpu, MSR_VSX); +} + +static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) +{ + ulong smsr = vcpu->arch.shared->msr; + + /* Guest MSR values */ + smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_DE; + /* Process MSR values */ + smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE; + /* External providers the guest reserved */ + smsr |= (vcpu->arch.shared->msr & vcpu->arch.guest_owned_ext); + /* 64-bit Process MSR values */ +#ifdef CONFIG_PPC_BOOK3S_64 + smsr |= MSR_ISF | MSR_HV; +#endif + vcpu->arch.shadow_msr = smsr; +} + +void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) +{ + ulong old_msr = vcpu->arch.shared->msr; + +#ifdef EXIT_DEBUG + printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr); +#endif + + msr &= to_book3s(vcpu)->msr_mask; + vcpu->arch.shared->msr = msr; + kvmppc_recalc_shadow_msr(vcpu); + + if (msr & MSR_POW) { + if (!vcpu->arch.pending_exceptions) { + kvm_vcpu_block(vcpu); + vcpu->stat.halt_wakeup++; + + /* Unset POW bit after we woke up */ + msr &= ~MSR_POW; + vcpu->arch.shared->msr = msr; + } + } + + if ((vcpu->arch.shared->msr & (MSR_PR|MSR_IR|MSR_DR)) != + (old_msr & (MSR_PR|MSR_IR|MSR_DR))) { + kvmppc_mmu_flush_segments(vcpu); + kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); + + /* Preload magic page segment when in kernel mode */ + if (!(msr & MSR_PR) && vcpu->arch.magic_page_pa) { + struct kvm_vcpu_arch *a = &vcpu->arch; + + if (msr & MSR_DR) + kvmppc_mmu_map_segment(vcpu, a->magic_page_ea); + else + kvmppc_mmu_map_segment(vcpu, a->magic_page_pa); + } + } + + /* Preload FPU if it's enabled */ + if (vcpu->arch.shared->msr & MSR_FP) + kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); +} + +void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) +{ + u32 host_pvr; + + vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB; + vcpu->arch.pvr = pvr; +#ifdef CONFIG_PPC_BOOK3S_64 + if ((pvr >= 0x330000) && (pvr < 0x70330000)) { + kvmppc_mmu_book3s_64_init(vcpu); + if (!to_book3s(vcpu)->hior_explicit) + to_book3s(vcpu)->hior = 0xfff00000; + to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL; + vcpu->arch.cpu_type = KVM_CPU_3S_64; + } else +#endif + { + kvmppc_mmu_book3s_32_init(vcpu); + if (!to_book3s(vcpu)->hior_explicit) + to_book3s(vcpu)->hior = 0; + to_book3s(vcpu)->msr_mask = 0xffffffffULL; + vcpu->arch.cpu_type = KVM_CPU_3S_32; + } + + kvmppc_sanity_check(vcpu); + + /* If we are in hypervisor level on 970, we can tell the CPU to + * treat DCBZ as 32 bytes store */ + vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32; + if (vcpu->arch.mmu.is_dcbz32(vcpu) && (mfmsr() & MSR_HV) && + !strcmp(cur_cpu_spec->platform, "ppc970")) + vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; + + /* Cell performs badly if MSR_FEx are set. So let's hope nobody + really needs them in a VM on Cell and force disable them. */ + if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be")) + to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1); + +#ifdef CONFIG_PPC_BOOK3S_32 + /* 32 bit Book3S always has 32 byte dcbz */ + vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; +#endif + + /* On some CPUs we can execute paired single operations natively */ + asm ( "mfpvr %0" : "=r"(host_pvr)); + switch (host_pvr) { + case 0x00080200: /* lonestar 2.0 */ + case 0x00088202: /* lonestar 2.2 */ + case 0x70000100: /* gekko 1.0 */ + case 0x00080100: /* gekko 2.0 */ + case 0x00083203: /* gekko 2.3a */ + case 0x00083213: /* gekko 2.3b */ + case 0x00083204: /* gekko 2.4 */ + case 0x00083214: /* gekko 2.4e (8SE) - retail HW2 */ + case 0x00087200: /* broadway */ + vcpu->arch.hflags |= BOOK3S_HFLAG_NATIVE_PS; + /* Enable HID2.PSE - in case we need it later */ + mtspr(SPRN_HID2_GEKKO, mfspr(SPRN_HID2_GEKKO) | (1 << 29)); + } +} + +/* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To + * make Book3s_32 Linux work on Book3s_64, we have to make sure we trap dcbz to + * emulate 32 bytes dcbz length. + * + * The Book3s_64 inventors also realized this case and implemented a special bit + * in the HID5 register, which is a hypervisor ressource. Thus we can't use it. + * + * My approach here is to patch the dcbz instruction on executing pages. + */ +static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) +{ + struct page *hpage; + u64 hpage_offset; + u32 *page; + int i; + + hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT); + if (is_error_page(hpage)) { + kvm_release_page_clean(hpage); + return; + } + + hpage_offset = pte->raddr & ~PAGE_MASK; + hpage_offset &= ~0xFFFULL; + hpage_offset /= 4; + + get_page(hpage); + page = kmap_atomic(hpage); + + /* patch dcbz into reserved instruction, so we trap */ + for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++) + if ((page[i] & 0xff0007ff) == INS_DCBZ) + page[i] &= 0xfffffff7; + + kunmap_atomic(page); + put_page(hpage); +} + +static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) +{ + ulong mp_pa = vcpu->arch.magic_page_pa; + + if (unlikely(mp_pa) && + unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) { + return 1; + } + + return kvm_is_visible_gfn(vcpu->kvm, gfn); +} + +int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, + ulong eaddr, int vec) +{ + bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE); + int r = RESUME_GUEST; + int relocated; + int page_found = 0; + struct kvmppc_pte pte; + bool is_mmio = false; + bool dr = (vcpu->arch.shared->msr & MSR_DR) ? true : false; + bool ir = (vcpu->arch.shared->msr & MSR_IR) ? true : false; + u64 vsid; + + relocated = data ? dr : ir; + + /* Resolve real address if translation turned on */ + if (relocated) { + page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data); + } else { + pte.may_execute = true; + pte.may_read = true; + pte.may_write = true; + pte.raddr = eaddr & KVM_PAM; + pte.eaddr = eaddr; + pte.vpage = eaddr >> 12; + } + + switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { + case 0: + pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12)); + break; + case MSR_DR: + case MSR_IR: + vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); + + if ((vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) == MSR_DR) + pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12)); + else + pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12)); + pte.vpage |= vsid; + + if (vsid == -1) + page_found = -EINVAL; + break; + } + + if (vcpu->arch.mmu.is_dcbz32(vcpu) && + (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) { + /* + * If we do the dcbz hack, we have to NX on every execution, + * so we can patch the executing code. This renders our guest + * NX-less. + */ + pte.may_execute = !data; + } + + if (page_found == -ENOENT) { + /* Page not found in guest PTE entries */ + struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); + vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); + vcpu->arch.shared->dsisr = svcpu->fault_dsisr; + vcpu->arch.shared->msr |= + (svcpu->shadow_srr1 & 0x00000000f8000000ULL); + svcpu_put(svcpu); + kvmppc_book3s_queue_irqprio(vcpu, vec); + } else if (page_found == -EPERM) { + /* Storage protection */ + struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); + vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); + vcpu->arch.shared->dsisr = svcpu->fault_dsisr & ~DSISR_NOHPTE; + vcpu->arch.shared->dsisr |= DSISR_PROTFAULT; + vcpu->arch.shared->msr |= + svcpu->shadow_srr1 & 0x00000000f8000000ULL; + svcpu_put(svcpu); + kvmppc_book3s_queue_irqprio(vcpu, vec); + } else if (page_found == -EINVAL) { + /* Page not found in guest SLB */ + vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); + kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80); + } else if (!is_mmio && + kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) { + /* The guest's PTE is not mapped yet. Map on the host */ + kvmppc_mmu_map_page(vcpu, &pte); + if (data) + vcpu->stat.sp_storage++; + else if (vcpu->arch.mmu.is_dcbz32(vcpu) && + (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) + kvmppc_patch_dcbz(vcpu, &pte); + } else { + /* MMIO */ + vcpu->stat.mmio_exits++; + vcpu->arch.paddr_accessed = pte.raddr; + r = kvmppc_emulate_mmio(run, vcpu); + if ( r == RESUME_HOST_NV ) + r = RESUME_HOST; + } + + return r; +} + +static inline int get_fpr_index(int i) +{ +#ifdef CONFIG_VSX + i *= 2; +#endif + return i; +} + +/* Give up external provider (FPU, Altivec, VSX) */ +void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) +{ + struct thread_struct *t = ¤t->thread; + u64 *vcpu_fpr = vcpu->arch.fpr; +#ifdef CONFIG_VSX + u64 *vcpu_vsx = vcpu->arch.vsr; +#endif + u64 *thread_fpr = (u64*)t->fpr; + int i; + + if (!(vcpu->arch.guest_owned_ext & msr)) + return; + +#ifdef DEBUG_EXT + printk(KERN_INFO "Giving up ext 0x%lx\n", msr); +#endif + + switch (msr) { + case MSR_FP: + giveup_fpu(current); + for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) + vcpu_fpr[i] = thread_fpr[get_fpr_index(i)]; + + vcpu->arch.fpscr = t->fpscr.val; + break; + case MSR_VEC: +#ifdef CONFIG_ALTIVEC + giveup_altivec(current); + memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr)); + vcpu->arch.vscr = t->vscr; +#endif + break; + case MSR_VSX: +#ifdef CONFIG_VSX + __giveup_vsx(current); + for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++) + vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1]; +#endif + break; + default: + BUG(); + } + + vcpu->arch.guest_owned_ext &= ~msr; + current->thread.regs->msr &= ~msr; + kvmppc_recalc_shadow_msr(vcpu); +} + +static int kvmppc_read_inst(struct kvm_vcpu *vcpu) +{ + ulong srr0 = kvmppc_get_pc(vcpu); + u32 last_inst = kvmppc_get_last_inst(vcpu); + int ret; + + ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false); + if (ret == -ENOENT) { + ulong msr = vcpu->arch.shared->msr; + + msr = kvmppc_set_field(msr, 33, 33, 1); + msr = kvmppc_set_field(msr, 34, 36, 0); + vcpu->arch.shared->msr = kvmppc_set_field(msr, 42, 47, 0); + kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE); + return EMULATE_AGAIN; + } + + return EMULATE_DONE; +} + +static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr) +{ + + /* Need to do paired single emulation? */ + if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)) + return EMULATE_DONE; + + /* Read out the instruction */ + if (kvmppc_read_inst(vcpu) == EMULATE_DONE) + /* Need to emulate */ + return EMULATE_FAIL; + + return EMULATE_AGAIN; +} + +/* Handle external providers (FPU, Altivec, VSX) */ +static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, + ulong msr) +{ + struct thread_struct *t = ¤t->thread; + u64 *vcpu_fpr = vcpu->arch.fpr; +#ifdef CONFIG_VSX + u64 *vcpu_vsx = vcpu->arch.vsr; +#endif + u64 *thread_fpr = (u64*)t->fpr; + int i; + + /* When we have paired singles, we emulate in software */ + if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE) + return RESUME_GUEST; + + if (!(vcpu->arch.shared->msr & msr)) { + kvmppc_book3s_queue_irqprio(vcpu, exit_nr); + return RESUME_GUEST; + } + + /* We already own the ext */ + if (vcpu->arch.guest_owned_ext & msr) { + return RESUME_GUEST; + } + +#ifdef DEBUG_EXT + printk(KERN_INFO "Loading up ext 0x%lx\n", msr); +#endif + + current->thread.regs->msr |= msr; + + switch (msr) { + case MSR_FP: + for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) + thread_fpr[get_fpr_index(i)] = vcpu_fpr[i]; + + t->fpscr.val = vcpu->arch.fpscr; + t->fpexc_mode = 0; + kvmppc_load_up_fpu(); + break; + case MSR_VEC: +#ifdef CONFIG_ALTIVEC + memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr)); + t->vscr = vcpu->arch.vscr; + t->vrsave = -1; + kvmppc_load_up_altivec(); +#endif + break; + case MSR_VSX: +#ifdef CONFIG_VSX + for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++) + thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i]; + kvmppc_load_up_vsx(); +#endif + break; + default: + BUG(); + } + + vcpu->arch.guest_owned_ext |= msr; + + kvmppc_recalc_shadow_msr(vcpu); + + return RESUME_GUEST; +} + +int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int exit_nr) +{ + int r = RESUME_HOST; + + vcpu->stat.sum_exits++; + + run->exit_reason = KVM_EXIT_UNKNOWN; + run->ready_for_interrupt_injection = 1; + + trace_kvm_book3s_exit(exit_nr, vcpu); + preempt_enable(); + kvm_resched(vcpu); + switch (exit_nr) { + case BOOK3S_INTERRUPT_INST_STORAGE: + { + struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); + ulong shadow_srr1 = svcpu->shadow_srr1; + vcpu->stat.pf_instruc++; + +#ifdef CONFIG_PPC_BOOK3S_32 + /* We set segments as unused segments when invalidating them. So + * treat the respective fault as segment fault. */ + if (svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT] == SR_INVALID) { + kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); + r = RESUME_GUEST; + svcpu_put(svcpu); + break; + } +#endif + svcpu_put(svcpu); + + /* only care about PTEG not found errors, but leave NX alone */ + if (shadow_srr1 & 0x40000000) { + r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr); + vcpu->stat.sp_instruc++; + } else if (vcpu->arch.mmu.is_dcbz32(vcpu) && + (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) { + /* + * XXX If we do the dcbz hack we use the NX bit to flush&patch the page, + * so we can't use the NX bit inside the guest. Let's cross our fingers, + * that no guest that needs the dcbz hack does NX. + */ + kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL); + r = RESUME_GUEST; + } else { + vcpu->arch.shared->msr |= shadow_srr1 & 0x58000000; + kvmppc_book3s_queue_irqprio(vcpu, exit_nr); + r = RESUME_GUEST; + } + break; + } + case BOOK3S_INTERRUPT_DATA_STORAGE: + { + ulong dar = kvmppc_get_fault_dar(vcpu); + struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); + u32 fault_dsisr = svcpu->fault_dsisr; + vcpu->stat.pf_storage++; + +#ifdef CONFIG_PPC_BOOK3S_32 + /* We set segments as unused segments when invalidating them. So + * treat the respective fault as segment fault. */ + if ((svcpu->sr[dar >> SID_SHIFT]) == SR_INVALID) { + kvmppc_mmu_map_segment(vcpu, dar); + r = RESUME_GUEST; + svcpu_put(svcpu); + break; + } +#endif + svcpu_put(svcpu); + + /* The only case we need to handle is missing shadow PTEs */ + if (fault_dsisr & DSISR_NOHPTE) { + r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr); + } else { + vcpu->arch.shared->dar = dar; + vcpu->arch.shared->dsisr = fault_dsisr; + kvmppc_book3s_queue_irqprio(vcpu, exit_nr); + r = RESUME_GUEST; + } + break; + } + case BOOK3S_INTERRUPT_DATA_SEGMENT: + if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) { + vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); + kvmppc_book3s_queue_irqprio(vcpu, + BOOK3S_INTERRUPT_DATA_SEGMENT); + } + r = RESUME_GUEST; + break; + case BOOK3S_INTERRUPT_INST_SEGMENT: + if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)) < 0) { + kvmppc_book3s_queue_irqprio(vcpu, + BOOK3S_INTERRUPT_INST_SEGMENT); + } + r = RESUME_GUEST; + break; + /* We're good on these - the host merely wanted to get our attention */ + case BOOK3S_INTERRUPT_DECREMENTER: + vcpu->stat.dec_exits++; + r = RESUME_GUEST; + break; + case BOOK3S_INTERRUPT_EXTERNAL: + vcpu->stat.ext_intr_exits++; + r = RESUME_GUEST; + break; + case BOOK3S_INTERRUPT_PERFMON: + r = RESUME_GUEST; + break; + case BOOK3S_INTERRUPT_PROGRAM: + { + enum emulation_result er; + struct kvmppc_book3s_shadow_vcpu *svcpu; + ulong flags; + +program_interrupt: + svcpu = svcpu_get(vcpu); + flags = svcpu->shadow_srr1 & 0x1f0000ull; + svcpu_put(svcpu); + + if (vcpu->arch.shared->msr & MSR_PR) { +#ifdef EXIT_DEBUG + printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); +#endif + if ((kvmppc_get_last_inst(vcpu) & 0xff0007ff) != + (INS_DCBZ & 0xfffffff7)) { + kvmppc_core_queue_program(vcpu, flags); + r = RESUME_GUEST; + break; + } + } + + vcpu->stat.emulated_inst_exits++; + er = kvmppc_emulate_instruction(run, vcpu); + switch (er) { + case EMULATE_DONE: + r = RESUME_GUEST_NV; + break; + case EMULATE_AGAIN: + r = RESUME_GUEST; + break; + case EMULATE_FAIL: + printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", + __func__, kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); + kvmppc_core_queue_program(vcpu, flags); + r = RESUME_GUEST; + break; + case EMULATE_DO_MMIO: + run->exit_reason = KVM_EXIT_MMIO; + r = RESUME_HOST_NV; + break; + default: + BUG(); + } + break; + } + case BOOK3S_INTERRUPT_SYSCALL: + if (vcpu->arch.papr_enabled && + (kvmppc_get_last_inst(vcpu) == 0x44000022) && + !(vcpu->arch.shared->msr & MSR_PR)) { + /* SC 1 papr hypercalls */ + ulong cmd = kvmppc_get_gpr(vcpu, 3); + int i; + +#ifdef CONFIG_KVM_BOOK3S_64_PR + if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) { + r = RESUME_GUEST; + break; + } +#endif + + run->papr_hcall.nr = cmd; + for (i = 0; i < 9; ++i) { + ulong gpr = kvmppc_get_gpr(vcpu, 4 + i); + run->papr_hcall.args[i] = gpr; + } + run->exit_reason = KVM_EXIT_PAPR_HCALL; + vcpu->arch.hcall_needed = 1; + r = RESUME_HOST; + } else if (vcpu->arch.osi_enabled && + (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) && + (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) { + /* MOL hypercalls */ + u64 *gprs = run->osi.gprs; + int i; + + run->exit_reason = KVM_EXIT_OSI; + for (i = 0; i < 32; i++) + gprs[i] = kvmppc_get_gpr(vcpu, i); + vcpu->arch.osi_needed = 1; + r = RESUME_HOST_NV; + } else if (!(vcpu->arch.shared->msr & MSR_PR) && + (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) { + /* KVM PV hypercalls */ + kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu)); + r = RESUME_GUEST; + } else { + /* Guest syscalls */ + vcpu->stat.syscall_exits++; + kvmppc_book3s_queue_irqprio(vcpu, exit_nr); + r = RESUME_GUEST; + } + break; + case BOOK3S_INTERRUPT_FP_UNAVAIL: + case BOOK3S_INTERRUPT_ALTIVEC: + case BOOK3S_INTERRUPT_VSX: + { + int ext_msr = 0; + + switch (exit_nr) { + case BOOK3S_INTERRUPT_FP_UNAVAIL: ext_msr = MSR_FP; break; + case BOOK3S_INTERRUPT_ALTIVEC: ext_msr = MSR_VEC; break; + case BOOK3S_INTERRUPT_VSX: ext_msr = MSR_VSX; break; + } + + switch (kvmppc_check_ext(vcpu, exit_nr)) { + case EMULATE_DONE: + /* everything ok - let's enable the ext */ + r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr); + break; + case EMULATE_FAIL: + /* we need to emulate this instruction */ + goto program_interrupt; + break; + default: + /* nothing to worry about - go again */ + break; + } + break; + } + case BOOK3S_INTERRUPT_ALIGNMENT: + if (kvmppc_read_inst(vcpu) == EMULATE_DONE) { + vcpu->arch.shared->dsisr = kvmppc_alignment_dsisr(vcpu, + kvmppc_get_last_inst(vcpu)); + vcpu->arch.shared->dar = kvmppc_alignment_dar(vcpu, + kvmppc_get_last_inst(vcpu)); + kvmppc_book3s_queue_irqprio(vcpu, exit_nr); + } + r = RESUME_GUEST; + break; + case BOOK3S_INTERRUPT_MACHINE_CHECK: + case BOOK3S_INTERRUPT_TRACE: + kvmppc_book3s_queue_irqprio(vcpu, exit_nr); + r = RESUME_GUEST; + break; + default: + { + struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); + ulong shadow_srr1 = svcpu->shadow_srr1; + svcpu_put(svcpu); + /* Ugh - bork here! What did we get? */ + printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", + exit_nr, kvmppc_get_pc(vcpu), shadow_srr1); + r = RESUME_HOST; + BUG(); + break; + } + } + + preempt_disable(); + if (!(r & RESUME_HOST)) { + /* To avoid clobbering exit_reason, only check for signals if + * we aren't already exiting to userspace for some other + * reason. */ + + /* + * Interrupts could be timers for the guest which we have to + * inject again, so let's postpone them until we're in the guest + * and if we really did time things so badly, then we just exit + * again due to a host external interrupt. + */ + __hard_irq_disable(); + if (signal_pending(current)) { + __hard_irq_enable(); +#ifdef EXIT_DEBUG + printk(KERN_EMERG "KVM: Going back to host\n"); +#endif + vcpu->stat.signal_exits++; + run->exit_reason = KVM_EXIT_INTR; + r = -EINTR; + } else { + /* In case an interrupt came in that was triggered + * from userspace (like DEC), we need to check what + * to inject now! */ + kvmppc_core_prepare_to_enter(vcpu); + } + } + + trace_kvm_book3s_reenter(r, vcpu); + + return r; +} + +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); + int i; + + sregs->pvr = vcpu->arch.pvr; + + sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1; + if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) { + for (i = 0; i < 64; i++) { + sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige | i; + sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv; + } + } else { + for (i = 0; i < 16; i++) + sregs->u.s.ppc32.sr[i] = vcpu->arch.shared->sr[i]; + + for (i = 0; i < 8; i++) { + sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw; + sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw; + } + } + + return 0; +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); + int i; + + kvmppc_set_pvr(vcpu, sregs->pvr); + + vcpu3s->sdr1 = sregs->u.s.sdr1; + if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) { + for (i = 0; i < 64; i++) { + vcpu->arch.mmu.slbmte(vcpu, sregs->u.s.ppc64.slb[i].slbv, + sregs->u.s.ppc64.slb[i].slbe); + } + } else { + for (i = 0; i < 16; i++) { + vcpu->arch.mmu.mtsrin(vcpu, i, sregs->u.s.ppc32.sr[i]); + } + for (i = 0; i < 8; i++) { + kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), false, + (u32)sregs->u.s.ppc32.ibat[i]); + kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), true, + (u32)(sregs->u.s.ppc32.ibat[i] >> 32)); + kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), false, + (u32)sregs->u.s.ppc32.dbat[i]); + kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), true, + (u32)(sregs->u.s.ppc32.dbat[i] >> 32)); + } + } + + /* Flush the MMU after messing with the segments */ + kvmppc_mmu_pte_flush(vcpu, 0, 0); + + return 0; +} + +int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + int r = -EINVAL; + + switch (reg->id) { + case KVM_REG_PPC_HIOR: + r = copy_to_user((u64 __user *)(long)reg->addr, + &to_book3s(vcpu)->hior, sizeof(u64)); + break; + default: + break; + } + + return r; +} + +int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + int r = -EINVAL; + + switch (reg->id) { + case KVM_REG_PPC_HIOR: + r = copy_from_user(&to_book3s(vcpu)->hior, + (u64 __user *)(long)reg->addr, sizeof(u64)); + if (!r) + to_book3s(vcpu)->hior_explicit = true; + break; + default: + break; + } + + return r; +} + +int kvmppc_core_check_processor_compat(void) +{ + return 0; +} + +struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +{ + struct kvmppc_vcpu_book3s *vcpu_book3s; + struct kvm_vcpu *vcpu; + int err = -ENOMEM; + unsigned long p; + + vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s)); + if (!vcpu_book3s) + goto out; + + vcpu_book3s->shadow_vcpu = (struct kvmppc_book3s_shadow_vcpu *) + kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL); + if (!vcpu_book3s->shadow_vcpu) + goto free_vcpu; + + vcpu = &vcpu_book3s->vcpu; + err = kvm_vcpu_init(vcpu, kvm, id); + if (err) + goto free_shadow_vcpu; + + p = __get_free_page(GFP_KERNEL|__GFP_ZERO); + /* the real shared page fills the last 4k of our page */ + vcpu->arch.shared = (void*)(p + PAGE_SIZE - 4096); + if (!p) + goto uninit_vcpu; + +#ifdef CONFIG_PPC_BOOK3S_64 + /* default to book3s_64 (970fx) */ + vcpu->arch.pvr = 0x3C0301; +#else + /* default to book3s_32 (750) */ + vcpu->arch.pvr = 0x84202; +#endif + kvmppc_set_pvr(vcpu, vcpu->arch.pvr); + vcpu->arch.slb_nr = 64; + + vcpu->arch.shadow_msr = MSR_USER64; + + err = kvmppc_mmu_init(vcpu); + if (err < 0) + goto uninit_vcpu; + + return vcpu; + +uninit_vcpu: + kvm_vcpu_uninit(vcpu); +free_shadow_vcpu: + kfree(vcpu_book3s->shadow_vcpu); +free_vcpu: + vfree(vcpu_book3s); +out: + return ERR_PTR(err); +} + +void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); + + free_page((unsigned long)vcpu->arch.shared & PAGE_MASK); + kvm_vcpu_uninit(vcpu); + kfree(vcpu_book3s->shadow_vcpu); + vfree(vcpu_book3s); +} + +int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +{ + int ret; + double fpr[32][TS_FPRWIDTH]; + unsigned int fpscr; + int fpexc_mode; +#ifdef CONFIG_ALTIVEC + vector128 vr[32]; + vector128 vscr; + unsigned long uninitialized_var(vrsave); + int used_vr; +#endif +#ifdef CONFIG_VSX + int used_vsr; +#endif + ulong ext_msr; + + preempt_disable(); + + /* Check if we can run the vcpu at all */ + if (!vcpu->arch.sane) { + kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = -EINVAL; + goto out; + } + + kvmppc_core_prepare_to_enter(vcpu); + + /* + * Interrupts could be timers for the guest which we have to inject + * again, so let's postpone them until we're in the guest and if we + * really did time things so badly, then we just exit again due to + * a host external interrupt. + */ + __hard_irq_disable(); + + /* No need to go into the guest when all we do is going out */ + if (signal_pending(current)) { + __hard_irq_enable(); + kvm_run->exit_reason = KVM_EXIT_INTR; + ret = -EINTR; + goto out; + } + + /* Save FPU state in stack */ + if (current->thread.regs->msr & MSR_FP) + giveup_fpu(current); + memcpy(fpr, current->thread.fpr, sizeof(current->thread.fpr)); + fpscr = current->thread.fpscr.val; + fpexc_mode = current->thread.fpexc_mode; + +#ifdef CONFIG_ALTIVEC + /* Save Altivec state in stack */ + used_vr = current->thread.used_vr; + if (used_vr) { + if (current->thread.regs->msr & MSR_VEC) + giveup_altivec(current); + memcpy(vr, current->thread.vr, sizeof(current->thread.vr)); + vscr = current->thread.vscr; + vrsave = current->thread.vrsave; + } +#endif + +#ifdef CONFIG_VSX + /* Save VSX state in stack */ + used_vsr = current->thread.used_vsr; + if (used_vsr && (current->thread.regs->msr & MSR_VSX)) + __giveup_vsx(current); +#endif + + /* Remember the MSR with disabled extensions */ + ext_msr = current->thread.regs->msr; + + /* Preload FPU if it's enabled */ + if (vcpu->arch.shared->msr & MSR_FP) + kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); + + kvm_guest_enter(); + + ret = __kvmppc_vcpu_run(kvm_run, vcpu); + + kvm_guest_exit(); + + current->thread.regs->msr = ext_msr; + + /* Make sure we save the guest FPU/Altivec/VSX state */ + kvmppc_giveup_ext(vcpu, MSR_FP); + kvmppc_giveup_ext(vcpu, MSR_VEC); + kvmppc_giveup_ext(vcpu, MSR_VSX); + + /* Restore FPU state from stack */ + memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr)); + current->thread.fpscr.val = fpscr; + current->thread.fpexc_mode = fpexc_mode; + +#ifdef CONFIG_ALTIVEC + /* Restore Altivec state from stack */ + if (used_vr && current->thread.used_vr) { + memcpy(current->thread.vr, vr, sizeof(current->thread.vr)); + current->thread.vscr = vscr; + current->thread.vrsave = vrsave; + } + current->thread.used_vr = used_vr; +#endif + +#ifdef CONFIG_VSX + current->thread.used_vsr = used_vsr; +#endif + +out: + preempt_enable(); + return ret; +} + +/* + * Get (and clear) the dirty memory log for a memory slot. + */ +int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, + struct kvm_dirty_log *log) +{ + struct kvm_memory_slot *memslot; + struct kvm_vcpu *vcpu; + ulong ga, ga_end; + int is_dirty = 0; + int r; + unsigned long n; + + mutex_lock(&kvm->slots_lock); + + r = kvm_get_dirty_log(kvm, log, &is_dirty); + if (r) + goto out; + + /* If nothing is dirty, don't bother messing with page tables. */ + if (is_dirty) { + memslot = id_to_memslot(kvm->memslots, log->slot); + + ga = memslot->base_gfn << PAGE_SHIFT; + ga_end = ga + (memslot->npages << PAGE_SHIFT); + + kvm_for_each_vcpu(n, vcpu, kvm) + kvmppc_mmu_pte_pflush(vcpu, ga, ga_end); + + n = kvm_dirty_bitmap_bytes(memslot); + memset(memslot->dirty_bitmap, 0, n); + } + + r = 0; +out: + mutex_unlock(&kvm->slots_lock); + return r; +} + +int kvmppc_core_prepare_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem) +{ + return 0; +} + +void kvmppc_core_commit_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem) +{ +} + +int kvmppc_core_init_vm(struct kvm *kvm) +{ + return 0; +} + +void kvmppc_core_destroy_vm(struct kvm *kvm) +{ +} + +static int kvmppc_book3s_init(void) +{ + int r; + + r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0, + THIS_MODULE); + + if (r) + return r; + + r = kvmppc_mmu_hpte_sysinit(); + + return r; +} + +static void kvmppc_book3s_exit(void) +{ + kvmppc_mmu_hpte_sysexit(); + kvm_exit(); +} + +module_init(kvmppc_book3s_init); +module_exit(kvmppc_book3s_exit); diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c new file mode 100644 index 00000000..b9589324 --- /dev/null +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -0,0 +1,158 @@ +/* + * Copyright (C) 2011. Freescale Inc. All rights reserved. + * + * Authors: + * Alexander Graf <agraf@suse.de> + * Paul Mackerras <paulus@samba.org> + * + * Description: + * + * Hypercall handling for running PAPR guests in PR KVM on Book 3S + * processors. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include <asm/uaccess.h> +#include <asm/kvm_ppc.h> +#include <asm/kvm_book3s.h> + +static unsigned long get_pteg_addr(struct kvm_vcpu *vcpu, long pte_index) +{ + struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); + unsigned long pteg_addr; + + pte_index <<= 4; + pte_index &= ((1 << ((vcpu_book3s->sdr1 & 0x1f) + 11)) - 1) << 7 | 0x70; + pteg_addr = vcpu_book3s->sdr1 & 0xfffffffffffc0000ULL; + pteg_addr |= pte_index; + + return pteg_addr; +} + +static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu) +{ + long flags = kvmppc_get_gpr(vcpu, 4); + long pte_index = kvmppc_get_gpr(vcpu, 5); + unsigned long pteg[2 * 8]; + unsigned long pteg_addr, i, *hpte; + + pte_index &= ~7UL; + pteg_addr = get_pteg_addr(vcpu, pte_index); + + copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg)); + hpte = pteg; + + if (likely((flags & H_EXACT) == 0)) { + pte_index &= ~7UL; + for (i = 0; ; ++i) { + if (i == 8) + return H_PTEG_FULL; + if ((*hpte & HPTE_V_VALID) == 0) + break; + hpte += 2; + } + } else { + i = kvmppc_get_gpr(vcpu, 5) & 7UL; + hpte += i * 2; + } + + hpte[0] = kvmppc_get_gpr(vcpu, 6); + hpte[1] = kvmppc_get_gpr(vcpu, 7); + copy_to_user((void __user *)pteg_addr, pteg, sizeof(pteg)); + kvmppc_set_gpr(vcpu, 3, H_SUCCESS); + kvmppc_set_gpr(vcpu, 4, pte_index | i); + + return EMULATE_DONE; +} + +static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu) +{ + unsigned long flags= kvmppc_get_gpr(vcpu, 4); + unsigned long pte_index = kvmppc_get_gpr(vcpu, 5); + unsigned long avpn = kvmppc_get_gpr(vcpu, 6); + unsigned long v = 0, pteg, rb; + unsigned long pte[2]; + + pteg = get_pteg_addr(vcpu, pte_index); + copy_from_user(pte, (void __user *)pteg, sizeof(pte)); + + if ((pte[0] & HPTE_V_VALID) == 0 || + ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn) || + ((flags & H_ANDCOND) && (pte[0] & avpn) != 0)) { + kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND); + return EMULATE_DONE; + } + + copy_to_user((void __user *)pteg, &v, sizeof(v)); + + rb = compute_tlbie_rb(pte[0], pte[1], pte_index); + vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false); + + kvmppc_set_gpr(vcpu, 3, H_SUCCESS); + kvmppc_set_gpr(vcpu, 4, pte[0]); + kvmppc_set_gpr(vcpu, 5, pte[1]); + + return EMULATE_DONE; +} + +static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu) +{ + unsigned long flags = kvmppc_get_gpr(vcpu, 4); + unsigned long pte_index = kvmppc_get_gpr(vcpu, 5); + unsigned long avpn = kvmppc_get_gpr(vcpu, 6); + unsigned long rb, pteg, r, v; + unsigned long pte[2]; + + pteg = get_pteg_addr(vcpu, pte_index); + copy_from_user(pte, (void __user *)pteg, sizeof(pte)); + + if ((pte[0] & HPTE_V_VALID) == 0 || + ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn)) { + kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND); + return EMULATE_DONE; + } + + v = pte[0]; + r = pte[1]; + r &= ~(HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_HI | + HPTE_R_KEY_LO); + r |= (flags << 55) & HPTE_R_PP0; + r |= (flags << 48) & HPTE_R_KEY_HI; + r |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO); + + pte[1] = r; + + rb = compute_tlbie_rb(v, r, pte_index); + vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false); + copy_to_user((void __user *)pteg, pte, sizeof(pte)); + + kvmppc_set_gpr(vcpu, 3, H_SUCCESS); + + return EMULATE_DONE; +} + +int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) +{ + switch (cmd) { + case H_ENTER: + return kvmppc_h_pr_enter(vcpu); + case H_REMOVE: + return kvmppc_h_pr_remove(vcpu); + case H_PROTECT: + return kvmppc_h_pr_protect(vcpu); + case H_BULK_REMOVE: + /* We just flush all PTEs, so user space can + handle the HPT modifications */ + kvmppc_mmu_pte_flush(vcpu, 0, 0); + break; + case H_CEDE: + kvm_vcpu_block(vcpu); + vcpu->stat.halt_wakeup++; + return EMULATE_DONE; + } + + return EMULATE_FAIL; +} diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S new file mode 100644 index 00000000..34187585 --- /dev/null +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -0,0 +1,242 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright SUSE Linux Products GmbH 2009 + * + * Authors: Alexander Graf <agraf@suse.de> + */ + +#include <asm/ppc_asm.h> +#include <asm/kvm_asm.h> +#include <asm/reg.h> +#include <asm/mmu.h> +#include <asm/page.h> +#include <asm/asm-offsets.h> + +#ifdef CONFIG_PPC_BOOK3S_64 +#include <asm/exception-64s.h> +#endif + +/***************************************************************************** + * * + * Real Mode handlers that need to be in low physical memory * + * * + ****************************************************************************/ + +#if defined(CONFIG_PPC_BOOK3S_64) + +#define FUNC(name) GLUE(.,name) +#define MTMSR_EERI(reg) mtmsrd (reg),1 + + .globl kvmppc_skip_interrupt +kvmppc_skip_interrupt: + /* + * Here all GPRs are unchanged from when the interrupt happened + * except for r13, which is saved in SPRG_SCRATCH0. + */ + mfspr r13, SPRN_SRR0 + addi r13, r13, 4 + mtspr SPRN_SRR0, r13 + GET_SCRATCH0(r13) + rfid + b . + + .globl kvmppc_skip_Hinterrupt +kvmppc_skip_Hinterrupt: + /* + * Here all GPRs are unchanged from when the interrupt happened + * except for r13, which is saved in SPRG_SCRATCH0. + */ + mfspr r13, SPRN_HSRR0 + addi r13, r13, 4 + mtspr SPRN_HSRR0, r13 + GET_SCRATCH0(r13) + hrfid + b . + +#elif defined(CONFIG_PPC_BOOK3S_32) + +#define FUNC(name) name +#define MTMSR_EERI(reg) mtmsr (reg) + +.macro INTERRUPT_TRAMPOLINE intno + +.global kvmppc_trampoline_\intno +kvmppc_trampoline_\intno: + + mtspr SPRN_SPRG_SCRATCH0, r13 /* Save r13 */ + + /* + * First thing to do is to find out if we're coming + * from a KVM guest or a Linux process. + * + * To distinguish, we check a magic byte in the PACA/current + */ + mfspr r13, SPRN_SPRG_THREAD + lwz r13, THREAD_KVM_SVCPU(r13) + /* PPC32 can have a NULL pointer - let's check for that */ + mtspr SPRN_SPRG_SCRATCH1, r12 /* Save r12 */ + mfcr r12 + cmpwi r13, 0 + bne 1f +2: mtcr r12 + mfspr r12, SPRN_SPRG_SCRATCH1 + mfspr r13, SPRN_SPRG_SCRATCH0 /* r13 = original r13 */ + b kvmppc_resume_\intno /* Get back original handler */ + +1: tophys(r13, r13) + stw r12, HSTATE_SCRATCH1(r13) + mfspr r12, SPRN_SPRG_SCRATCH1 + stw r12, HSTATE_SCRATCH0(r13) + lbz r12, HSTATE_IN_GUEST(r13) + cmpwi r12, KVM_GUEST_MODE_NONE + bne ..kvmppc_handler_hasmagic_\intno + /* No KVM guest? Then jump back to the Linux handler! */ + lwz r12, HSTATE_SCRATCH1(r13) + b 2b + + /* Now we know we're handling a KVM guest */ +..kvmppc_handler_hasmagic_\intno: + + /* Should we just skip the faulting instruction? */ + cmpwi r12, KVM_GUEST_MODE_SKIP + beq kvmppc_handler_skip_ins + + /* Let's store which interrupt we're handling */ + li r12, \intno + + /* Jump into the SLB exit code that goes to the highmem handler */ + b kvmppc_handler_trampoline_exit + +.endm + +INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_SYSTEM_RESET +INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_MACHINE_CHECK +INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_DATA_STORAGE +INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_INST_STORAGE +INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_EXTERNAL +INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALIGNMENT +INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_PROGRAM +INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_FP_UNAVAIL +INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_DECREMENTER +INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_SYSCALL +INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_TRACE +INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_PERFMON +INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALTIVEC + +/* + * Bring us back to the faulting code, but skip the + * faulting instruction. + * + * This is a generic exit path from the interrupt + * trampolines above. + * + * Input Registers: + * + * R12 = free + * R13 = Shadow VCPU (PACA) + * HSTATE.SCRATCH0 = guest R12 + * HSTATE.SCRATCH1 = guest CR + * SPRG_SCRATCH0 = guest R13 + * + */ +kvmppc_handler_skip_ins: + + /* Patch the IP to the next instruction */ + mfsrr0 r12 + addi r12, r12, 4 + mtsrr0 r12 + + /* Clean up all state */ + lwz r12, HSTATE_SCRATCH1(r13) + mtcr r12 + PPC_LL r12, HSTATE_SCRATCH0(r13) + GET_SCRATCH0(r13) + + /* And get back into the code */ + RFI +#endif + +/* + * Call kvmppc_handler_trampoline_enter in real mode + * + * On entry, r4 contains the guest shadow MSR + */ +_GLOBAL(kvmppc_entry_trampoline) + mfmsr r5 + LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter) + toreal(r7) + + li r9, MSR_RI + ori r9, r9, MSR_EE + andc r9, r5, r9 /* Clear EE and RI in MSR value */ + li r6, MSR_IR | MSR_DR + ori r6, r6, MSR_EE + andc r6, r5, r6 /* Clear EE, DR and IR in MSR value */ + MTMSR_EERI(r9) /* Clear EE and RI in MSR */ + mtsrr0 r7 /* before we set srr0/1 */ + mtsrr1 r6 + RFI + +#if defined(CONFIG_PPC_BOOK3S_32) +#define STACK_LR INT_FRAME_SIZE+4 + +/* load_up_xxx have to run with MSR_DR=0 on Book3S_32 */ +#define MSR_EXT_START \ + PPC_STL r20, _NIP(r1); \ + mfmsr r20; \ + LOAD_REG_IMMEDIATE(r3, MSR_DR|MSR_EE); \ + andc r3,r20,r3; /* Disable DR,EE */ \ + mtmsr r3; \ + sync + +#define MSR_EXT_END \ + mtmsr r20; /* Enable DR,EE */ \ + sync; \ + PPC_LL r20, _NIP(r1) + +#elif defined(CONFIG_PPC_BOOK3S_64) +#define STACK_LR _LINK +#define MSR_EXT_START +#define MSR_EXT_END +#endif + +/* + * Activate current's external feature (FPU/Altivec/VSX) + */ +#define define_load_up(what) \ + \ +_GLOBAL(kvmppc_load_up_ ## what); \ + PPC_STLU r1, -INT_FRAME_SIZE(r1); \ + mflr r3; \ + PPC_STL r3, STACK_LR(r1); \ + MSR_EXT_START; \ + \ + bl FUNC(load_up_ ## what); \ + \ + MSR_EXT_END; \ + PPC_LL r3, STACK_LR(r1); \ + mtlr r3; \ + addi r1, r1, INT_FRAME_SIZE; \ + blr + +define_load_up(fpu) +#ifdef CONFIG_ALTIVEC +define_load_up(altivec) +#endif +#ifdef CONFIG_VSX +define_load_up(vsx) +#endif + +#include "book3s_segment.S" diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S new file mode 100644 index 00000000..6e6e9cef --- /dev/null +++ b/arch/powerpc/kvm/book3s_segment.S @@ -0,0 +1,367 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright SUSE Linux Products GmbH 2010 + * + * Authors: Alexander Graf <agraf@suse.de> + */ + +/* Real mode helpers */ + +#if defined(CONFIG_PPC_BOOK3S_64) + +#define GET_SHADOW_VCPU(reg) \ + mr reg, r13 +#define MTMSR_EERI(reg) mtmsrd (reg),1 + +#elif defined(CONFIG_PPC_BOOK3S_32) + +#define GET_SHADOW_VCPU(reg) \ + tophys(reg, r2); \ + lwz reg, (THREAD + THREAD_KVM_SVCPU)(reg); \ + tophys(reg, reg) +#define MTMSR_EERI(reg) mtmsr (reg) + +#endif + +/* Disable for nested KVM */ +#define USE_QUICK_LAST_INST + + +/* Get helper functions for subarch specific functionality */ + +#if defined(CONFIG_PPC_BOOK3S_64) +#include "book3s_64_slb.S" +#elif defined(CONFIG_PPC_BOOK3S_32) +#include "book3s_32_sr.S" +#endif + +/****************************************************************************** + * * + * Entry code * + * * + *****************************************************************************/ + +.global kvmppc_handler_trampoline_enter +kvmppc_handler_trampoline_enter: + + /* Required state: + * + * MSR = ~IR|DR + * R1 = host R1 + * R2 = host R2 + * R4 = guest shadow MSR + * R5 = normal host MSR + * R6 = current host MSR (EE, IR, DR off) + * LR = highmem guest exit code + * all other volatile GPRS = free + * SVCPU[CR] = guest CR + * SVCPU[XER] = guest XER + * SVCPU[CTR] = guest CTR + * SVCPU[LR] = guest LR + */ + + /* r3 = shadow vcpu */ + GET_SHADOW_VCPU(r3) + + /* Save guest exit handler address and MSR */ + mflr r0 + PPC_STL r0, HSTATE_VMHANDLER(r3) + PPC_STL r5, HSTATE_HOST_MSR(r3) + + /* Save R1/R2 in the PACA (64-bit) or shadow_vcpu (32-bit) */ + PPC_STL r1, HSTATE_HOST_R1(r3) + PPC_STL r2, HSTATE_HOST_R2(r3) + + /* Activate guest mode, so faults get handled by KVM */ + li r11, KVM_GUEST_MODE_GUEST + stb r11, HSTATE_IN_GUEST(r3) + + /* Switch to guest segment. This is subarch specific. */ + LOAD_GUEST_SEGMENTS + +#ifdef CONFIG_PPC_BOOK3S_64 + /* Some guests may need to have dcbz set to 32 byte length. + * + * Usually we ensure that by patching the guest's instructions + * to trap on dcbz and emulate it in the hypervisor. + * + * If we can, we should tell the CPU to use 32 byte dcbz though, + * because that's a lot faster. + */ + lbz r0, HSTATE_RESTORE_HID5(r3) + cmpwi r0, 0 + beq no_dcbz32_on + + mfspr r0,SPRN_HID5 + ori r0, r0, 0x80 /* XXX HID5_dcbz32 = 0x80 */ + mtspr SPRN_HID5,r0 +no_dcbz32_on: + +#endif /* CONFIG_PPC_BOOK3S_64 */ + + /* Enter guest */ + + PPC_LL r8, SVCPU_CTR(r3) + PPC_LL r9, SVCPU_LR(r3) + lwz r10, SVCPU_CR(r3) + lwz r11, SVCPU_XER(r3) + + mtctr r8 + mtlr r9 + mtcr r10 + mtxer r11 + + /* Move SRR0 and SRR1 into the respective regs */ + PPC_LL r9, SVCPU_PC(r3) + /* First clear RI in our current MSR value */ + li r0, MSR_RI + andc r6, r6, r0 + MTMSR_EERI(r6) + mtsrr0 r9 + mtsrr1 r4 + + PPC_LL r0, SVCPU_R0(r3) + PPC_LL r1, SVCPU_R1(r3) + PPC_LL r2, SVCPU_R2(r3) + PPC_LL r4, SVCPU_R4(r3) + PPC_LL r5, SVCPU_R5(r3) + PPC_LL r6, SVCPU_R6(r3) + PPC_LL r7, SVCPU_R7(r3) + PPC_LL r8, SVCPU_R8(r3) + PPC_LL r9, SVCPU_R9(r3) + PPC_LL r10, SVCPU_R10(r3) + PPC_LL r11, SVCPU_R11(r3) + PPC_LL r12, SVCPU_R12(r3) + PPC_LL r13, SVCPU_R13(r3) + + PPC_LL r3, (SVCPU_R3)(r3) + + RFI +kvmppc_handler_trampoline_enter_end: + + + +/****************************************************************************** + * * + * Exit code * + * * + *****************************************************************************/ + +.global kvmppc_handler_trampoline_exit +kvmppc_handler_trampoline_exit: + +.global kvmppc_interrupt +kvmppc_interrupt: + + /* Register usage at this point: + * + * SPRG_SCRATCH0 = guest R13 + * R12 = exit handler id + * R13 = shadow vcpu (32-bit) or PACA (64-bit) + * HSTATE.SCRATCH0 = guest R12 + * HSTATE.SCRATCH1 = guest CR + * + */ + + /* Save registers */ + + PPC_STL r0, SVCPU_R0(r13) + PPC_STL r1, SVCPU_R1(r13) + PPC_STL r2, SVCPU_R2(r13) + PPC_STL r3, SVCPU_R3(r13) + PPC_STL r4, SVCPU_R4(r13) + PPC_STL r5, SVCPU_R5(r13) + PPC_STL r6, SVCPU_R6(r13) + PPC_STL r7, SVCPU_R7(r13) + PPC_STL r8, SVCPU_R8(r13) + PPC_STL r9, SVCPU_R9(r13) + PPC_STL r10, SVCPU_R10(r13) + PPC_STL r11, SVCPU_R11(r13) + + /* Restore R1/R2 so we can handle faults */ + PPC_LL r1, HSTATE_HOST_R1(r13) + PPC_LL r2, HSTATE_HOST_R2(r13) + + /* Save guest PC and MSR */ +#ifdef CONFIG_PPC64 +BEGIN_FTR_SECTION + andi. r0, r12, 0x2 + cmpwi cr1, r0, 0 + beq 1f + mfspr r3,SPRN_HSRR0 + mfspr r4,SPRN_HSRR1 + andi. r12,r12,0x3ffd + b 2f +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) +#endif +1: mfsrr0 r3 + mfsrr1 r4 +2: + PPC_STL r3, SVCPU_PC(r13) + PPC_STL r4, SVCPU_SHADOW_SRR1(r13) + + /* Get scratch'ed off registers */ + GET_SCRATCH0(r9) + PPC_LL r8, HSTATE_SCRATCH0(r13) + lwz r7, HSTATE_SCRATCH1(r13) + + PPC_STL r9, SVCPU_R13(r13) + PPC_STL r8, SVCPU_R12(r13) + stw r7, SVCPU_CR(r13) + + /* Save more register state */ + + mfxer r5 + mfdar r6 + mfdsisr r7 + mfctr r8 + mflr r9 + + stw r5, SVCPU_XER(r13) + PPC_STL r6, SVCPU_FAULT_DAR(r13) + stw r7, SVCPU_FAULT_DSISR(r13) + PPC_STL r8, SVCPU_CTR(r13) + PPC_STL r9, SVCPU_LR(r13) + + /* + * In order for us to easily get the last instruction, + * we got the #vmexit at, we exploit the fact that the + * virtual layout is still the same here, so we can just + * ld from the guest's PC address + */ + + /* We only load the last instruction when it's safe */ + cmpwi r12, BOOK3S_INTERRUPT_DATA_STORAGE + beq ld_last_inst + cmpwi r12, BOOK3S_INTERRUPT_PROGRAM + beq ld_last_inst + cmpwi r12, BOOK3S_INTERRUPT_SYSCALL + beq ld_last_prev_inst + cmpwi r12, BOOK3S_INTERRUPT_ALIGNMENT + beq- ld_last_inst +#ifdef CONFIG_PPC64 +BEGIN_FTR_SECTION + cmpwi r12, BOOK3S_INTERRUPT_H_EMUL_ASSIST + beq- ld_last_inst +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) +#endif + + b no_ld_last_inst + +ld_last_prev_inst: + addi r3, r3, -4 + +ld_last_inst: + /* Save off the guest instruction we're at */ + + /* In case lwz faults */ + li r0, KVM_INST_FETCH_FAILED + +#ifdef USE_QUICK_LAST_INST + + /* Set guest mode to 'jump over instruction' so if lwz faults + * we'll just continue at the next IP. */ + li r9, KVM_GUEST_MODE_SKIP + stb r9, HSTATE_IN_GUEST(r13) + + /* 1) enable paging for data */ + mfmsr r9 + ori r11, r9, MSR_DR /* Enable paging for data */ + mtmsr r11 + sync + /* 2) fetch the instruction */ + lwz r0, 0(r3) + /* 3) disable paging again */ + mtmsr r9 + sync + +#endif + stw r0, SVCPU_LAST_INST(r13) + +no_ld_last_inst: + + /* Unset guest mode */ + li r9, KVM_GUEST_MODE_NONE + stb r9, HSTATE_IN_GUEST(r13) + + /* Switch back to host MMU */ + LOAD_HOST_SEGMENTS + +#ifdef CONFIG_PPC_BOOK3S_64 + + lbz r5, HSTATE_RESTORE_HID5(r13) + cmpwi r5, 0 + beq no_dcbz32_off + + li r4, 0 + mfspr r5,SPRN_HID5 + rldimi r5,r4,6,56 + mtspr SPRN_HID5,r5 + +no_dcbz32_off: + +#endif /* CONFIG_PPC_BOOK3S_64 */ + + /* + * For some interrupts, we need to call the real Linux + * handler, so it can do work for us. This has to happen + * as if the interrupt arrived from the kernel though, + * so let's fake it here where most state is restored. + * + * Having set up SRR0/1 with the address where we want + * to continue with relocation on (potentially in module + * space), we either just go straight there with rfi[d], + * or we jump to an interrupt handler if there is an + * interrupt to be handled first. In the latter case, + * the rfi[d] at the end of the interrupt handler will + * get us back to where we want to continue. + */ + + /* Register usage at this point: + * + * R1 = host R1 + * R2 = host R2 + * R10 = raw exit handler id + * R12 = exit handler id + * R13 = shadow vcpu (32-bit) or PACA (64-bit) + * SVCPU.* = guest * + * + */ + + PPC_LL r6, HSTATE_HOST_MSR(r13) + PPC_LL r8, HSTATE_VMHANDLER(r13) + +#ifdef CONFIG_PPC64 +BEGIN_FTR_SECTION + beq cr1, 1f + mtspr SPRN_HSRR1, r6 + mtspr SPRN_HSRR0, r8 +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) +#endif +1: /* Restore host msr -> SRR1 */ + mtsrr1 r6 + /* Load highmem handler address */ + mtsrr0 r8 + + /* RFI into the highmem handler, or jump to interrupt handler */ + cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL + beqa BOOK3S_INTERRUPT_EXTERNAL + cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER + beqa BOOK3S_INTERRUPT_DECREMENTER + cmpwi r12, BOOK3S_INTERRUPT_PERFMON + beqa BOOK3S_INTERRUPT_PERFMON + + RFI +kvmppc_handler_trampoline_exit_end: diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c new file mode 100644 index 00000000..ee9e1ee9 --- /dev/null +++ b/arch/powerpc/kvm/booke.c @@ -0,0 +1,1023 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2007 + * Copyright 2010-2011 Freescale Semiconductor, Inc. + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> + */ + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/kvm_host.h> +#include <linux/gfp.h> +#include <linux/module.h> +#include <linux/vmalloc.h> +#include <linux/fs.h> + +#include <asm/cputable.h> +#include <asm/uaccess.h> +#include <asm/kvm_ppc.h> +#include "timing.h" +#include <asm/cacheflush.h> + +#include "booke.h" + +unsigned long kvmppc_booke_handlers; + +#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM +#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU + +struct kvm_stats_debugfs_item debugfs_entries[] = { + { "mmio", VCPU_STAT(mmio_exits) }, + { "dcr", VCPU_STAT(dcr_exits) }, + { "sig", VCPU_STAT(signal_exits) }, + { "itlb_r", VCPU_STAT(itlb_real_miss_exits) }, + { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) }, + { "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) }, + { "dtlb_v", VCPU_STAT(dtlb_virt_miss_exits) }, + { "sysc", VCPU_STAT(syscall_exits) }, + { "isi", VCPU_STAT(isi_exits) }, + { "dsi", VCPU_STAT(dsi_exits) }, + { "inst_emu", VCPU_STAT(emulated_inst_exits) }, + { "dec", VCPU_STAT(dec_exits) }, + { "ext_intr", VCPU_STAT(ext_intr_exits) }, + { "halt_wakeup", VCPU_STAT(halt_wakeup) }, + { NULL } +}; + +/* TODO: use vcpu_printf() */ +void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu) +{ + int i; + + printk("pc: %08lx msr: %08llx\n", vcpu->arch.pc, vcpu->arch.shared->msr); + printk("lr: %08lx ctr: %08lx\n", vcpu->arch.lr, vcpu->arch.ctr); + printk("srr0: %08llx srr1: %08llx\n", vcpu->arch.shared->srr0, + vcpu->arch.shared->srr1); + + printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions); + + for (i = 0; i < 32; i += 4) { + printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i, + kvmppc_get_gpr(vcpu, i), + kvmppc_get_gpr(vcpu, i+1), + kvmppc_get_gpr(vcpu, i+2), + kvmppc_get_gpr(vcpu, i+3)); + } +} + +#ifdef CONFIG_SPE +void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu) +{ + preempt_disable(); + enable_kernel_spe(); + kvmppc_save_guest_spe(vcpu); + vcpu->arch.shadow_msr &= ~MSR_SPE; + preempt_enable(); +} + +static void kvmppc_vcpu_enable_spe(struct kvm_vcpu *vcpu) +{ + preempt_disable(); + enable_kernel_spe(); + kvmppc_load_guest_spe(vcpu); + vcpu->arch.shadow_msr |= MSR_SPE; + preempt_enable(); +} + +static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.shared->msr & MSR_SPE) { + if (!(vcpu->arch.shadow_msr & MSR_SPE)) + kvmppc_vcpu_enable_spe(vcpu); + } else if (vcpu->arch.shadow_msr & MSR_SPE) { + kvmppc_vcpu_disable_spe(vcpu); + } +} +#else +static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu) +{ +} +#endif + +/* + * Helper function for "full" MSR writes. No need to call this if only + * EE/CE/ME/DE/RI are changing. + */ +void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) +{ + u32 old_msr = vcpu->arch.shared->msr; + + vcpu->arch.shared->msr = new_msr; + + kvmppc_mmu_msr_notify(vcpu, old_msr); + kvmppc_vcpu_sync_spe(vcpu); +} + +static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, + unsigned int priority) +{ + set_bit(priority, &vcpu->arch.pending_exceptions); +} + +static void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu, + ulong dear_flags, ulong esr_flags) +{ + vcpu->arch.queued_dear = dear_flags; + vcpu->arch.queued_esr = esr_flags; + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS); +} + +static void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, + ulong dear_flags, ulong esr_flags) +{ + vcpu->arch.queued_dear = dear_flags; + vcpu->arch.queued_esr = esr_flags; + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE); +} + +static void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, + ulong esr_flags) +{ + vcpu->arch.queued_esr = esr_flags; + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE); +} + +void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong esr_flags) +{ + vcpu->arch.queued_esr = esr_flags; + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM); +} + +void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu) +{ + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER); +} + +int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu) +{ + return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions); +} + +void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu) +{ + clear_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions); +} + +void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, + struct kvm_interrupt *irq) +{ + unsigned int prio = BOOKE_IRQPRIO_EXTERNAL; + + if (irq->irq == KVM_INTERRUPT_SET_LEVEL) + prio = BOOKE_IRQPRIO_EXTERNAL_LEVEL; + + kvmppc_booke_queue_irqprio(vcpu, prio); +} + +void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, + struct kvm_interrupt *irq) +{ + clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions); + clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); +} + +/* Deliver the interrupt of the corresponding priority, if possible. */ +static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, + unsigned int priority) +{ + int allowed = 0; + ulong uninitialized_var(msr_mask); + bool update_esr = false, update_dear = false; + ulong crit_raw = vcpu->arch.shared->critical; + ulong crit_r1 = kvmppc_get_gpr(vcpu, 1); + bool crit; + bool keep_irq = false; + + /* Truncate crit indicators in 32 bit mode */ + if (!(vcpu->arch.shared->msr & MSR_SF)) { + crit_raw &= 0xffffffff; + crit_r1 &= 0xffffffff; + } + + /* Critical section when crit == r1 */ + crit = (crit_raw == crit_r1); + /* ... and we're in supervisor mode */ + crit = crit && !(vcpu->arch.shared->msr & MSR_PR); + + if (priority == BOOKE_IRQPRIO_EXTERNAL_LEVEL) { + priority = BOOKE_IRQPRIO_EXTERNAL; + keep_irq = true; + } + + switch (priority) { + case BOOKE_IRQPRIO_DTLB_MISS: + case BOOKE_IRQPRIO_DATA_STORAGE: + update_dear = true; + /* fall through */ + case BOOKE_IRQPRIO_INST_STORAGE: + case BOOKE_IRQPRIO_PROGRAM: + update_esr = true; + /* fall through */ + case BOOKE_IRQPRIO_ITLB_MISS: + case BOOKE_IRQPRIO_SYSCALL: + case BOOKE_IRQPRIO_FP_UNAVAIL: + case BOOKE_IRQPRIO_SPE_UNAVAIL: + case BOOKE_IRQPRIO_SPE_FP_DATA: + case BOOKE_IRQPRIO_SPE_FP_ROUND: + case BOOKE_IRQPRIO_AP_UNAVAIL: + case BOOKE_IRQPRIO_ALIGNMENT: + allowed = 1; + msr_mask = MSR_CE|MSR_ME|MSR_DE; + break; + case BOOKE_IRQPRIO_CRITICAL: + case BOOKE_IRQPRIO_WATCHDOG: + allowed = vcpu->arch.shared->msr & MSR_CE; + msr_mask = MSR_ME; + break; + case BOOKE_IRQPRIO_MACHINE_CHECK: + allowed = vcpu->arch.shared->msr & MSR_ME; + msr_mask = 0; + break; + case BOOKE_IRQPRIO_DECREMENTER: + case BOOKE_IRQPRIO_FIT: + keep_irq = true; + /* fall through */ + case BOOKE_IRQPRIO_EXTERNAL: + allowed = vcpu->arch.shared->msr & MSR_EE; + allowed = allowed && !crit; + msr_mask = MSR_CE|MSR_ME|MSR_DE; + break; + case BOOKE_IRQPRIO_DEBUG: + allowed = vcpu->arch.shared->msr & MSR_DE; + msr_mask = MSR_ME; + break; + } + + if (allowed) { + vcpu->arch.shared->srr0 = vcpu->arch.pc; + vcpu->arch.shared->srr1 = vcpu->arch.shared->msr; + vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; + if (update_esr == true) + vcpu->arch.shared->esr = vcpu->arch.queued_esr; + if (update_dear == true) + vcpu->arch.shared->dar = vcpu->arch.queued_dear; + kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask); + + if (!keep_irq) + clear_bit(priority, &vcpu->arch.pending_exceptions); + } + + return allowed; +} + +static void update_timer_ints(struct kvm_vcpu *vcpu) +{ + if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS)) + kvmppc_core_queue_dec(vcpu); + else + kvmppc_core_dequeue_dec(vcpu); +} + +static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu) +{ + unsigned long *pending = &vcpu->arch.pending_exceptions; + unsigned int priority; + + if (vcpu->requests) { + if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) { + smp_mb(); + update_timer_ints(vcpu); + } + } + + priority = __ffs(*pending); + while (priority <= BOOKE_IRQPRIO_MAX) { + if (kvmppc_booke_irqprio_deliver(vcpu, priority)) + break; + + priority = find_next_bit(pending, + BITS_PER_BYTE * sizeof(*pending), + priority + 1); + } + + /* Tell the guest about our interrupt status */ + vcpu->arch.shared->int_pending = !!*pending; +} + +/* Check pending exceptions and deliver one, if possible. */ +void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) +{ + WARN_ON_ONCE(!irqs_disabled()); + + kvmppc_core_check_exceptions(vcpu); + + if (vcpu->arch.shared->msr & MSR_WE) { + local_irq_enable(); + kvm_vcpu_block(vcpu); + local_irq_disable(); + + kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS); + kvmppc_core_check_exceptions(vcpu); + }; +} + +int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +{ + int ret; + + if (!vcpu->arch.sane) { + kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return -EINVAL; + } + + local_irq_disable(); + + kvmppc_core_prepare_to_enter(vcpu); + + if (signal_pending(current)) { + kvm_run->exit_reason = KVM_EXIT_INTR; + ret = -EINTR; + goto out; + } + + kvm_guest_enter(); + ret = __kvmppc_vcpu_run(kvm_run, vcpu); + kvm_guest_exit(); + +out: + local_irq_enable(); + return ret; +} + +/** + * kvmppc_handle_exit + * + * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV) + */ +int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int exit_nr) +{ + enum emulation_result er; + int r = RESUME_HOST; + + /* update before a new last_exit_type is rewritten */ + kvmppc_update_timing_stats(vcpu); + + local_irq_enable(); + + run->exit_reason = KVM_EXIT_UNKNOWN; + run->ready_for_interrupt_injection = 1; + + switch (exit_nr) { + case BOOKE_INTERRUPT_MACHINE_CHECK: + printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR)); + kvmppc_dump_vcpu(vcpu); + r = RESUME_HOST; + break; + + case BOOKE_INTERRUPT_EXTERNAL: + kvmppc_account_exit(vcpu, EXT_INTR_EXITS); + if (need_resched()) + cond_resched(); + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_DECREMENTER: + /* Since we switched IVPR back to the host's value, the host + * handled this interrupt the moment we enabled interrupts. + * Now we just offer it a chance to reschedule the guest. */ + kvmppc_account_exit(vcpu, DEC_EXITS); + if (need_resched()) + cond_resched(); + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_PROGRAM: + if (vcpu->arch.shared->msr & MSR_PR) { + /* Program traps generated by user-level software must be handled + * by the guest kernel. */ + kvmppc_core_queue_program(vcpu, vcpu->arch.fault_esr); + r = RESUME_GUEST; + kvmppc_account_exit(vcpu, USR_PR_INST); + break; + } + + er = kvmppc_emulate_instruction(run, vcpu); + switch (er) { + case EMULATE_DONE: + /* don't overwrite subtypes, just account kvm_stats */ + kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS); + /* Future optimization: only reload non-volatiles if + * they were actually modified by emulation. */ + r = RESUME_GUEST_NV; + break; + case EMULATE_DO_DCR: + run->exit_reason = KVM_EXIT_DCR; + r = RESUME_HOST; + break; + case EMULATE_FAIL: + /* XXX Deliver Program interrupt to guest. */ + printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", + __func__, vcpu->arch.pc, vcpu->arch.last_inst); + /* For debugging, encode the failing instruction and + * report it to userspace. */ + run->hw.hardware_exit_reason = ~0ULL << 32; + run->hw.hardware_exit_reason |= vcpu->arch.last_inst; + r = RESUME_HOST; + break; + default: + BUG(); + } + break; + + case BOOKE_INTERRUPT_FP_UNAVAIL: + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL); + kvmppc_account_exit(vcpu, FP_UNAVAIL); + r = RESUME_GUEST; + break; + +#ifdef CONFIG_SPE + case BOOKE_INTERRUPT_SPE_UNAVAIL: { + if (vcpu->arch.shared->msr & MSR_SPE) + kvmppc_vcpu_enable_spe(vcpu); + else + kvmppc_booke_queue_irqprio(vcpu, + BOOKE_IRQPRIO_SPE_UNAVAIL); + r = RESUME_GUEST; + break; + } + + case BOOKE_INTERRUPT_SPE_FP_DATA: + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_DATA); + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_SPE_FP_ROUND: + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND); + r = RESUME_GUEST; + break; +#else + case BOOKE_INTERRUPT_SPE_UNAVAIL: + /* + * Guest wants SPE, but host kernel doesn't support it. Send + * an "unimplemented operation" program check to the guest. + */ + kvmppc_core_queue_program(vcpu, ESR_PUO | ESR_SPV); + r = RESUME_GUEST; + break; + + /* + * These really should never happen without CONFIG_SPE, + * as we should never enable the real MSR[SPE] in the guest. + */ + case BOOKE_INTERRUPT_SPE_FP_DATA: + case BOOKE_INTERRUPT_SPE_FP_ROUND: + printk(KERN_CRIT "%s: unexpected SPE interrupt %u at %08lx\n", + __func__, exit_nr, vcpu->arch.pc); + run->hw.hardware_exit_reason = exit_nr; + r = RESUME_HOST; + break; +#endif + + case BOOKE_INTERRUPT_DATA_STORAGE: + kvmppc_core_queue_data_storage(vcpu, vcpu->arch.fault_dear, + vcpu->arch.fault_esr); + kvmppc_account_exit(vcpu, DSI_EXITS); + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_INST_STORAGE: + kvmppc_core_queue_inst_storage(vcpu, vcpu->arch.fault_esr); + kvmppc_account_exit(vcpu, ISI_EXITS); + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_SYSCALL: + if (!(vcpu->arch.shared->msr & MSR_PR) && + (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) { + /* KVM PV hypercalls */ + kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu)); + r = RESUME_GUEST; + } else { + /* Guest syscalls */ + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL); + } + kvmppc_account_exit(vcpu, SYSCALL_EXITS); + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_DTLB_MISS: { + unsigned long eaddr = vcpu->arch.fault_dear; + int gtlb_index; + gpa_t gpaddr; + gfn_t gfn; + +#ifdef CONFIG_KVM_E500 + if (!(vcpu->arch.shared->msr & MSR_PR) && + (eaddr & PAGE_MASK) == vcpu->arch.magic_page_ea) { + kvmppc_map_magic(vcpu); + kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS); + r = RESUME_GUEST; + + break; + } +#endif + + /* Check the guest TLB. */ + gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr); + if (gtlb_index < 0) { + /* The guest didn't have a mapping for it. */ + kvmppc_core_queue_dtlb_miss(vcpu, + vcpu->arch.fault_dear, + vcpu->arch.fault_esr); + kvmppc_mmu_dtlb_miss(vcpu); + kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS); + r = RESUME_GUEST; + break; + } + + gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr); + gfn = gpaddr >> PAGE_SHIFT; + + if (kvm_is_visible_gfn(vcpu->kvm, gfn)) { + /* The guest TLB had a mapping, but the shadow TLB + * didn't, and it is RAM. This could be because: + * a) the entry is mapping the host kernel, or + * b) the guest used a large mapping which we're faking + * Either way, we need to satisfy the fault without + * invoking the guest. */ + kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index); + kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS); + r = RESUME_GUEST; + } else { + /* Guest has mapped and accessed a page which is not + * actually RAM. */ + vcpu->arch.paddr_accessed = gpaddr; + r = kvmppc_emulate_mmio(run, vcpu); + kvmppc_account_exit(vcpu, MMIO_EXITS); + } + + break; + } + + case BOOKE_INTERRUPT_ITLB_MISS: { + unsigned long eaddr = vcpu->arch.pc; + gpa_t gpaddr; + gfn_t gfn; + int gtlb_index; + + r = RESUME_GUEST; + + /* Check the guest TLB. */ + gtlb_index = kvmppc_mmu_itlb_index(vcpu, eaddr); + if (gtlb_index < 0) { + /* The guest didn't have a mapping for it. */ + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS); + kvmppc_mmu_itlb_miss(vcpu); + kvmppc_account_exit(vcpu, ITLB_REAL_MISS_EXITS); + break; + } + + kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS); + + gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr); + gfn = gpaddr >> PAGE_SHIFT; + + if (kvm_is_visible_gfn(vcpu->kvm, gfn)) { + /* The guest TLB had a mapping, but the shadow TLB + * didn't. This could be because: + * a) the entry is mapping the host kernel, or + * b) the guest used a large mapping which we're faking + * Either way, we need to satisfy the fault without + * invoking the guest. */ + kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index); + } else { + /* Guest mapped and leaped at non-RAM! */ + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK); + } + + break; + } + + case BOOKE_INTERRUPT_DEBUG: { + u32 dbsr; + + vcpu->arch.pc = mfspr(SPRN_CSRR0); + + /* clear IAC events in DBSR register */ + dbsr = mfspr(SPRN_DBSR); + dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4; + mtspr(SPRN_DBSR, dbsr); + + run->exit_reason = KVM_EXIT_DEBUG; + kvmppc_account_exit(vcpu, DEBUG_EXITS); + r = RESUME_HOST; + break; + } + + default: + printk(KERN_EMERG "exit_nr %d\n", exit_nr); + BUG(); + } + + local_irq_disable(); + + kvmppc_core_prepare_to_enter(vcpu); + + if (!(r & RESUME_HOST)) { + /* To avoid clobbering exit_reason, only check for signals if + * we aren't already exiting to userspace for some other + * reason. */ + if (signal_pending(current)) { + run->exit_reason = KVM_EXIT_INTR; + r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); + kvmppc_account_exit(vcpu, SIGNAL_EXITS); + } + } + + return r; +} + +/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */ +int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) +{ + int i; + int r; + + vcpu->arch.pc = 0; + vcpu->arch.shared->msr = 0; + vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS; + vcpu->arch.shared->pir = vcpu->vcpu_id; + kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */ + + vcpu->arch.shadow_pid = 1; + + /* Eye-catching numbers so we know if the guest takes an interrupt + * before it's programmed its own IVPR/IVORs. */ + vcpu->arch.ivpr = 0x55550000; + for (i = 0; i < BOOKE_IRQPRIO_MAX; i++) + vcpu->arch.ivor[i] = 0x7700 | i * 4; + + kvmppc_init_timing_stats(vcpu); + + r = kvmppc_core_vcpu_setup(vcpu); + kvmppc_sanity_check(vcpu); + return r; +} + +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + int i; + + regs->pc = vcpu->arch.pc; + regs->cr = kvmppc_get_cr(vcpu); + regs->ctr = vcpu->arch.ctr; + regs->lr = vcpu->arch.lr; + regs->xer = kvmppc_get_xer(vcpu); + regs->msr = vcpu->arch.shared->msr; + regs->srr0 = vcpu->arch.shared->srr0; + regs->srr1 = vcpu->arch.shared->srr1; + regs->pid = vcpu->arch.pid; + regs->sprg0 = vcpu->arch.shared->sprg0; + regs->sprg1 = vcpu->arch.shared->sprg1; + regs->sprg2 = vcpu->arch.shared->sprg2; + regs->sprg3 = vcpu->arch.shared->sprg3; + regs->sprg4 = vcpu->arch.shared->sprg4; + regs->sprg5 = vcpu->arch.shared->sprg5; + regs->sprg6 = vcpu->arch.shared->sprg6; + regs->sprg7 = vcpu->arch.shared->sprg7; + + for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) + regs->gpr[i] = kvmppc_get_gpr(vcpu, i); + + return 0; +} + +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + int i; + + vcpu->arch.pc = regs->pc; + kvmppc_set_cr(vcpu, regs->cr); + vcpu->arch.ctr = regs->ctr; + vcpu->arch.lr = regs->lr; + kvmppc_set_xer(vcpu, regs->xer); + kvmppc_set_msr(vcpu, regs->msr); + vcpu->arch.shared->srr0 = regs->srr0; + vcpu->arch.shared->srr1 = regs->srr1; + kvmppc_set_pid(vcpu, regs->pid); + vcpu->arch.shared->sprg0 = regs->sprg0; + vcpu->arch.shared->sprg1 = regs->sprg1; + vcpu->arch.shared->sprg2 = regs->sprg2; + vcpu->arch.shared->sprg3 = regs->sprg3; + vcpu->arch.shared->sprg4 = regs->sprg4; + vcpu->arch.shared->sprg5 = regs->sprg5; + vcpu->arch.shared->sprg6 = regs->sprg6; + vcpu->arch.shared->sprg7 = regs->sprg7; + + for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) + kvmppc_set_gpr(vcpu, i, regs->gpr[i]); + + return 0; +} + +static void get_sregs_base(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + u64 tb = get_tb(); + + sregs->u.e.features |= KVM_SREGS_E_BASE; + + sregs->u.e.csrr0 = vcpu->arch.csrr0; + sregs->u.e.csrr1 = vcpu->arch.csrr1; + sregs->u.e.mcsr = vcpu->arch.mcsr; + sregs->u.e.esr = vcpu->arch.shared->esr; + sregs->u.e.dear = vcpu->arch.shared->dar; + sregs->u.e.tsr = vcpu->arch.tsr; + sregs->u.e.tcr = vcpu->arch.tcr; + sregs->u.e.dec = kvmppc_get_dec(vcpu, tb); + sregs->u.e.tb = tb; + sregs->u.e.vrsave = vcpu->arch.vrsave; +} + +static int set_sregs_base(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + if (!(sregs->u.e.features & KVM_SREGS_E_BASE)) + return 0; + + vcpu->arch.csrr0 = sregs->u.e.csrr0; + vcpu->arch.csrr1 = sregs->u.e.csrr1; + vcpu->arch.mcsr = sregs->u.e.mcsr; + vcpu->arch.shared->esr = sregs->u.e.esr; + vcpu->arch.shared->dar = sregs->u.e.dear; + vcpu->arch.vrsave = sregs->u.e.vrsave; + kvmppc_set_tcr(vcpu, sregs->u.e.tcr); + + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) { + vcpu->arch.dec = sregs->u.e.dec; + kvmppc_emulate_dec(vcpu); + } + + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) { + vcpu->arch.tsr = sregs->u.e.tsr; + update_timer_ints(vcpu); + } + + return 0; +} + +static void get_sregs_arch206(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + sregs->u.e.features |= KVM_SREGS_E_ARCH206; + + sregs->u.e.pir = vcpu->vcpu_id; + sregs->u.e.mcsrr0 = vcpu->arch.mcsrr0; + sregs->u.e.mcsrr1 = vcpu->arch.mcsrr1; + sregs->u.e.decar = vcpu->arch.decar; + sregs->u.e.ivpr = vcpu->arch.ivpr; +} + +static int set_sregs_arch206(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + if (!(sregs->u.e.features & KVM_SREGS_E_ARCH206)) + return 0; + + if (sregs->u.e.pir != vcpu->vcpu_id) + return -EINVAL; + + vcpu->arch.mcsrr0 = sregs->u.e.mcsrr0; + vcpu->arch.mcsrr1 = sregs->u.e.mcsrr1; + vcpu->arch.decar = sregs->u.e.decar; + vcpu->arch.ivpr = sregs->u.e.ivpr; + + return 0; +} + +void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + sregs->u.e.features |= KVM_SREGS_E_IVOR; + + sregs->u.e.ivor_low[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]; + sregs->u.e.ivor_low[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]; + sregs->u.e.ivor_low[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]; + sregs->u.e.ivor_low[3] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]; + sregs->u.e.ivor_low[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]; + sregs->u.e.ivor_low[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]; + sregs->u.e.ivor_low[6] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]; + sregs->u.e.ivor_low[7] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]; + sregs->u.e.ivor_low[8] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]; + sregs->u.e.ivor_low[9] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]; + sregs->u.e.ivor_low[10] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]; + sregs->u.e.ivor_low[11] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]; + sregs->u.e.ivor_low[12] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]; + sregs->u.e.ivor_low[13] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]; + sregs->u.e.ivor_low[14] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]; + sregs->u.e.ivor_low[15] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; +} + +int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + if (!(sregs->u.e.features & KVM_SREGS_E_IVOR)) + return 0; + + vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = sregs->u.e.ivor_low[0]; + vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = sregs->u.e.ivor_low[1]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = sregs->u.e.ivor_low[2]; + vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = sregs->u.e.ivor_low[3]; + vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = sregs->u.e.ivor_low[4]; + vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = sregs->u.e.ivor_low[5]; + vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = sregs->u.e.ivor_low[6]; + vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = sregs->u.e.ivor_low[7]; + vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = sregs->u.e.ivor_low[8]; + vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = sregs->u.e.ivor_low[9]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = sregs->u.e.ivor_low[10]; + vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = sregs->u.e.ivor_low[11]; + vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = sregs->u.e.ivor_low[12]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = sregs->u.e.ivor_low[13]; + vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = sregs->u.e.ivor_low[14]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = sregs->u.e.ivor_low[15]; + + return 0; +} + +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + sregs->pvr = vcpu->arch.pvr; + + get_sregs_base(vcpu, sregs); + get_sregs_arch206(vcpu, sregs); + kvmppc_core_get_sregs(vcpu, sregs); + return 0; +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + int ret; + + if (vcpu->arch.pvr != sregs->pvr) + return -EINVAL; + + ret = set_sregs_base(vcpu, sregs); + if (ret < 0) + return ret; + + ret = set_sregs_arch206(vcpu, sregs); + if (ret < 0) + return ret; + + return kvmppc_core_set_sregs(vcpu, sregs); +} + +int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + return -EINVAL; +} + +int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -ENOTSUPP; +} + +int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -ENOTSUPP; +} + +int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + int r; + + r = kvmppc_core_vcpu_translate(vcpu, tr); + return r; +} + +int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) +{ + return -ENOTSUPP; +} + +int kvmppc_core_prepare_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem) +{ + return 0; +} + +void kvmppc_core_commit_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem) +{ +} + +int kvmppc_core_init_vm(struct kvm *kvm) +{ + return 0; +} + +void kvmppc_core_destroy_vm(struct kvm *kvm) +{ +} + +void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr) +{ + vcpu->arch.tcr = new_tcr; + update_timer_ints(vcpu); +} + +void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits) +{ + set_bits(tsr_bits, &vcpu->arch.tsr); + smp_wmb(); + kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); + kvm_vcpu_kick(vcpu); +} + +void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits) +{ + clear_bits(tsr_bits, &vcpu->arch.tsr); + update_timer_ints(vcpu); +} + +void kvmppc_decrementer_func(unsigned long data) +{ + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; + + kvmppc_set_tsr_bits(vcpu, TSR_DIS); +} + +int __init kvmppc_booke_init(void) +{ + unsigned long ivor[16]; + unsigned long max_ivor = 0; + int i; + + /* We install our own exception handlers by hijacking IVPR. IVPR must + * be 16-bit aligned, so we need a 64KB allocation. */ + kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO, + VCPU_SIZE_ORDER); + if (!kvmppc_booke_handlers) + return -ENOMEM; + + /* XXX make sure our handlers are smaller than Linux's */ + + /* Copy our interrupt handlers to match host IVORs. That way we don't + * have to swap the IVORs on every guest/host transition. */ + ivor[0] = mfspr(SPRN_IVOR0); + ivor[1] = mfspr(SPRN_IVOR1); + ivor[2] = mfspr(SPRN_IVOR2); + ivor[3] = mfspr(SPRN_IVOR3); + ivor[4] = mfspr(SPRN_IVOR4); + ivor[5] = mfspr(SPRN_IVOR5); + ivor[6] = mfspr(SPRN_IVOR6); + ivor[7] = mfspr(SPRN_IVOR7); + ivor[8] = mfspr(SPRN_IVOR8); + ivor[9] = mfspr(SPRN_IVOR9); + ivor[10] = mfspr(SPRN_IVOR10); + ivor[11] = mfspr(SPRN_IVOR11); + ivor[12] = mfspr(SPRN_IVOR12); + ivor[13] = mfspr(SPRN_IVOR13); + ivor[14] = mfspr(SPRN_IVOR14); + ivor[15] = mfspr(SPRN_IVOR15); + + for (i = 0; i < 16; i++) { + if (ivor[i] > max_ivor) + max_ivor = ivor[i]; + + memcpy((void *)kvmppc_booke_handlers + ivor[i], + kvmppc_handlers_start + i * kvmppc_handler_len, + kvmppc_handler_len); + } + flush_icache_range(kvmppc_booke_handlers, + kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); + + return 0; +} + +void __exit kvmppc_booke_exit(void) +{ + free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER); + kvm_exit(); +} diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h new file mode 100644 index 00000000..2fe20270 --- /dev/null +++ b/arch/powerpc/kvm/booke.h @@ -0,0 +1,74 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#ifndef __KVM_BOOKE_H__ +#define __KVM_BOOKE_H__ + +#include <linux/types.h> +#include <linux/kvm_host.h> +#include <asm/kvm_ppc.h> +#include "timing.h" + +/* interrupt priortity ordering */ +#define BOOKE_IRQPRIO_DATA_STORAGE 0 +#define BOOKE_IRQPRIO_INST_STORAGE 1 +#define BOOKE_IRQPRIO_ALIGNMENT 2 +#define BOOKE_IRQPRIO_PROGRAM 3 +#define BOOKE_IRQPRIO_FP_UNAVAIL 4 +#define BOOKE_IRQPRIO_SPE_UNAVAIL 5 +#define BOOKE_IRQPRIO_SPE_FP_DATA 6 +#define BOOKE_IRQPRIO_SPE_FP_ROUND 7 +#define BOOKE_IRQPRIO_SYSCALL 8 +#define BOOKE_IRQPRIO_AP_UNAVAIL 9 +#define BOOKE_IRQPRIO_DTLB_MISS 10 +#define BOOKE_IRQPRIO_ITLB_MISS 11 +#define BOOKE_IRQPRIO_MACHINE_CHECK 12 +#define BOOKE_IRQPRIO_DEBUG 13 +#define BOOKE_IRQPRIO_CRITICAL 14 +#define BOOKE_IRQPRIO_WATCHDOG 15 +#define BOOKE_IRQPRIO_EXTERNAL 16 +#define BOOKE_IRQPRIO_FIT 17 +#define BOOKE_IRQPRIO_DECREMENTER 18 +#define BOOKE_IRQPRIO_PERFORMANCE_MONITOR 19 +/* Internal pseudo-irqprio for level triggered externals */ +#define BOOKE_IRQPRIO_EXTERNAL_LEVEL 20 +#define BOOKE_IRQPRIO_MAX 20 + +extern unsigned long kvmppc_booke_handlers; + +void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr); +void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr); + +void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr); +void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits); +void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits); + +int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int inst, int *advance); +int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt); +int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs); + +/* low-level asm code to transfer guest state */ +void kvmppc_load_guest_spe(struct kvm_vcpu *vcpu); +void kvmppc_save_guest_spe(struct kvm_vcpu *vcpu); + +/* high-level function, manages flags, host state */ +void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu); + +#endif /* __KVM_BOOKE_H__ */ diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c new file mode 100644 index 00000000..3e652da3 --- /dev/null +++ b/arch/powerpc/kvm/booke_emulate.c @@ -0,0 +1,272 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * Copyright 2011 Freescale Semiconductor, Inc. + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#include <linux/kvm_host.h> +#include <asm/disassemble.h> + +#include "booke.h" + +#define OP_19_XOP_RFI 50 + +#define OP_31_XOP_MFMSR 83 +#define OP_31_XOP_WRTEE 131 +#define OP_31_XOP_MTMSR 146 +#define OP_31_XOP_WRTEEI 163 + +static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) +{ + vcpu->arch.pc = vcpu->arch.shared->srr0; + kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1); +} + +int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int inst, int *advance) +{ + int emulated = EMULATE_DONE; + int rs; + int rt; + + switch (get_op(inst)) { + case 19: + switch (get_xop(inst)) { + case OP_19_XOP_RFI: + kvmppc_emul_rfi(vcpu); + kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS); + *advance = 0; + break; + + default: + emulated = EMULATE_FAIL; + break; + } + break; + + case 31: + switch (get_xop(inst)) { + + case OP_31_XOP_MFMSR: + rt = get_rt(inst); + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->msr); + kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS); + break; + + case OP_31_XOP_MTMSR: + rs = get_rs(inst); + kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS); + kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, rs)); + break; + + case OP_31_XOP_WRTEE: + rs = get_rs(inst); + vcpu->arch.shared->msr = (vcpu->arch.shared->msr & ~MSR_EE) + | (kvmppc_get_gpr(vcpu, rs) & MSR_EE); + kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); + break; + + case OP_31_XOP_WRTEEI: + vcpu->arch.shared->msr = (vcpu->arch.shared->msr & ~MSR_EE) + | (inst & MSR_EE); + kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); + break; + + default: + emulated = EMULATE_FAIL; + } + + break; + + default: + emulated = EMULATE_FAIL; + } + + return emulated; +} + +int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) +{ + int emulated = EMULATE_DONE; + ulong spr_val = kvmppc_get_gpr(vcpu, rs); + + switch (sprn) { + case SPRN_DEAR: + vcpu->arch.shared->dar = spr_val; break; + case SPRN_ESR: + vcpu->arch.shared->esr = spr_val; break; + case SPRN_DBCR0: + vcpu->arch.dbcr0 = spr_val; break; + case SPRN_DBCR1: + vcpu->arch.dbcr1 = spr_val; break; + case SPRN_DBSR: + vcpu->arch.dbsr &= ~spr_val; break; + case SPRN_TSR: + kvmppc_clr_tsr_bits(vcpu, spr_val); + break; + case SPRN_TCR: + kvmppc_set_tcr(vcpu, spr_val); + break; + + /* Note: SPRG4-7 are user-readable. These values are + * loaded into the real SPRGs when resuming the + * guest. */ + case SPRN_SPRG4: + vcpu->arch.shared->sprg4 = spr_val; break; + case SPRN_SPRG5: + vcpu->arch.shared->sprg5 = spr_val; break; + case SPRN_SPRG6: + vcpu->arch.shared->sprg6 = spr_val; break; + case SPRN_SPRG7: + vcpu->arch.shared->sprg7 = spr_val; break; + + case SPRN_IVPR: + vcpu->arch.ivpr = spr_val; + break; + case SPRN_IVOR0: + vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = spr_val; + break; + case SPRN_IVOR1: + vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = spr_val; + break; + case SPRN_IVOR2: + vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = spr_val; + break; + case SPRN_IVOR3: + vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = spr_val; + break; + case SPRN_IVOR4: + vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = spr_val; + break; + case SPRN_IVOR5: + vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = spr_val; + break; + case SPRN_IVOR6: + vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = spr_val; + break; + case SPRN_IVOR7: + vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = spr_val; + break; + case SPRN_IVOR8: + vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = spr_val; + break; + case SPRN_IVOR9: + vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = spr_val; + break; + case SPRN_IVOR10: + vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = spr_val; + break; + case SPRN_IVOR11: + vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = spr_val; + break; + case SPRN_IVOR12: + vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = spr_val; + break; + case SPRN_IVOR13: + vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = spr_val; + break; + case SPRN_IVOR14: + vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = spr_val; + break; + case SPRN_IVOR15: + vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = spr_val; + break; + + default: + emulated = EMULATE_FAIL; + } + + return emulated; +} + +int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) +{ + int emulated = EMULATE_DONE; + + switch (sprn) { + case SPRN_IVPR: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivpr); break; + case SPRN_DEAR: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar); break; + case SPRN_ESR: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->esr); break; + case SPRN_DBCR0: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr0); break; + case SPRN_DBCR1: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr1); break; + case SPRN_DBSR: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbsr); break; + case SPRN_TSR: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.tsr); break; + case SPRN_TCR: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.tcr); break; + + case SPRN_IVOR0: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]); + break; + case SPRN_IVOR1: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]); + break; + case SPRN_IVOR2: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]); + break; + case SPRN_IVOR3: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]); + break; + case SPRN_IVOR4: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]); + break; + case SPRN_IVOR5: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]); + break; + case SPRN_IVOR6: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]); + break; + case SPRN_IVOR7: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]); + break; + case SPRN_IVOR8: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]); + break; + case SPRN_IVOR9: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]); + break; + case SPRN_IVOR10: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]); + break; + case SPRN_IVOR11: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]); + break; + case SPRN_IVOR12: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]); + break; + case SPRN_IVOR13: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]); + break; + case SPRN_IVOR14: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]); + break; + case SPRN_IVOR15: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]); + break; + + default: + emulated = EMULATE_FAIL; + } + + return emulated; +} diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S new file mode 100644 index 00000000..c8c4b878 --- /dev/null +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -0,0 +1,487 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2007 + * Copyright 2011 Freescale Semiconductor, Inc. + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#include <asm/ppc_asm.h> +#include <asm/kvm_asm.h> +#include <asm/reg.h> +#include <asm/mmu-44x.h> +#include <asm/page.h> +#include <asm/asm-offsets.h> + +#define VCPU_GPR(n) (VCPU_GPRS + (n * 4)) + +/* The host stack layout: */ +#define HOST_R1 0 /* Implied by stwu. */ +#define HOST_CALLEE_LR 4 +#define HOST_RUN 8 +/* r2 is special: it holds 'current', and it made nonvolatile in the + * kernel with the -ffixed-r2 gcc option. */ +#define HOST_R2 12 +#define HOST_CR 16 +#define HOST_NV_GPRS 20 +#define HOST_NV_GPR(n) (HOST_NV_GPRS + ((n - 14) * 4)) +#define HOST_MIN_STACK_SIZE (HOST_NV_GPR(31) + 4) +#define HOST_STACK_SIZE (((HOST_MIN_STACK_SIZE + 15) / 16) * 16) /* Align. */ +#define HOST_STACK_LR (HOST_STACK_SIZE + 4) /* In caller stack frame. */ + +#define NEED_INST_MASK ((1<<BOOKE_INTERRUPT_PROGRAM) | \ + (1<<BOOKE_INTERRUPT_DTLB_MISS) | \ + (1<<BOOKE_INTERRUPT_DEBUG)) + +#define NEED_DEAR_MASK ((1<<BOOKE_INTERRUPT_DATA_STORAGE) | \ + (1<<BOOKE_INTERRUPT_DTLB_MISS)) + +#define NEED_ESR_MASK ((1<<BOOKE_INTERRUPT_DATA_STORAGE) | \ + (1<<BOOKE_INTERRUPT_INST_STORAGE) | \ + (1<<BOOKE_INTERRUPT_PROGRAM) | \ + (1<<BOOKE_INTERRUPT_DTLB_MISS)) + +.macro KVM_HANDLER ivor_nr +_GLOBAL(kvmppc_handler_\ivor_nr) + /* Get pointer to vcpu and record exit number. */ + mtspr SPRN_SPRG_WSCRATCH0, r4 + mfspr r4, SPRN_SPRG_RVCPU + stw r5, VCPU_GPR(r5)(r4) + stw r6, VCPU_GPR(r6)(r4) + mfctr r5 + lis r6, kvmppc_resume_host@h + stw r5, VCPU_CTR(r4) + li r5, \ivor_nr + ori r6, r6, kvmppc_resume_host@l + mtctr r6 + bctr +.endm + +_GLOBAL(kvmppc_handlers_start) +KVM_HANDLER BOOKE_INTERRUPT_CRITICAL +KVM_HANDLER BOOKE_INTERRUPT_MACHINE_CHECK +KVM_HANDLER BOOKE_INTERRUPT_DATA_STORAGE +KVM_HANDLER BOOKE_INTERRUPT_INST_STORAGE +KVM_HANDLER BOOKE_INTERRUPT_EXTERNAL +KVM_HANDLER BOOKE_INTERRUPT_ALIGNMENT +KVM_HANDLER BOOKE_INTERRUPT_PROGRAM +KVM_HANDLER BOOKE_INTERRUPT_FP_UNAVAIL +KVM_HANDLER BOOKE_INTERRUPT_SYSCALL +KVM_HANDLER BOOKE_INTERRUPT_AP_UNAVAIL +KVM_HANDLER BOOKE_INTERRUPT_DECREMENTER +KVM_HANDLER BOOKE_INTERRUPT_FIT +KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG +KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS +KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS +KVM_HANDLER BOOKE_INTERRUPT_DEBUG +KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL +KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA +KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND + +_GLOBAL(kvmppc_handler_len) + .long kvmppc_handler_1 - kvmppc_handler_0 + + +/* Registers: + * SPRG_SCRATCH0: guest r4 + * r4: vcpu pointer + * r5: KVM exit number + */ +_GLOBAL(kvmppc_resume_host) + stw r3, VCPU_GPR(r3)(r4) + mfcr r3 + stw r3, VCPU_CR(r4) + stw r7, VCPU_GPR(r7)(r4) + stw r8, VCPU_GPR(r8)(r4) + stw r9, VCPU_GPR(r9)(r4) + + li r6, 1 + slw r6, r6, r5 + +#ifdef CONFIG_KVM_EXIT_TIMING + /* save exit time */ +1: + mfspr r7, SPRN_TBRU + mfspr r8, SPRN_TBRL + mfspr r9, SPRN_TBRU + cmpw r9, r7 + bne 1b + stw r8, VCPU_TIMING_EXIT_TBL(r4) + stw r9, VCPU_TIMING_EXIT_TBU(r4) +#endif + + /* Save the faulting instruction and all GPRs for emulation. */ + andi. r7, r6, NEED_INST_MASK + beq ..skip_inst_copy + mfspr r9, SPRN_SRR0 + mfmsr r8 + ori r7, r8, MSR_DS + mtmsr r7 + isync + lwz r9, 0(r9) + mtmsr r8 + isync + stw r9, VCPU_LAST_INST(r4) + + stw r15, VCPU_GPR(r15)(r4) + stw r16, VCPU_GPR(r16)(r4) + stw r17, VCPU_GPR(r17)(r4) + stw r18, VCPU_GPR(r18)(r4) + stw r19, VCPU_GPR(r19)(r4) + stw r20, VCPU_GPR(r20)(r4) + stw r21, VCPU_GPR(r21)(r4) + stw r22, VCPU_GPR(r22)(r4) + stw r23, VCPU_GPR(r23)(r4) + stw r24, VCPU_GPR(r24)(r4) + stw r25, VCPU_GPR(r25)(r4) + stw r26, VCPU_GPR(r26)(r4) + stw r27, VCPU_GPR(r27)(r4) + stw r28, VCPU_GPR(r28)(r4) + stw r29, VCPU_GPR(r29)(r4) + stw r30, VCPU_GPR(r30)(r4) + stw r31, VCPU_GPR(r31)(r4) +..skip_inst_copy: + + /* Also grab DEAR and ESR before the host can clobber them. */ + + andi. r7, r6, NEED_DEAR_MASK + beq ..skip_dear + mfspr r9, SPRN_DEAR + stw r9, VCPU_FAULT_DEAR(r4) +..skip_dear: + + andi. r7, r6, NEED_ESR_MASK + beq ..skip_esr + mfspr r9, SPRN_ESR + stw r9, VCPU_FAULT_ESR(r4) +..skip_esr: + + /* Save remaining volatile guest register state to vcpu. */ + stw r0, VCPU_GPR(r0)(r4) + stw r1, VCPU_GPR(r1)(r4) + stw r2, VCPU_GPR(r2)(r4) + stw r10, VCPU_GPR(r10)(r4) + stw r11, VCPU_GPR(r11)(r4) + stw r12, VCPU_GPR(r12)(r4) + stw r13, VCPU_GPR(r13)(r4) + stw r14, VCPU_GPR(r14)(r4) /* We need a NV GPR below. */ + mflr r3 + stw r3, VCPU_LR(r4) + mfxer r3 + stw r3, VCPU_XER(r4) + mfspr r3, SPRN_SPRG_RSCRATCH0 + stw r3, VCPU_GPR(r4)(r4) + mfspr r3, SPRN_SRR0 + stw r3, VCPU_PC(r4) + + /* Restore host stack pointer and PID before IVPR, since the host + * exception handlers use them. */ + lwz r1, VCPU_HOST_STACK(r4) + lwz r3, VCPU_HOST_PID(r4) + mtspr SPRN_PID, r3 + +#ifdef CONFIG_FSL_BOOKE + /* we cheat and know that Linux doesn't use PID1 which is always 0 */ + lis r3, 0 + mtspr SPRN_PID1, r3 +#endif + + /* Restore host IVPR before re-enabling interrupts. We cheat and know + * that Linux IVPR is always 0xc0000000. */ + lis r3, 0xc000 + mtspr SPRN_IVPR, r3 + + /* Switch to kernel stack and jump to handler. */ + LOAD_REG_ADDR(r3, kvmppc_handle_exit) + mtctr r3 + lwz r3, HOST_RUN(r1) + lwz r2, HOST_R2(r1) + mr r14, r4 /* Save vcpu pointer. */ + + bctrl /* kvmppc_handle_exit() */ + + /* Restore vcpu pointer and the nonvolatiles we used. */ + mr r4, r14 + lwz r14, VCPU_GPR(r14)(r4) + + /* Sometimes instruction emulation must restore complete GPR state. */ + andi. r5, r3, RESUME_FLAG_NV + beq ..skip_nv_load + lwz r15, VCPU_GPR(r15)(r4) + lwz r16, VCPU_GPR(r16)(r4) + lwz r17, VCPU_GPR(r17)(r4) + lwz r18, VCPU_GPR(r18)(r4) + lwz r19, VCPU_GPR(r19)(r4) + lwz r20, VCPU_GPR(r20)(r4) + lwz r21, VCPU_GPR(r21)(r4) + lwz r22, VCPU_GPR(r22)(r4) + lwz r23, VCPU_GPR(r23)(r4) + lwz r24, VCPU_GPR(r24)(r4) + lwz r25, VCPU_GPR(r25)(r4) + lwz r26, VCPU_GPR(r26)(r4) + lwz r27, VCPU_GPR(r27)(r4) + lwz r28, VCPU_GPR(r28)(r4) + lwz r29, VCPU_GPR(r29)(r4) + lwz r30, VCPU_GPR(r30)(r4) + lwz r31, VCPU_GPR(r31)(r4) +..skip_nv_load: + + /* Should we return to the guest? */ + andi. r5, r3, RESUME_FLAG_HOST + beq lightweight_exit + + srawi r3, r3, 2 /* Shift -ERR back down. */ + +heavyweight_exit: + /* Not returning to guest. */ + +#ifdef CONFIG_SPE + /* save guest SPEFSCR and load host SPEFSCR */ + mfspr r9, SPRN_SPEFSCR + stw r9, VCPU_SPEFSCR(r4) + lwz r9, VCPU_HOST_SPEFSCR(r4) + mtspr SPRN_SPEFSCR, r9 +#endif + + /* We already saved guest volatile register state; now save the + * non-volatiles. */ + stw r15, VCPU_GPR(r15)(r4) + stw r16, VCPU_GPR(r16)(r4) + stw r17, VCPU_GPR(r17)(r4) + stw r18, VCPU_GPR(r18)(r4) + stw r19, VCPU_GPR(r19)(r4) + stw r20, VCPU_GPR(r20)(r4) + stw r21, VCPU_GPR(r21)(r4) + stw r22, VCPU_GPR(r22)(r4) + stw r23, VCPU_GPR(r23)(r4) + stw r24, VCPU_GPR(r24)(r4) + stw r25, VCPU_GPR(r25)(r4) + stw r26, VCPU_GPR(r26)(r4) + stw r27, VCPU_GPR(r27)(r4) + stw r28, VCPU_GPR(r28)(r4) + stw r29, VCPU_GPR(r29)(r4) + stw r30, VCPU_GPR(r30)(r4) + stw r31, VCPU_GPR(r31)(r4) + + /* Load host non-volatile register state from host stack. */ + lwz r14, HOST_NV_GPR(r14)(r1) + lwz r15, HOST_NV_GPR(r15)(r1) + lwz r16, HOST_NV_GPR(r16)(r1) + lwz r17, HOST_NV_GPR(r17)(r1) + lwz r18, HOST_NV_GPR(r18)(r1) + lwz r19, HOST_NV_GPR(r19)(r1) + lwz r20, HOST_NV_GPR(r20)(r1) + lwz r21, HOST_NV_GPR(r21)(r1) + lwz r22, HOST_NV_GPR(r22)(r1) + lwz r23, HOST_NV_GPR(r23)(r1) + lwz r24, HOST_NV_GPR(r24)(r1) + lwz r25, HOST_NV_GPR(r25)(r1) + lwz r26, HOST_NV_GPR(r26)(r1) + lwz r27, HOST_NV_GPR(r27)(r1) + lwz r28, HOST_NV_GPR(r28)(r1) + lwz r29, HOST_NV_GPR(r29)(r1) + lwz r30, HOST_NV_GPR(r30)(r1) + lwz r31, HOST_NV_GPR(r31)(r1) + + /* Return to kvm_vcpu_run(). */ + lwz r4, HOST_STACK_LR(r1) + lwz r5, HOST_CR(r1) + addi r1, r1, HOST_STACK_SIZE + mtlr r4 + mtcr r5 + /* r3 still contains the return code from kvmppc_handle_exit(). */ + blr + + +/* Registers: + * r3: kvm_run pointer + * r4: vcpu pointer + */ +_GLOBAL(__kvmppc_vcpu_run) + stwu r1, -HOST_STACK_SIZE(r1) + stw r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */ + + /* Save host state to stack. */ + stw r3, HOST_RUN(r1) + mflr r3 + stw r3, HOST_STACK_LR(r1) + mfcr r5 + stw r5, HOST_CR(r1) + + /* Save host non-volatile register state to stack. */ + stw r14, HOST_NV_GPR(r14)(r1) + stw r15, HOST_NV_GPR(r15)(r1) + stw r16, HOST_NV_GPR(r16)(r1) + stw r17, HOST_NV_GPR(r17)(r1) + stw r18, HOST_NV_GPR(r18)(r1) + stw r19, HOST_NV_GPR(r19)(r1) + stw r20, HOST_NV_GPR(r20)(r1) + stw r21, HOST_NV_GPR(r21)(r1) + stw r22, HOST_NV_GPR(r22)(r1) + stw r23, HOST_NV_GPR(r23)(r1) + stw r24, HOST_NV_GPR(r24)(r1) + stw r25, HOST_NV_GPR(r25)(r1) + stw r26, HOST_NV_GPR(r26)(r1) + stw r27, HOST_NV_GPR(r27)(r1) + stw r28, HOST_NV_GPR(r28)(r1) + stw r29, HOST_NV_GPR(r29)(r1) + stw r30, HOST_NV_GPR(r30)(r1) + stw r31, HOST_NV_GPR(r31)(r1) + + /* Load guest non-volatiles. */ + lwz r14, VCPU_GPR(r14)(r4) + lwz r15, VCPU_GPR(r15)(r4) + lwz r16, VCPU_GPR(r16)(r4) + lwz r17, VCPU_GPR(r17)(r4) + lwz r18, VCPU_GPR(r18)(r4) + lwz r19, VCPU_GPR(r19)(r4) + lwz r20, VCPU_GPR(r20)(r4) + lwz r21, VCPU_GPR(r21)(r4) + lwz r22, VCPU_GPR(r22)(r4) + lwz r23, VCPU_GPR(r23)(r4) + lwz r24, VCPU_GPR(r24)(r4) + lwz r25, VCPU_GPR(r25)(r4) + lwz r26, VCPU_GPR(r26)(r4) + lwz r27, VCPU_GPR(r27)(r4) + lwz r28, VCPU_GPR(r28)(r4) + lwz r29, VCPU_GPR(r29)(r4) + lwz r30, VCPU_GPR(r30)(r4) + lwz r31, VCPU_GPR(r31)(r4) + +#ifdef CONFIG_SPE + /* save host SPEFSCR and load guest SPEFSCR */ + mfspr r3, SPRN_SPEFSCR + stw r3, VCPU_HOST_SPEFSCR(r4) + lwz r3, VCPU_SPEFSCR(r4) + mtspr SPRN_SPEFSCR, r3 +#endif + +lightweight_exit: + stw r2, HOST_R2(r1) + + mfspr r3, SPRN_PID + stw r3, VCPU_HOST_PID(r4) + lwz r3, VCPU_SHADOW_PID(r4) + mtspr SPRN_PID, r3 + +#ifdef CONFIG_FSL_BOOKE + lwz r3, VCPU_SHADOW_PID1(r4) + mtspr SPRN_PID1, r3 +#endif + +#ifdef CONFIG_44x + iccci 0, 0 /* XXX hack */ +#endif + + /* Load some guest volatiles. */ + lwz r0, VCPU_GPR(r0)(r4) + lwz r2, VCPU_GPR(r2)(r4) + lwz r9, VCPU_GPR(r9)(r4) + lwz r10, VCPU_GPR(r10)(r4) + lwz r11, VCPU_GPR(r11)(r4) + lwz r12, VCPU_GPR(r12)(r4) + lwz r13, VCPU_GPR(r13)(r4) + lwz r3, VCPU_LR(r4) + mtlr r3 + lwz r3, VCPU_XER(r4) + mtxer r3 + + /* Switch the IVPR. XXX If we take a TLB miss after this we're screwed, + * so how do we make sure vcpu won't fault? */ + lis r8, kvmppc_booke_handlers@ha + lwz r8, kvmppc_booke_handlers@l(r8) + mtspr SPRN_IVPR, r8 + + /* Save vcpu pointer for the exception handlers. */ + mtspr SPRN_SPRG_WVCPU, r4 + + lwz r5, VCPU_SHARED(r4) + + /* Can't switch the stack pointer until after IVPR is switched, + * because host interrupt handlers would get confused. */ + lwz r1, VCPU_GPR(r1)(r4) + + /* + * Host interrupt handlers may have clobbered these + * guest-readable SPRGs, or the guest kernel may have + * written directly to the shared area, so we + * need to reload them here with the guest's values. + */ + lwz r3, VCPU_SHARED_SPRG4(r5) + mtspr SPRN_SPRG4W, r3 + lwz r3, VCPU_SHARED_SPRG5(r5) + mtspr SPRN_SPRG5W, r3 + lwz r3, VCPU_SHARED_SPRG6(r5) + mtspr SPRN_SPRG6W, r3 + lwz r3, VCPU_SHARED_SPRG7(r5) + mtspr SPRN_SPRG7W, r3 + +#ifdef CONFIG_KVM_EXIT_TIMING + /* save enter time */ +1: + mfspr r6, SPRN_TBRU + mfspr r7, SPRN_TBRL + mfspr r8, SPRN_TBRU + cmpw r8, r6 + bne 1b + stw r7, VCPU_TIMING_LAST_ENTER_TBL(r4) + stw r8, VCPU_TIMING_LAST_ENTER_TBU(r4) +#endif + + /* Finish loading guest volatiles and jump to guest. */ + lwz r3, VCPU_CTR(r4) + lwz r5, VCPU_CR(r4) + lwz r6, VCPU_PC(r4) + lwz r7, VCPU_SHADOW_MSR(r4) + mtctr r3 + mtcr r5 + mtsrr0 r6 + mtsrr1 r7 + lwz r5, VCPU_GPR(r5)(r4) + lwz r6, VCPU_GPR(r6)(r4) + lwz r7, VCPU_GPR(r7)(r4) + lwz r8, VCPU_GPR(r8)(r4) + + /* Clear any debug events which occurred since we disabled MSR[DE]. + * XXX This gives us a 3-instruction window in which a breakpoint + * intended for guest context could fire in the host instead. */ + lis r3, 0xffff + ori r3, r3, 0xffff + mtspr SPRN_DBSR, r3 + + lwz r3, VCPU_GPR(r3)(r4) + lwz r4, VCPU_GPR(r4)(r4) + rfi + +#ifdef CONFIG_SPE +_GLOBAL(kvmppc_save_guest_spe) + cmpi 0,r3,0 + beqlr- + SAVE_32EVRS(0, r4, r3, VCPU_EVR) + evxor evr6, evr6, evr6 + evmwumiaa evr6, evr6, evr6 + li r4,VCPU_ACC + evstddx evr6, r4, r3 /* save acc */ + blr + +_GLOBAL(kvmppc_load_guest_spe) + cmpi 0,r3,0 + beqlr- + li r4,VCPU_ACC + evlddx evr6,r4,r3 + evmra evr6,evr6 /* load acc */ + REST_32EVRS(0, r4, r3, VCPU_EVR) + blr +#endif diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c new file mode 100644 index 00000000..ddcd896f --- /dev/null +++ b/arch/powerpc/kvm/e500.c @@ -0,0 +1,264 @@ +/* + * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: Yu Liu, <yu.liu@freescale.com> + * + * Description: + * This file is derived from arch/powerpc/kvm/44x.c, + * by Hollis Blanchard <hollisb@us.ibm.com>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include <linux/kvm_host.h> +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/export.h> + +#include <asm/reg.h> +#include <asm/cputable.h> +#include <asm/tlbflush.h> +#include <asm/kvm_e500.h> +#include <asm/kvm_ppc.h> + +#include "booke.h" +#include "e500_tlb.h" + +void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu) +{ +} + +void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) +{ +} + +void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + kvmppc_e500_tlb_load(vcpu, cpu); +} + +void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) +{ + kvmppc_e500_tlb_put(vcpu); + +#ifdef CONFIG_SPE + if (vcpu->arch.shadow_msr & MSR_SPE) + kvmppc_vcpu_disable_spe(vcpu); +#endif +} + +int kvmppc_core_check_processor_compat(void) +{ + int r; + + if (strcmp(cur_cpu_spec->cpu_name, "e500v2") == 0) + r = 0; + else + r = -ENOTSUPP; + + return r; +} + +int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + kvmppc_e500_tlb_setup(vcpu_e500); + + /* Registers init */ + vcpu->arch.pvr = mfspr(SPRN_PVR); + vcpu_e500->svr = mfspr(SPRN_SVR); + + vcpu->arch.cpu_type = KVM_CPU_E500V2; + + return 0; +} + +/* 'linear_address' is actually an encoding of AS|PID|EADDR . */ +int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + int index; + gva_t eaddr; + u8 pid; + u8 as; + + eaddr = tr->linear_address; + pid = (tr->linear_address >> 32) & 0xff; + as = (tr->linear_address >> 40) & 0x1; + + index = kvmppc_e500_tlb_search(vcpu, eaddr, pid, as); + if (index < 0) { + tr->valid = 0; + return 0; + } + + tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr); + /* XXX what does "writeable" and "usermode" even mean? */ + tr->valid = 1; + + return 0; +} + +void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + sregs->u.e.features |= KVM_SREGS_E_ARCH206_MMU | KVM_SREGS_E_SPE | + KVM_SREGS_E_PM; + sregs->u.e.impl_id = KVM_SREGS_E_IMPL_FSL; + + sregs->u.e.impl.fsl.features = 0; + sregs->u.e.impl.fsl.svr = vcpu_e500->svr; + sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0; + sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar; + + sregs->u.e.mas0 = vcpu->arch.shared->mas0; + sregs->u.e.mas1 = vcpu->arch.shared->mas1; + sregs->u.e.mas2 = vcpu->arch.shared->mas2; + sregs->u.e.mas7_3 = vcpu->arch.shared->mas7_3; + sregs->u.e.mas4 = vcpu->arch.shared->mas4; + sregs->u.e.mas6 = vcpu->arch.shared->mas6; + + sregs->u.e.mmucfg = mfspr(SPRN_MMUCFG); + sregs->u.e.tlbcfg[0] = vcpu_e500->tlb0cfg; + sregs->u.e.tlbcfg[1] = vcpu_e500->tlb1cfg; + sregs->u.e.tlbcfg[2] = 0; + sregs->u.e.tlbcfg[3] = 0; + + sregs->u.e.ivor_high[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; + sregs->u.e.ivor_high[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]; + sregs->u.e.ivor_high[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]; + sregs->u.e.ivor_high[3] = + vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; + + kvmppc_get_sregs_ivor(vcpu, sregs); +} + +int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { + vcpu_e500->svr = sregs->u.e.impl.fsl.svr; + vcpu_e500->hid0 = sregs->u.e.impl.fsl.hid0; + vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar; + } + + if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) { + vcpu->arch.shared->mas0 = sregs->u.e.mas0; + vcpu->arch.shared->mas1 = sregs->u.e.mas1; + vcpu->arch.shared->mas2 = sregs->u.e.mas2; + vcpu->arch.shared->mas7_3 = sregs->u.e.mas7_3; + vcpu->arch.shared->mas4 = sregs->u.e.mas4; + vcpu->arch.shared->mas6 = sregs->u.e.mas6; + } + + if (!(sregs->u.e.features & KVM_SREGS_E_IVOR)) + return 0; + + if (sregs->u.e.features & KVM_SREGS_E_SPE) { + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = + sregs->u.e.ivor_high[0]; + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = + sregs->u.e.ivor_high[1]; + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = + sregs->u.e.ivor_high[2]; + } + + if (sregs->u.e.features & KVM_SREGS_E_PM) { + vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = + sregs->u.e.ivor_high[3]; + } + + return kvmppc_set_sregs_ivor(vcpu, sregs); +} + +struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +{ + struct kvmppc_vcpu_e500 *vcpu_e500; + struct kvm_vcpu *vcpu; + int err; + + vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + if (!vcpu_e500) { + err = -ENOMEM; + goto out; + } + + vcpu = &vcpu_e500->vcpu; + err = kvm_vcpu_init(vcpu, kvm, id); + if (err) + goto free_vcpu; + + err = kvmppc_e500_tlb_init(vcpu_e500); + if (err) + goto uninit_vcpu; + + vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO); + if (!vcpu->arch.shared) + goto uninit_tlb; + + return vcpu; + +uninit_tlb: + kvmppc_e500_tlb_uninit(vcpu_e500); +uninit_vcpu: + kvm_vcpu_uninit(vcpu); +free_vcpu: + kmem_cache_free(kvm_vcpu_cache, vcpu_e500); +out: + return ERR_PTR(err); +} + +void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + free_page((unsigned long)vcpu->arch.shared); + kvm_vcpu_uninit(vcpu); + kvmppc_e500_tlb_uninit(vcpu_e500); + kmem_cache_free(kvm_vcpu_cache, vcpu_e500); +} + +static int __init kvmppc_e500_init(void) +{ + int r, i; + unsigned long ivor[3]; + unsigned long max_ivor = 0; + + r = kvmppc_core_check_processor_compat(); + if (r) + return r; + + r = kvmppc_booke_init(); + if (r) + return r; + + /* copy extra E500 exception handlers */ + ivor[0] = mfspr(SPRN_IVOR32); + ivor[1] = mfspr(SPRN_IVOR33); + ivor[2] = mfspr(SPRN_IVOR34); + for (i = 0; i < 3; i++) { + if (ivor[i] > max_ivor) + max_ivor = ivor[i]; + + memcpy((void *)kvmppc_booke_handlers + ivor[i], + kvmppc_handlers_start + (i + 16) * kvmppc_handler_len, + kvmppc_handler_len); + } + flush_icache_range(kvmppc_booke_handlers, + kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); + + return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); +} + +static void __exit kvmppc_e500_exit(void) +{ + kvmppc_booke_exit(); +} + +module_init(kvmppc_e500_init); +module_exit(kvmppc_e500_exit); diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c new file mode 100644 index 00000000..6d0b2bd5 --- /dev/null +++ b/arch/powerpc/kvm/e500_emulate.c @@ -0,0 +1,217 @@ +/* + * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: Yu Liu, <yu.liu@freescale.com> + * + * Description: + * This file is derived from arch/powerpc/kvm/44x_emulate.c, + * by Hollis Blanchard <hollisb@us.ibm.com>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include <asm/kvm_ppc.h> +#include <asm/disassemble.h> +#include <asm/kvm_e500.h> + +#include "booke.h" +#include "e500_tlb.h" + +#define XOP_TLBIVAX 786 +#define XOP_TLBSX 914 +#define XOP_TLBRE 946 +#define XOP_TLBWE 978 + +int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int inst, int *advance) +{ + int emulated = EMULATE_DONE; + int ra; + int rb; + + switch (get_op(inst)) { + case 31: + switch (get_xop(inst)) { + + case XOP_TLBRE: + emulated = kvmppc_e500_emul_tlbre(vcpu); + break; + + case XOP_TLBWE: + emulated = kvmppc_e500_emul_tlbwe(vcpu); + break; + + case XOP_TLBSX: + rb = get_rb(inst); + emulated = kvmppc_e500_emul_tlbsx(vcpu,rb); + break; + + case XOP_TLBIVAX: + ra = get_ra(inst); + rb = get_rb(inst); + emulated = kvmppc_e500_emul_tlbivax(vcpu, ra, rb); + break; + + default: + emulated = EMULATE_FAIL; + } + + break; + + default: + emulated = EMULATE_FAIL; + } + + if (emulated == EMULATE_FAIL) + emulated = kvmppc_booke_emulate_op(run, vcpu, inst, advance); + + return emulated; +} + +int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int emulated = EMULATE_DONE; + ulong spr_val = kvmppc_get_gpr(vcpu, rs); + + switch (sprn) { + case SPRN_PID: + kvmppc_set_pid(vcpu, spr_val); + break; + case SPRN_PID1: + if (spr_val != 0) + return EMULATE_FAIL; + vcpu_e500->pid[1] = spr_val; break; + case SPRN_PID2: + if (spr_val != 0) + return EMULATE_FAIL; + vcpu_e500->pid[2] = spr_val; break; + case SPRN_MAS0: + vcpu->arch.shared->mas0 = spr_val; break; + case SPRN_MAS1: + vcpu->arch.shared->mas1 = spr_val; break; + case SPRN_MAS2: + vcpu->arch.shared->mas2 = spr_val; break; + case SPRN_MAS3: + vcpu->arch.shared->mas7_3 &= ~(u64)0xffffffff; + vcpu->arch.shared->mas7_3 |= spr_val; + break; + case SPRN_MAS4: + vcpu->arch.shared->mas4 = spr_val; break; + case SPRN_MAS6: + vcpu->arch.shared->mas6 = spr_val; break; + case SPRN_MAS7: + vcpu->arch.shared->mas7_3 &= (u64)0xffffffff; + vcpu->arch.shared->mas7_3 |= (u64)spr_val << 32; + break; + case SPRN_L1CSR0: + vcpu_e500->l1csr0 = spr_val; + vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC); + break; + case SPRN_L1CSR1: + vcpu_e500->l1csr1 = spr_val; break; + case SPRN_HID0: + vcpu_e500->hid0 = spr_val; break; + case SPRN_HID1: + vcpu_e500->hid1 = spr_val; break; + + case SPRN_MMUCSR0: + emulated = kvmppc_e500_emul_mt_mmucsr0(vcpu_e500, + spr_val); + break; + + /* extra exceptions */ + case SPRN_IVOR32: + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = spr_val; + break; + case SPRN_IVOR33: + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = spr_val; + break; + case SPRN_IVOR34: + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = spr_val; + break; + case SPRN_IVOR35: + vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = spr_val; + break; + + default: + emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs); + } + + return emulated; +} + +int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int emulated = EMULATE_DONE; + unsigned long val; + + switch (sprn) { + case SPRN_PID: + kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[0]); break; + case SPRN_PID1: + kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[1]); break; + case SPRN_PID2: + kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[2]); break; + case SPRN_MAS0: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas0); break; + case SPRN_MAS1: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas1); break; + case SPRN_MAS2: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas2); break; + case SPRN_MAS3: + val = (u32)vcpu->arch.shared->mas7_3; + kvmppc_set_gpr(vcpu, rt, val); + break; + case SPRN_MAS4: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas4); break; + case SPRN_MAS6: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas6); break; + case SPRN_MAS7: + val = vcpu->arch.shared->mas7_3 >> 32; + kvmppc_set_gpr(vcpu, rt, val); + break; + case SPRN_TLB0CFG: + kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb0cfg); break; + case SPRN_TLB1CFG: + kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb1cfg); break; + case SPRN_L1CSR0: + kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr0); break; + case SPRN_L1CSR1: + kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr1); break; + case SPRN_HID0: + kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid0); break; + case SPRN_HID1: + kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid1); break; + case SPRN_SVR: + kvmppc_set_gpr(vcpu, rt, vcpu_e500->svr); break; + + case SPRN_MMUCSR0: + kvmppc_set_gpr(vcpu, rt, 0); break; + + case SPRN_MMUCFG: + kvmppc_set_gpr(vcpu, rt, mfspr(SPRN_MMUCFG)); break; + + /* extra exceptions */ + case SPRN_IVOR32: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]); + break; + case SPRN_IVOR33: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]); + break; + case SPRN_IVOR34: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]); + break; + case SPRN_IVOR35: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]); + break; + default: + emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt); + } + + return emulated; +} + diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c new file mode 100644 index 00000000..6e53e416 --- /dev/null +++ b/arch/powerpc/kvm/e500_tlb.c @@ -0,0 +1,1392 @@ +/* + * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: Yu Liu, yu.liu@freescale.com + * + * Description: + * This file is based on arch/powerpc/kvm/44x_tlb.c, + * by Hollis Blanchard <hollisb@us.ibm.com>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/highmem.h> +#include <linux/log2.h> +#include <linux/uaccess.h> +#include <linux/sched.h> +#include <linux/rwsem.h> +#include <linux/vmalloc.h> +#include <linux/hugetlb.h> +#include <asm/kvm_ppc.h> +#include <asm/kvm_e500.h> + +#include "../mm/mmu_decl.h" +#include "e500_tlb.h" +#include "trace.h" +#include "timing.h" + +#define to_htlb1_esel(esel) (host_tlb_params[1].entries - (esel) - 1) + +struct id { + unsigned long val; + struct id **pentry; +}; + +#define NUM_TIDS 256 + +/* + * This table provide mappings from: + * (guestAS,guestTID,guestPR) --> ID of physical cpu + * guestAS [0..1] + * guestTID [0..255] + * guestPR [0..1] + * ID [1..255] + * Each vcpu keeps one vcpu_id_table. + */ +struct vcpu_id_table { + struct id id[2][NUM_TIDS][2]; +}; + +/* + * This table provide reversed mappings of vcpu_id_table: + * ID --> address of vcpu_id_table item. + * Each physical core has one pcpu_id_table. + */ +struct pcpu_id_table { + struct id *entry[NUM_TIDS]; +}; + +static DEFINE_PER_CPU(struct pcpu_id_table, pcpu_sids); + +/* This variable keeps last used shadow ID on local core. + * The valid range of shadow ID is [1..255] */ +static DEFINE_PER_CPU(unsigned long, pcpu_last_used_sid); + +static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM]; + +static struct kvm_book3e_206_tlb_entry *get_entry( + struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int entry) +{ + int offset = vcpu_e500->gtlb_offset[tlbsel]; + return &vcpu_e500->gtlb_arch[offset + entry]; +} + +/* + * Allocate a free shadow id and setup a valid sid mapping in given entry. + * A mapping is only valid when vcpu_id_table and pcpu_id_table are match. + * + * The caller must have preemption disabled, and keep it that way until + * it has finished with the returned shadow id (either written into the + * TLB or arch.shadow_pid, or discarded). + */ +static inline int local_sid_setup_one(struct id *entry) +{ + unsigned long sid; + int ret = -1; + + sid = ++(__get_cpu_var(pcpu_last_used_sid)); + if (sid < NUM_TIDS) { + __get_cpu_var(pcpu_sids).entry[sid] = entry; + entry->val = sid; + entry->pentry = &__get_cpu_var(pcpu_sids).entry[sid]; + ret = sid; + } + + /* + * If sid == NUM_TIDS, we've run out of sids. We return -1, and + * the caller will invalidate everything and start over. + * + * sid > NUM_TIDS indicates a race, which we disable preemption to + * avoid. + */ + WARN_ON(sid > NUM_TIDS); + + return ret; +} + +/* + * Check if given entry contain a valid shadow id mapping. + * An ID mapping is considered valid only if + * both vcpu and pcpu know this mapping. + * + * The caller must have preemption disabled, and keep it that way until + * it has finished with the returned shadow id (either written into the + * TLB or arch.shadow_pid, or discarded). + */ +static inline int local_sid_lookup(struct id *entry) +{ + if (entry && entry->val != 0 && + __get_cpu_var(pcpu_sids).entry[entry->val] == entry && + entry->pentry == &__get_cpu_var(pcpu_sids).entry[entry->val]) + return entry->val; + return -1; +} + +/* Invalidate all id mappings on local core -- call with preempt disabled */ +static inline void local_sid_destroy_all(void) +{ + __get_cpu_var(pcpu_last_used_sid) = 0; + memset(&__get_cpu_var(pcpu_sids), 0, sizeof(__get_cpu_var(pcpu_sids))); +} + +static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + vcpu_e500->idt = kzalloc(sizeof(struct vcpu_id_table), GFP_KERNEL); + return vcpu_e500->idt; +} + +static void kvmppc_e500_id_table_free(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + kfree(vcpu_e500->idt); +} + +/* Invalidate all mappings on vcpu */ +static void kvmppc_e500_id_table_reset_all(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + memset(vcpu_e500->idt, 0, sizeof(struct vcpu_id_table)); + + /* Update shadow pid when mappings are changed */ + kvmppc_e500_recalc_shadow_pid(vcpu_e500); +} + +/* Invalidate one ID mapping on vcpu */ +static inline void kvmppc_e500_id_table_reset_one( + struct kvmppc_vcpu_e500 *vcpu_e500, + int as, int pid, int pr) +{ + struct vcpu_id_table *idt = vcpu_e500->idt; + + BUG_ON(as >= 2); + BUG_ON(pid >= NUM_TIDS); + BUG_ON(pr >= 2); + + idt->id[as][pid][pr].val = 0; + idt->id[as][pid][pr].pentry = NULL; + + /* Update shadow pid when mappings are changed */ + kvmppc_e500_recalc_shadow_pid(vcpu_e500); +} + +/* + * Map guest (vcpu,AS,ID,PR) to physical core shadow id. + * This function first lookup if a valid mapping exists, + * if not, then creates a new one. + * + * The caller must have preemption disabled, and keep it that way until + * it has finished with the returned shadow id (either written into the + * TLB or arch.shadow_pid, or discarded). + */ +static unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500, + unsigned int as, unsigned int gid, + unsigned int pr, int avoid_recursion) +{ + struct vcpu_id_table *idt = vcpu_e500->idt; + int sid; + + BUG_ON(as >= 2); + BUG_ON(gid >= NUM_TIDS); + BUG_ON(pr >= 2); + + sid = local_sid_lookup(&idt->id[as][gid][pr]); + + while (sid <= 0) { + /* No mapping yet */ + sid = local_sid_setup_one(&idt->id[as][gid][pr]); + if (sid <= 0) { + _tlbil_all(); + local_sid_destroy_all(); + } + + /* Update shadow pid when mappings are changed */ + if (!avoid_recursion) + kvmppc_e500_recalc_shadow_pid(vcpu_e500); + } + + return sid; +} + +/* Map guest pid to shadow. + * We use PID to keep shadow of current guest non-zero PID, + * and use PID1 to keep shadow of guest zero PID. + * So that guest tlbe with TID=0 can be accessed at any time */ +void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + preempt_disable(); + vcpu_e500->vcpu.arch.shadow_pid = kvmppc_e500_get_sid(vcpu_e500, + get_cur_as(&vcpu_e500->vcpu), + get_cur_pid(&vcpu_e500->vcpu), + get_cur_pr(&vcpu_e500->vcpu), 1); + vcpu_e500->vcpu.arch.shadow_pid1 = kvmppc_e500_get_sid(vcpu_e500, + get_cur_as(&vcpu_e500->vcpu), 0, + get_cur_pr(&vcpu_e500->vcpu), 1); + preempt_enable(); +} + +static inline unsigned int gtlb0_get_next_victim( + struct kvmppc_vcpu_e500 *vcpu_e500) +{ + unsigned int victim; + + victim = vcpu_e500->gtlb_nv[0]++; + if (unlikely(vcpu_e500->gtlb_nv[0] >= vcpu_e500->gtlb_params[0].ways)) + vcpu_e500->gtlb_nv[0] = 0; + + return victim; +} + +static inline unsigned int tlb1_max_shadow_size(void) +{ + /* reserve one entry for magic page */ + return host_tlb_params[1].entries - tlbcam_index - 1; +} + +static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe) +{ + return tlbe->mas7_3 & (MAS3_SW|MAS3_UW); +} + +static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) +{ + /* Mask off reserved bits. */ + mas3 &= MAS3_ATTRIB_MASK; + + if (!usermode) { + /* Guest is in supervisor mode, + * so we need to translate guest + * supervisor permissions into user permissions. */ + mas3 &= ~E500_TLB_USER_PERM_MASK; + mas3 |= (mas3 & E500_TLB_SUPER_PERM_MASK) << 1; + } + + return mas3 | E500_TLB_SUPER_PERM_MASK; +} + +static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode) +{ +#ifdef CONFIG_SMP + return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M; +#else + return mas2 & MAS2_ATTRIB_MASK; +#endif +} + +/* + * writing shadow tlb entry to host TLB + */ +static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe, + uint32_t mas0) +{ + unsigned long flags; + + local_irq_save(flags); + mtspr(SPRN_MAS0, mas0); + mtspr(SPRN_MAS1, stlbe->mas1); + mtspr(SPRN_MAS2, (unsigned long)stlbe->mas2); + mtspr(SPRN_MAS3, (u32)stlbe->mas7_3); + mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32)); + asm volatile("isync; tlbwe" : : : "memory"); + local_irq_restore(flags); + + trace_kvm_booke206_stlb_write(mas0, stlbe->mas8, stlbe->mas1, + stlbe->mas2, stlbe->mas7_3); +} + +/* + * Acquire a mas0 with victim hint, as if we just took a TLB miss. + * + * We don't care about the address we're searching for, other than that it's + * in the right set and is not present in the TLB. Using a zero PID and a + * userspace address means we don't have to set and then restore MAS5, or + * calculate a proper MAS6 value. + */ +static u32 get_host_mas0(unsigned long eaddr) +{ + unsigned long flags; + u32 mas0; + + local_irq_save(flags); + mtspr(SPRN_MAS6, 0); + asm volatile("tlbsx 0, %0" : : "b" (eaddr & ~CONFIG_PAGE_OFFSET)); + mas0 = mfspr(SPRN_MAS0); + local_irq_restore(flags); + + return mas0; +} + +/* sesel is for tlb1 only */ +static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, + int tlbsel, int sesel, struct kvm_book3e_206_tlb_entry *stlbe) +{ + u32 mas0; + + if (tlbsel == 0) { + mas0 = get_host_mas0(stlbe->mas2); + __write_host_tlbe(stlbe, mas0); + } else { + __write_host_tlbe(stlbe, + MAS0_TLBSEL(1) | + MAS0_ESEL(to_htlb1_esel(sesel))); + } +} + +void kvmppc_map_magic(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + struct kvm_book3e_206_tlb_entry magic; + ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK; + unsigned int stid; + pfn_t pfn; + + pfn = (pfn_t)virt_to_phys((void *)shared_page) >> PAGE_SHIFT; + get_page(pfn_to_page(pfn)); + + preempt_disable(); + stid = kvmppc_e500_get_sid(vcpu_e500, 0, 0, 0, 0); + + magic.mas1 = MAS1_VALID | MAS1_TS | MAS1_TID(stid) | + MAS1_TSIZE(BOOK3E_PAGESZ_4K); + magic.mas2 = vcpu->arch.magic_page_ea | MAS2_M; + magic.mas7_3 = ((u64)pfn << PAGE_SHIFT) | + MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR; + magic.mas8 = 0; + + __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index)); + preempt_enable(); +} + +void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + /* Shadow PID may be expired on local core */ + kvmppc_e500_recalc_shadow_pid(vcpu_e500); +} + +void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu) +{ +} + +static void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, + int tlbsel, int esel) +{ + struct kvm_book3e_206_tlb_entry *gtlbe = + get_entry(vcpu_e500, tlbsel, esel); + struct vcpu_id_table *idt = vcpu_e500->idt; + unsigned int pr, tid, ts, pid; + u32 val, eaddr; + unsigned long flags; + + ts = get_tlb_ts(gtlbe); + tid = get_tlb_tid(gtlbe); + + preempt_disable(); + + /* One guest ID may be mapped to two shadow IDs */ + for (pr = 0; pr < 2; pr++) { + /* + * The shadow PID can have a valid mapping on at most one + * host CPU. In the common case, it will be valid on this + * CPU, in which case (for TLB0) we do a local invalidation + * of the specific address. + * + * If the shadow PID is not valid on the current host CPU, or + * if we're invalidating a TLB1 entry, we invalidate the + * entire shadow PID. + */ + if (tlbsel == 1 || + (pid = local_sid_lookup(&idt->id[ts][tid][pr])) <= 0) { + kvmppc_e500_id_table_reset_one(vcpu_e500, ts, tid, pr); + continue; + } + + /* + * The guest is invalidating a TLB0 entry which is in a PID + * that has a valid shadow mapping on this host CPU. We + * search host TLB0 to invalidate it's shadow TLB entry, + * similar to __tlbil_va except that we need to look in AS1. + */ + val = (pid << MAS6_SPID_SHIFT) | MAS6_SAS; + eaddr = get_tlb_eaddr(gtlbe); + + local_irq_save(flags); + + mtspr(SPRN_MAS6, val); + asm volatile("tlbsx 0, %[eaddr]" : : [eaddr] "r" (eaddr)); + val = mfspr(SPRN_MAS1); + if (val & MAS1_VALID) { + mtspr(SPRN_MAS1, val & ~MAS1_VALID); + asm volatile("tlbwe"); + } + + local_irq_restore(flags); + } + + preempt_enable(); +} + +static int tlb0_set_base(gva_t addr, int sets, int ways) +{ + int set_base; + + set_base = (addr >> PAGE_SHIFT) & (sets - 1); + set_base *= ways; + + return set_base; +} + +static int gtlb0_set_base(struct kvmppc_vcpu_e500 *vcpu_e500, gva_t addr) +{ + return tlb0_set_base(addr, vcpu_e500->gtlb_params[0].sets, + vcpu_e500->gtlb_params[0].ways); +} + +static unsigned int get_tlb_esel(struct kvm_vcpu *vcpu, int tlbsel) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int esel = get_tlb_esel_bit(vcpu); + + if (tlbsel == 0) { + esel &= vcpu_e500->gtlb_params[0].ways - 1; + esel += gtlb0_set_base(vcpu_e500, vcpu->arch.shared->mas2); + } else { + esel &= vcpu_e500->gtlb_params[tlbsel].entries - 1; + } + + return esel; +} + +/* Search the guest TLB for a matching entry. */ +static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500, + gva_t eaddr, int tlbsel, unsigned int pid, int as) +{ + int size = vcpu_e500->gtlb_params[tlbsel].entries; + unsigned int set_base, offset; + int i; + + if (tlbsel == 0) { + set_base = gtlb0_set_base(vcpu_e500, eaddr); + size = vcpu_e500->gtlb_params[0].ways; + } else { + set_base = 0; + } + + offset = vcpu_e500->gtlb_offset[tlbsel]; + + for (i = 0; i < size; i++) { + struct kvm_book3e_206_tlb_entry *tlbe = + &vcpu_e500->gtlb_arch[offset + set_base + i]; + unsigned int tid; + + if (eaddr < get_tlb_eaddr(tlbe)) + continue; + + if (eaddr > get_tlb_end(tlbe)) + continue; + + tid = get_tlb_tid(tlbe); + if (tid && (tid != pid)) + continue; + + if (!get_tlb_v(tlbe)) + continue; + + if (get_tlb_ts(tlbe) != as && as != -1) + continue; + + return set_base + i; + } + + return -1; +} + +static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, + struct kvm_book3e_206_tlb_entry *gtlbe, + pfn_t pfn) +{ + ref->pfn = pfn; + ref->flags = E500_TLB_VALID; + + if (tlbe_is_writable(gtlbe)) + ref->flags |= E500_TLB_DIRTY; +} + +static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) +{ + if (ref->flags & E500_TLB_VALID) { + if (ref->flags & E500_TLB_DIRTY) + kvm_release_pfn_dirty(ref->pfn); + else + kvm_release_pfn_clean(ref->pfn); + + ref->flags = 0; + } +} + +static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + int tlbsel = 0; + int i; + + for (i = 0; i < vcpu_e500->gtlb_params[tlbsel].entries; i++) { + struct tlbe_ref *ref = + &vcpu_e500->gtlb_priv[tlbsel][i].ref; + kvmppc_e500_ref_release(ref); + } +} + +static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + int stlbsel = 1; + int i; + + kvmppc_e500_id_table_reset_all(vcpu_e500); + + for (i = 0; i < host_tlb_params[stlbsel].entries; i++) { + struct tlbe_ref *ref = + &vcpu_e500->tlb_refs[stlbsel][i]; + kvmppc_e500_ref_release(ref); + } + + clear_tlb_privs(vcpu_e500); +} + +static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, + unsigned int eaddr, int as) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + unsigned int victim, pidsel, tsized; + int tlbsel; + + /* since we only have two TLBs, only lower bit is used. */ + tlbsel = (vcpu->arch.shared->mas4 >> 28) & 0x1; + victim = (tlbsel == 0) ? gtlb0_get_next_victim(vcpu_e500) : 0; + pidsel = (vcpu->arch.shared->mas4 >> 16) & 0xf; + tsized = (vcpu->arch.shared->mas4 >> 7) & 0x1f; + + vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim) + | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); + vcpu->arch.shared->mas1 = MAS1_VALID | (as ? MAS1_TS : 0) + | MAS1_TID(vcpu_e500->pid[pidsel]) + | MAS1_TSIZE(tsized); + vcpu->arch.shared->mas2 = (eaddr & MAS2_EPN) + | (vcpu->arch.shared->mas4 & MAS2_ATTRIB_MASK); + vcpu->arch.shared->mas7_3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3; + vcpu->arch.shared->mas6 = (vcpu->arch.shared->mas6 & MAS6_SPID1) + | (get_cur_pid(vcpu) << 16) + | (as ? MAS6_SAS : 0); +} + +/* TID must be supplied by the caller */ +static inline void kvmppc_e500_setup_stlbe( + struct kvmppc_vcpu_e500 *vcpu_e500, + struct kvm_book3e_206_tlb_entry *gtlbe, + int tsize, struct tlbe_ref *ref, u64 gvaddr, + struct kvm_book3e_206_tlb_entry *stlbe) +{ + pfn_t pfn = ref->pfn; + + BUG_ON(!(ref->flags & E500_TLB_VALID)); + + /* Force TS=1 IPROT=0 for all guest mappings. */ + stlbe->mas1 = MAS1_TSIZE(tsize) | MAS1_TS | MAS1_VALID; + stlbe->mas2 = (gvaddr & MAS2_EPN) + | e500_shadow_mas2_attrib(gtlbe->mas2, + vcpu_e500->vcpu.arch.shared->msr & MSR_PR); + stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) + | e500_shadow_mas3_attrib(gtlbe->mas7_3, + vcpu_e500->vcpu.arch.shared->msr & MSR_PR); +} + +static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, + u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, + int tlbsel, struct kvm_book3e_206_tlb_entry *stlbe, + struct tlbe_ref *ref) +{ + struct kvm_memory_slot *slot; + unsigned long pfn, hva; + int pfnmap = 0; + int tsize = BOOK3E_PAGESZ_4K; + + /* + * Translate guest physical to true physical, acquiring + * a page reference if it is normal, non-reserved memory. + * + * gfn_to_memslot() must succeed because otherwise we wouldn't + * have gotten this far. Eventually we should just pass the slot + * pointer through from the first lookup. + */ + slot = gfn_to_memslot(vcpu_e500->vcpu.kvm, gfn); + hva = gfn_to_hva_memslot(slot, gfn); + + if (tlbsel == 1) { + struct vm_area_struct *vma; + down_read(¤t->mm->mmap_sem); + + vma = find_vma(current->mm, hva); + if (vma && hva >= vma->vm_start && + (vma->vm_flags & VM_PFNMAP)) { + /* + * This VMA is a physically contiguous region (e.g. + * /dev/mem) that bypasses normal Linux page + * management. Find the overlap between the + * vma and the memslot. + */ + + unsigned long start, end; + unsigned long slot_start, slot_end; + + pfnmap = 1; + + start = vma->vm_pgoff; + end = start + + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); + + pfn = start + ((hva - vma->vm_start) >> PAGE_SHIFT); + + slot_start = pfn - (gfn - slot->base_gfn); + slot_end = slot_start + slot->npages; + + if (start < slot_start) + start = slot_start; + if (end > slot_end) + end = slot_end; + + tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >> + MAS1_TSIZE_SHIFT; + + /* + * e500 doesn't implement the lowest tsize bit, + * or 1K pages. + */ + tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); + + /* + * Now find the largest tsize (up to what the guest + * requested) that will cover gfn, stay within the + * range, and for which gfn and pfn are mutually + * aligned. + */ + + for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) { + unsigned long gfn_start, gfn_end, tsize_pages; + tsize_pages = 1 << (tsize - 2); + + gfn_start = gfn & ~(tsize_pages - 1); + gfn_end = gfn_start + tsize_pages; + + if (gfn_start + pfn - gfn < start) + continue; + if (gfn_end + pfn - gfn > end) + continue; + if ((gfn & (tsize_pages - 1)) != + (pfn & (tsize_pages - 1))) + continue; + + gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); + pfn &= ~(tsize_pages - 1); + break; + } + } else if (vma && hva >= vma->vm_start && + (vma->vm_flags & VM_HUGETLB)) { + unsigned long psize = vma_kernel_pagesize(vma); + + tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >> + MAS1_TSIZE_SHIFT; + + /* + * Take the largest page size that satisfies both host + * and guest mapping + */ + tsize = min(__ilog2(psize) - 10, tsize); + + /* + * e500 doesn't implement the lowest tsize bit, + * or 1K pages. + */ + tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); + } + + up_read(¤t->mm->mmap_sem); + } + + if (likely(!pfnmap)) { + unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT); + pfn = gfn_to_pfn_memslot(vcpu_e500->vcpu.kvm, slot, gfn); + if (is_error_pfn(pfn)) { + printk(KERN_ERR "Couldn't get real page for gfn %lx!\n", + (long)gfn); + kvm_release_pfn_clean(pfn); + return; + } + + /* Align guest and physical address to page map boundaries */ + pfn &= ~(tsize_pages - 1); + gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); + } + + /* Drop old ref and setup new one. */ + kvmppc_e500_ref_release(ref); + kvmppc_e500_ref_setup(ref, gtlbe, pfn); + + kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, tsize, ref, gvaddr, stlbe); +} + +/* XXX only map the one-one case, for now use TLB0 */ +static void kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, + int esel, + struct kvm_book3e_206_tlb_entry *stlbe) +{ + struct kvm_book3e_206_tlb_entry *gtlbe; + struct tlbe_ref *ref; + + gtlbe = get_entry(vcpu_e500, 0, esel); + ref = &vcpu_e500->gtlb_priv[0][esel].ref; + + kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe), + get_tlb_raddr(gtlbe) >> PAGE_SHIFT, + gtlbe, 0, stlbe, ref); +} + +/* Caller must ensure that the specified guest TLB entry is safe to insert into + * the shadow TLB. */ +/* XXX for both one-one and one-to-many , for now use TLB1 */ +static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, + u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, + struct kvm_book3e_206_tlb_entry *stlbe) +{ + struct tlbe_ref *ref; + unsigned int victim; + + victim = vcpu_e500->host_tlb1_nv++; + + if (unlikely(vcpu_e500->host_tlb1_nv >= tlb1_max_shadow_size())) + vcpu_e500->host_tlb1_nv = 0; + + ref = &vcpu_e500->tlb_refs[1][victim]; + kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, ref); + + return victim; +} + +void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + /* Recalc shadow pid since MSR changes */ + kvmppc_e500_recalc_shadow_pid(vcpu_e500); +} + +static inline int kvmppc_e500_gtlbe_invalidate( + struct kvmppc_vcpu_e500 *vcpu_e500, + int tlbsel, int esel) +{ + struct kvm_book3e_206_tlb_entry *gtlbe = + get_entry(vcpu_e500, tlbsel, esel); + + if (unlikely(get_tlb_iprot(gtlbe))) + return -1; + + gtlbe->mas1 = 0; + + return 0; +} + +int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value) +{ + int esel; + + if (value & MMUCSR0_TLB0FI) + for (esel = 0; esel < vcpu_e500->gtlb_params[0].entries; esel++) + kvmppc_e500_gtlbe_invalidate(vcpu_e500, 0, esel); + if (value & MMUCSR0_TLB1FI) + for (esel = 0; esel < vcpu_e500->gtlb_params[1].entries; esel++) + kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel); + + /* Invalidate all vcpu id mappings */ + kvmppc_e500_id_table_reset_all(vcpu_e500); + + return EMULATE_DONE; +} + +int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + unsigned int ia; + int esel, tlbsel; + gva_t ea; + + ea = ((ra) ? kvmppc_get_gpr(vcpu, ra) : 0) + kvmppc_get_gpr(vcpu, rb); + + ia = (ea >> 2) & 0x1; + + /* since we only have two TLBs, only lower bit is used. */ + tlbsel = (ea >> 3) & 0x1; + + if (ia) { + /* invalidate all entries */ + for (esel = 0; esel < vcpu_e500->gtlb_params[tlbsel].entries; + esel++) + kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); + } else { + ea &= 0xfffff000; + esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, + get_cur_pid(vcpu), -1); + if (esel >= 0) + kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); + } + + /* Invalidate all vcpu id mappings */ + kvmppc_e500_id_table_reset_all(vcpu_e500); + + return EMULATE_DONE; +} + +int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int tlbsel, esel; + struct kvm_book3e_206_tlb_entry *gtlbe; + + tlbsel = get_tlb_tlbsel(vcpu); + esel = get_tlb_esel(vcpu, tlbsel); + + gtlbe = get_entry(vcpu_e500, tlbsel, esel); + vcpu->arch.shared->mas0 &= ~MAS0_NV(~0); + vcpu->arch.shared->mas0 |= MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); + vcpu->arch.shared->mas1 = gtlbe->mas1; + vcpu->arch.shared->mas2 = gtlbe->mas2; + vcpu->arch.shared->mas7_3 = gtlbe->mas7_3; + + return EMULATE_DONE; +} + +int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int as = !!get_cur_sas(vcpu); + unsigned int pid = get_cur_spid(vcpu); + int esel, tlbsel; + struct kvm_book3e_206_tlb_entry *gtlbe = NULL; + gva_t ea; + + ea = kvmppc_get_gpr(vcpu, rb); + + for (tlbsel = 0; tlbsel < 2; tlbsel++) { + esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as); + if (esel >= 0) { + gtlbe = get_entry(vcpu_e500, tlbsel, esel); + break; + } + } + + if (gtlbe) { + esel &= vcpu_e500->gtlb_params[tlbsel].ways - 1; + + vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(esel) + | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); + vcpu->arch.shared->mas1 = gtlbe->mas1; + vcpu->arch.shared->mas2 = gtlbe->mas2; + vcpu->arch.shared->mas7_3 = gtlbe->mas7_3; + } else { + int victim; + + /* since we only have two TLBs, only lower bit is used. */ + tlbsel = vcpu->arch.shared->mas4 >> 28 & 0x1; + victim = (tlbsel == 0) ? gtlb0_get_next_victim(vcpu_e500) : 0; + + vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) + | MAS0_ESEL(victim) + | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); + vcpu->arch.shared->mas1 = + (vcpu->arch.shared->mas6 & MAS6_SPID0) + | (vcpu->arch.shared->mas6 & (MAS6_SAS ? MAS1_TS : 0)) + | (vcpu->arch.shared->mas4 & MAS4_TSIZED(~0)); + vcpu->arch.shared->mas2 &= MAS2_EPN; + vcpu->arch.shared->mas2 |= vcpu->arch.shared->mas4 & + MAS2_ATTRIB_MASK; + vcpu->arch.shared->mas7_3 &= MAS3_U0 | MAS3_U1 | + MAS3_U2 | MAS3_U3; + } + + kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); + return EMULATE_DONE; +} + +/* sesel is for tlb1 only */ +static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500, + struct kvm_book3e_206_tlb_entry *gtlbe, + struct kvm_book3e_206_tlb_entry *stlbe, + int stlbsel, int sesel) +{ + int stid; + + preempt_disable(); + stid = kvmppc_e500_get_sid(vcpu_e500, get_tlb_ts(gtlbe), + get_tlb_tid(gtlbe), + get_cur_pr(&vcpu_e500->vcpu), 0); + + stlbe->mas1 |= MAS1_TID(stid); + write_host_tlbe(vcpu_e500, stlbsel, sesel, stlbe); + preempt_enable(); +} + +int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + struct kvm_book3e_206_tlb_entry *gtlbe; + int tlbsel, esel; + + tlbsel = get_tlb_tlbsel(vcpu); + esel = get_tlb_esel(vcpu, tlbsel); + + gtlbe = get_entry(vcpu_e500, tlbsel, esel); + + if (get_tlb_v(gtlbe)) + inval_gtlbe_on_host(vcpu_e500, tlbsel, esel); + + gtlbe->mas1 = vcpu->arch.shared->mas1; + gtlbe->mas2 = vcpu->arch.shared->mas2; + gtlbe->mas7_3 = vcpu->arch.shared->mas7_3; + + trace_kvm_booke206_gtlb_write(vcpu->arch.shared->mas0, gtlbe->mas1, + gtlbe->mas2, gtlbe->mas7_3); + + /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */ + if (tlbe_is_host_safe(vcpu, gtlbe)) { + struct kvm_book3e_206_tlb_entry stlbe; + int stlbsel, sesel; + u64 eaddr; + u64 raddr; + + switch (tlbsel) { + case 0: + /* TLB0 */ + gtlbe->mas1 &= ~MAS1_TSIZE(~0); + gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K); + + stlbsel = 0; + kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe); + sesel = 0; /* unused */ + + break; + + case 1: + /* TLB1 */ + eaddr = get_tlb_eaddr(gtlbe); + raddr = get_tlb_raddr(gtlbe); + + /* Create a 4KB mapping on the host. + * If the guest wanted a large page, + * only the first 4KB is mapped here and the rest + * are mapped on the fly. */ + stlbsel = 1; + sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, + raddr >> PAGE_SHIFT, gtlbe, &stlbe); + break; + + default: + BUG(); + } + + write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel); + } + + kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); + return EMULATE_DONE; +} + +int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) +{ + unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS); + + return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as); +} + +int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) +{ + unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS); + + return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as); +} + +void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu) +{ + unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS); + + kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.pc, as); +} + +void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu) +{ + unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS); + + kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.fault_dear, as); +} + +gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index, + gva_t eaddr) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + struct kvm_book3e_206_tlb_entry *gtlbe; + u64 pgmask; + + gtlbe = get_entry(vcpu_e500, tlbsel_of(index), esel_of(index)); + pgmask = get_tlb_bytes(gtlbe) - 1; + + return get_tlb_raddr(gtlbe) | (eaddr & pgmask); +} + +void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) +{ +} + +void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, + unsigned int index) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + struct tlbe_priv *priv; + struct kvm_book3e_206_tlb_entry *gtlbe, stlbe; + int tlbsel = tlbsel_of(index); + int esel = esel_of(index); + int stlbsel, sesel; + + gtlbe = get_entry(vcpu_e500, tlbsel, esel); + + switch (tlbsel) { + case 0: + stlbsel = 0; + sesel = 0; /* unused */ + priv = &vcpu_e500->gtlb_priv[tlbsel][esel]; + + kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, BOOK3E_PAGESZ_4K, + &priv->ref, eaddr, &stlbe); + break; + + case 1: { + gfn_t gfn = gpaddr >> PAGE_SHIFT; + + stlbsel = 1; + sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, + gtlbe, &stlbe); + break; + } + + default: + BUG(); + break; + } + + write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel); +} + +int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu, + gva_t eaddr, unsigned int pid, int as) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int esel, tlbsel; + + for (tlbsel = 0; tlbsel < 2; tlbsel++) { + esel = kvmppc_e500_tlb_index(vcpu_e500, eaddr, tlbsel, pid, as); + if (esel >= 0) + return index_of(tlbsel, esel); + } + + return -1; +} + +void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + if (vcpu->arch.pid != pid) { + vcpu_e500->pid[0] = vcpu->arch.pid = pid; + kvmppc_e500_recalc_shadow_pid(vcpu_e500); + } +} + +void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + struct kvm_book3e_206_tlb_entry *tlbe; + + /* Insert large initial mapping for guest. */ + tlbe = get_entry(vcpu_e500, 1, 0); + tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M); + tlbe->mas2 = 0; + tlbe->mas7_3 = E500_TLB_SUPER_PERM_MASK; + + /* 4K map for serial output. Used by kernel wrapper. */ + tlbe = get_entry(vcpu_e500, 1, 1); + tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K); + tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G; + tlbe->mas7_3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK; +} + +static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + int i; + + clear_tlb_refs(vcpu_e500); + kfree(vcpu_e500->gtlb_priv[0]); + kfree(vcpu_e500->gtlb_priv[1]); + + if (vcpu_e500->shared_tlb_pages) { + vfree((void *)(round_down((uintptr_t)vcpu_e500->gtlb_arch, + PAGE_SIZE))); + + for (i = 0; i < vcpu_e500->num_shared_tlb_pages; i++) { + set_page_dirty_lock(vcpu_e500->shared_tlb_pages[i]); + put_page(vcpu_e500->shared_tlb_pages[i]); + } + + vcpu_e500->num_shared_tlb_pages = 0; + vcpu_e500->shared_tlb_pages = NULL; + } else { + kfree(vcpu_e500->gtlb_arch); + } + + vcpu_e500->gtlb_arch = NULL; +} + +int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, + struct kvm_config_tlb *cfg) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + struct kvm_book3e_206_tlb_params params; + char *virt; + struct page **pages; + struct tlbe_priv *privs[2] = {}; + size_t array_len; + u32 sets; + int num_pages, ret, i; + + if (cfg->mmu_type != KVM_MMU_FSL_BOOKE_NOHV) + return -EINVAL; + + if (copy_from_user(¶ms, (void __user *)(uintptr_t)cfg->params, + sizeof(params))) + return -EFAULT; + + if (params.tlb_sizes[1] > 64) + return -EINVAL; + if (params.tlb_ways[1] != params.tlb_sizes[1]) + return -EINVAL; + if (params.tlb_sizes[2] != 0 || params.tlb_sizes[3] != 0) + return -EINVAL; + if (params.tlb_ways[2] != 0 || params.tlb_ways[3] != 0) + return -EINVAL; + + if (!is_power_of_2(params.tlb_ways[0])) + return -EINVAL; + + sets = params.tlb_sizes[0] >> ilog2(params.tlb_ways[0]); + if (!is_power_of_2(sets)) + return -EINVAL; + + array_len = params.tlb_sizes[0] + params.tlb_sizes[1]; + array_len *= sizeof(struct kvm_book3e_206_tlb_entry); + + if (cfg->array_len < array_len) + return -EINVAL; + + num_pages = DIV_ROUND_UP(cfg->array + array_len - 1, PAGE_SIZE) - + cfg->array / PAGE_SIZE; + pages = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL); + if (!pages) + return -ENOMEM; + + ret = get_user_pages_fast(cfg->array, num_pages, 1, pages); + if (ret < 0) + goto err_pages; + + if (ret != num_pages) { + num_pages = ret; + ret = -EFAULT; + goto err_put_page; + } + + virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL); + if (!virt) + goto err_put_page; + + privs[0] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[0], + GFP_KERNEL); + privs[1] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[1], + GFP_KERNEL); + + if (!privs[0] || !privs[1]) + goto err_put_page; + + free_gtlb(vcpu_e500); + + vcpu_e500->gtlb_priv[0] = privs[0]; + vcpu_e500->gtlb_priv[1] = privs[1]; + + vcpu_e500->gtlb_arch = (struct kvm_book3e_206_tlb_entry *) + (virt + (cfg->array & (PAGE_SIZE - 1))); + + vcpu_e500->gtlb_params[0].entries = params.tlb_sizes[0]; + vcpu_e500->gtlb_params[1].entries = params.tlb_sizes[1]; + + vcpu_e500->gtlb_offset[0] = 0; + vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0]; + + vcpu_e500->tlb0cfg &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); + if (params.tlb_sizes[0] <= 2048) + vcpu_e500->tlb0cfg |= params.tlb_sizes[0]; + vcpu_e500->tlb0cfg |= params.tlb_ways[0] << TLBnCFG_ASSOC_SHIFT; + + vcpu_e500->tlb1cfg &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); + vcpu_e500->tlb1cfg |= params.tlb_sizes[1]; + vcpu_e500->tlb1cfg |= params.tlb_ways[1] << TLBnCFG_ASSOC_SHIFT; + + vcpu_e500->shared_tlb_pages = pages; + vcpu_e500->num_shared_tlb_pages = num_pages; + + vcpu_e500->gtlb_params[0].ways = params.tlb_ways[0]; + vcpu_e500->gtlb_params[0].sets = sets; + + vcpu_e500->gtlb_params[1].ways = params.tlb_sizes[1]; + vcpu_e500->gtlb_params[1].sets = 1; + + return 0; + +err_put_page: + kfree(privs[0]); + kfree(privs[1]); + + for (i = 0; i < num_pages; i++) + put_page(pages[i]); + +err_pages: + kfree(pages); + return ret; +} + +int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, + struct kvm_dirty_tlb *dirty) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + clear_tlb_refs(vcpu_e500); + return 0; +} + +int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + int entry_size = sizeof(struct kvm_book3e_206_tlb_entry); + int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE; + + host_tlb_params[0].entries = mfspr(SPRN_TLB0CFG) & TLBnCFG_N_ENTRY; + host_tlb_params[1].entries = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; + + /* + * This should never happen on real e500 hardware, but is + * architecturally possible -- e.g. in some weird nested + * virtualization case. + */ + if (host_tlb_params[0].entries == 0 || + host_tlb_params[1].entries == 0) { + pr_err("%s: need to know host tlb size\n", __func__); + return -ENODEV; + } + + host_tlb_params[0].ways = (mfspr(SPRN_TLB0CFG) & TLBnCFG_ASSOC) >> + TLBnCFG_ASSOC_SHIFT; + host_tlb_params[1].ways = host_tlb_params[1].entries; + + if (!is_power_of_2(host_tlb_params[0].entries) || + !is_power_of_2(host_tlb_params[0].ways) || + host_tlb_params[0].entries < host_tlb_params[0].ways || + host_tlb_params[0].ways == 0) { + pr_err("%s: bad tlb0 host config: %u entries %u ways\n", + __func__, host_tlb_params[0].entries, + host_tlb_params[0].ways); + return -ENODEV; + } + + host_tlb_params[0].sets = + host_tlb_params[0].entries / host_tlb_params[0].ways; + host_tlb_params[1].sets = 1; + + vcpu_e500->gtlb_params[0].entries = KVM_E500_TLB0_SIZE; + vcpu_e500->gtlb_params[1].entries = KVM_E500_TLB1_SIZE; + + vcpu_e500->gtlb_params[0].ways = KVM_E500_TLB0_WAY_NUM; + vcpu_e500->gtlb_params[0].sets = + KVM_E500_TLB0_SIZE / KVM_E500_TLB0_WAY_NUM; + + vcpu_e500->gtlb_params[1].ways = KVM_E500_TLB1_SIZE; + vcpu_e500->gtlb_params[1].sets = 1; + + vcpu_e500->gtlb_arch = kmalloc(entries * entry_size, GFP_KERNEL); + if (!vcpu_e500->gtlb_arch) + return -ENOMEM; + + vcpu_e500->gtlb_offset[0] = 0; + vcpu_e500->gtlb_offset[1] = KVM_E500_TLB0_SIZE; + + vcpu_e500->tlb_refs[0] = + kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[0].entries, + GFP_KERNEL); + if (!vcpu_e500->tlb_refs[0]) + goto err; + + vcpu_e500->tlb_refs[1] = + kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[1].entries, + GFP_KERNEL); + if (!vcpu_e500->tlb_refs[1]) + goto err; + + vcpu_e500->gtlb_priv[0] = kzalloc(sizeof(struct tlbe_ref) * + vcpu_e500->gtlb_params[0].entries, + GFP_KERNEL); + if (!vcpu_e500->gtlb_priv[0]) + goto err; + + vcpu_e500->gtlb_priv[1] = kzalloc(sizeof(struct tlbe_ref) * + vcpu_e500->gtlb_params[1].entries, + GFP_KERNEL); + if (!vcpu_e500->gtlb_priv[1]) + goto err; + + if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL) + goto err; + + /* Init TLB configuration register */ + vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & + ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); + vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_params[0].entries; + vcpu_e500->tlb0cfg |= + vcpu_e500->gtlb_params[0].ways << TLBnCFG_ASSOC_SHIFT; + + vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) & + ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); + vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_params[1].entries; + vcpu_e500->tlb0cfg |= + vcpu_e500->gtlb_params[1].ways << TLBnCFG_ASSOC_SHIFT; + + return 0; + +err: + free_gtlb(vcpu_e500); + kfree(vcpu_e500->tlb_refs[0]); + kfree(vcpu_e500->tlb_refs[1]); + return -1; +} + +void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + free_gtlb(vcpu_e500); + kvmppc_e500_id_table_free(vcpu_e500); + + kfree(vcpu_e500->tlb_refs[0]); + kfree(vcpu_e500->tlb_refs[1]); +} diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h new file mode 100644 index 00000000..5c6d2d7b --- /dev/null +++ b/arch/powerpc/kvm/e500_tlb.h @@ -0,0 +1,174 @@ +/* + * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: Yu Liu, yu.liu@freescale.com + * + * Description: + * This file is based on arch/powerpc/kvm/44x_tlb.h, + * by Hollis Blanchard <hollisb@us.ibm.com>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#ifndef __KVM_E500_TLB_H__ +#define __KVM_E500_TLB_H__ + +#include <linux/kvm_host.h> +#include <asm/mmu-book3e.h> +#include <asm/tlb.h> +#include <asm/kvm_e500.h> + +/* This geometry is the legacy default -- can be overridden by userspace */ +#define KVM_E500_TLB0_WAY_SIZE 128 +#define KVM_E500_TLB0_WAY_NUM 2 + +#define KVM_E500_TLB0_SIZE (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM) +#define KVM_E500_TLB1_SIZE 16 + +#define index_of(tlbsel, esel) (((tlbsel) << 16) | ((esel) & 0xFFFF)) +#define tlbsel_of(index) ((index) >> 16) +#define esel_of(index) ((index) & 0xFFFF) + +#define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW) +#define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW) +#define MAS2_ATTRIB_MASK \ + (MAS2_X0 | MAS2_X1) +#define MAS3_ATTRIB_MASK \ + (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \ + | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK) + +extern void kvmppc_dump_tlbs(struct kvm_vcpu *); +extern int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *, ulong); +extern int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *); +extern int kvmppc_e500_emul_tlbre(struct kvm_vcpu *); +extern int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *, int, int); +extern int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *, int); +extern int kvmppc_e500_tlb_search(struct kvm_vcpu *, gva_t, unsigned int, int); +extern void kvmppc_e500_tlb_put(struct kvm_vcpu *); +extern void kvmppc_e500_tlb_load(struct kvm_vcpu *, int); +extern int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *); +extern void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *); +extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *); +extern void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *); + +/* TLB helper functions */ +static inline unsigned int +get_tlb_size(const struct kvm_book3e_206_tlb_entry *tlbe) +{ + return (tlbe->mas1 >> 7) & 0x1f; +} + +static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe) +{ + return tlbe->mas2 & 0xfffff000; +} + +static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe) +{ + unsigned int pgsize = get_tlb_size(tlbe); + return 1ULL << 10 << pgsize; +} + +static inline gva_t get_tlb_end(const struct kvm_book3e_206_tlb_entry *tlbe) +{ + u64 bytes = get_tlb_bytes(tlbe); + return get_tlb_eaddr(tlbe) + bytes - 1; +} + +static inline u64 get_tlb_raddr(const struct kvm_book3e_206_tlb_entry *tlbe) +{ + return tlbe->mas7_3 & ~0xfffULL; +} + +static inline unsigned int +get_tlb_tid(const struct kvm_book3e_206_tlb_entry *tlbe) +{ + return (tlbe->mas1 >> 16) & 0xff; +} + +static inline unsigned int +get_tlb_ts(const struct kvm_book3e_206_tlb_entry *tlbe) +{ + return (tlbe->mas1 >> 12) & 0x1; +} + +static inline unsigned int +get_tlb_v(const struct kvm_book3e_206_tlb_entry *tlbe) +{ + return (tlbe->mas1 >> 31) & 0x1; +} + +static inline unsigned int +get_tlb_iprot(const struct kvm_book3e_206_tlb_entry *tlbe) +{ + return (tlbe->mas1 >> 30) & 0x1; +} + +static inline unsigned int get_cur_pid(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.pid & 0xff; +} + +static inline unsigned int get_cur_as(struct kvm_vcpu *vcpu) +{ + return !!(vcpu->arch.shared->msr & (MSR_IS | MSR_DS)); +} + +static inline unsigned int get_cur_pr(struct kvm_vcpu *vcpu) +{ + return !!(vcpu->arch.shared->msr & MSR_PR); +} + +static inline unsigned int get_cur_spid(const struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.shared->mas6 >> 16) & 0xff; +} + +static inline unsigned int get_cur_sas(const struct kvm_vcpu *vcpu) +{ + return vcpu->arch.shared->mas6 & 0x1; +} + +static inline unsigned int get_tlb_tlbsel(const struct kvm_vcpu *vcpu) +{ + /* + * Manual says that tlbsel has 2 bits wide. + * Since we only have two TLBs, only lower bit is used. + */ + return (vcpu->arch.shared->mas0 >> 28) & 0x1; +} + +static inline unsigned int get_tlb_nv_bit(const struct kvm_vcpu *vcpu) +{ + return vcpu->arch.shared->mas0 & 0xfff; +} + +static inline unsigned int get_tlb_esel_bit(const struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.shared->mas0 >> 16) & 0xfff; +} + +static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, + const struct kvm_book3e_206_tlb_entry *tlbe) +{ + gpa_t gpa; + + if (!get_tlb_v(tlbe)) + return 0; + + /* Does it match current guest AS? */ + /* XXX what about IS != DS? */ + if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS)) + return 0; + + gpa = get_tlb_raddr(tlbe); + if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT)) + /* Mapping is not for RAM. */ + return 0; + + return 1; +} + +#endif /* __KVM_E500_TLB_H__ */ diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c new file mode 100644 index 00000000..968f4010 --- /dev/null +++ b/arch/powerpc/kvm/emulate.c @@ -0,0 +1,542 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2007 + * Copyright 2011 Freescale Semiconductor, Inc. + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#include <linux/jiffies.h> +#include <linux/hrtimer.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/kvm_host.h> + +#include <asm/reg.h> +#include <asm/time.h> +#include <asm/byteorder.h> +#include <asm/kvm_ppc.h> +#include <asm/disassemble.h> +#include "timing.h" +#include "trace.h" + +#define OP_TRAP 3 +#define OP_TRAP_64 2 + +#define OP_31_XOP_LWZX 23 +#define OP_31_XOP_LBZX 87 +#define OP_31_XOP_STWX 151 +#define OP_31_XOP_STBX 215 +#define OP_31_XOP_LBZUX 119 +#define OP_31_XOP_STBUX 247 +#define OP_31_XOP_LHZX 279 +#define OP_31_XOP_LHZUX 311 +#define OP_31_XOP_MFSPR 339 +#define OP_31_XOP_LHAX 343 +#define OP_31_XOP_STHX 407 +#define OP_31_XOP_STHUX 439 +#define OP_31_XOP_MTSPR 467 +#define OP_31_XOP_DCBI 470 +#define OP_31_XOP_LWBRX 534 +#define OP_31_XOP_TLBSYNC 566 +#define OP_31_XOP_STWBRX 662 +#define OP_31_XOP_LHBRX 790 +#define OP_31_XOP_STHBRX 918 + +#define OP_LWZ 32 +#define OP_LWZU 33 +#define OP_LBZ 34 +#define OP_LBZU 35 +#define OP_STW 36 +#define OP_STWU 37 +#define OP_STB 38 +#define OP_STBU 39 +#define OP_LHZ 40 +#define OP_LHZU 41 +#define OP_LHA 42 +#define OP_LHAU 43 +#define OP_STH 44 +#define OP_STHU 45 + +void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) +{ + unsigned long dec_nsec; + unsigned long long dec_time; + + pr_debug("mtDEC: %x\n", vcpu->arch.dec); + hrtimer_try_to_cancel(&vcpu->arch.dec_timer); + +#ifdef CONFIG_PPC_BOOK3S + /* mtdec lowers the interrupt line when positive. */ + kvmppc_core_dequeue_dec(vcpu); + + /* POWER4+ triggers a dec interrupt if the value is < 0 */ + if (vcpu->arch.dec & 0x80000000) { + kvmppc_core_queue_dec(vcpu); + return; + } +#endif + +#ifdef CONFIG_BOOKE + /* On BOOKE, DEC = 0 is as good as decrementer not enabled */ + if (vcpu->arch.dec == 0) + return; +#endif + + /* + * The decrementer ticks at the same rate as the timebase, so + * that's how we convert the guest DEC value to the number of + * host ticks. + */ + + dec_time = vcpu->arch.dec; + dec_time *= 1000; + do_div(dec_time, tb_ticks_per_usec); + dec_nsec = do_div(dec_time, NSEC_PER_SEC); + hrtimer_start(&vcpu->arch.dec_timer, + ktime_set(dec_time, dec_nsec), HRTIMER_MODE_REL); + vcpu->arch.dec_jiffies = get_tb(); +} + +u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb) +{ + u64 jd = tb - vcpu->arch.dec_jiffies; + +#ifdef CONFIG_BOOKE + if (vcpu->arch.dec < jd) + return 0; +#endif + + return vcpu->arch.dec - jd; +} + +/* XXX to do: + * lhax + * lhaux + * lswx + * lswi + * stswx + * stswi + * lha + * lhau + * lmw + * stmw + * + * XXX is_bigendian should depend on MMU mapping or MSR[LE] + */ +/* XXX Should probably auto-generate instruction decoding for a particular core + * from opcode tables in the future. */ +int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) +{ + u32 inst = kvmppc_get_last_inst(vcpu); + u32 ea; + int ra; + int rb; + int rs; + int rt; + int sprn; + enum emulation_result emulated = EMULATE_DONE; + int advance = 1; + + /* this default type might be overwritten by subcategories */ + kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); + + pr_debug("Emulating opcode %d / %d\n", get_op(inst), get_xop(inst)); + + switch (get_op(inst)) { + case OP_TRAP: +#ifdef CONFIG_PPC_BOOK3S + case OP_TRAP_64: + kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP); +#else + kvmppc_core_queue_program(vcpu, + vcpu->arch.shared->esr | ESR_PTR); +#endif + advance = 0; + break; + + case 31: + switch (get_xop(inst)) { + + case OP_31_XOP_LWZX: + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); + break; + + case OP_31_XOP_LBZX: + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); + break; + + case OP_31_XOP_LBZUX: + rt = get_rt(inst); + ra = get_ra(inst); + rb = get_rb(inst); + + ea = kvmppc_get_gpr(vcpu, rb); + if (ra) + ea += kvmppc_get_gpr(vcpu, ra); + + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); + kvmppc_set_gpr(vcpu, ra, ea); + break; + + case OP_31_XOP_STWX: + rs = get_rs(inst); + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 4, 1); + break; + + case OP_31_XOP_STBX: + rs = get_rs(inst); + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 1, 1); + break; + + case OP_31_XOP_STBUX: + rs = get_rs(inst); + ra = get_ra(inst); + rb = get_rb(inst); + + ea = kvmppc_get_gpr(vcpu, rb); + if (ra) + ea += kvmppc_get_gpr(vcpu, ra); + + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 1, 1); + kvmppc_set_gpr(vcpu, rs, ea); + break; + + case OP_31_XOP_LHAX: + rt = get_rt(inst); + emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); + break; + + case OP_31_XOP_LHZX: + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + break; + + case OP_31_XOP_LHZUX: + rt = get_rt(inst); + ra = get_ra(inst); + rb = get_rb(inst); + + ea = kvmppc_get_gpr(vcpu, rb); + if (ra) + ea += kvmppc_get_gpr(vcpu, ra); + + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + kvmppc_set_gpr(vcpu, ra, ea); + break; + + case OP_31_XOP_MFSPR: + sprn = get_sprn(inst); + rt = get_rt(inst); + + switch (sprn) { + case SPRN_SRR0: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->srr0); + break; + case SPRN_SRR1: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->srr1); + break; + case SPRN_PVR: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.pvr); break; + case SPRN_PIR: + kvmppc_set_gpr(vcpu, rt, vcpu->vcpu_id); break; + case SPRN_MSSSR0: + kvmppc_set_gpr(vcpu, rt, 0); break; + + /* Note: mftb and TBRL/TBWL are user-accessible, so + * the guest can always access the real TB anyways. + * In fact, we probably will never see these traps. */ + case SPRN_TBWL: + kvmppc_set_gpr(vcpu, rt, get_tb() >> 32); break; + case SPRN_TBWU: + kvmppc_set_gpr(vcpu, rt, get_tb()); break; + + case SPRN_SPRG0: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg0); + break; + case SPRN_SPRG1: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg1); + break; + case SPRN_SPRG2: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg2); + break; + case SPRN_SPRG3: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg3); + break; + /* Note: SPRG4-7 are user-readable, so we don't get + * a trap. */ + + case SPRN_DEC: + { + kvmppc_set_gpr(vcpu, rt, + kvmppc_get_dec(vcpu, get_tb())); + break; + } + default: + emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt); + if (emulated == EMULATE_FAIL) { + printk("mfspr: unknown spr %x\n", sprn); + kvmppc_set_gpr(vcpu, rt, 0); + } + break; + } + kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS); + break; + + case OP_31_XOP_STHX: + rs = get_rs(inst); + ra = get_ra(inst); + rb = get_rb(inst); + + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 2, 1); + break; + + case OP_31_XOP_STHUX: + rs = get_rs(inst); + ra = get_ra(inst); + rb = get_rb(inst); + + ea = kvmppc_get_gpr(vcpu, rb); + if (ra) + ea += kvmppc_get_gpr(vcpu, ra); + + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 2, 1); + kvmppc_set_gpr(vcpu, ra, ea); + break; + + case OP_31_XOP_MTSPR: + sprn = get_sprn(inst); + rs = get_rs(inst); + switch (sprn) { + case SPRN_SRR0: + vcpu->arch.shared->srr0 = kvmppc_get_gpr(vcpu, rs); + break; + case SPRN_SRR1: + vcpu->arch.shared->srr1 = kvmppc_get_gpr(vcpu, rs); + break; + + /* XXX We need to context-switch the timebase for + * watchdog and FIT. */ + case SPRN_TBWL: break; + case SPRN_TBWU: break; + + case SPRN_MSSSR0: break; + + case SPRN_DEC: + vcpu->arch.dec = kvmppc_get_gpr(vcpu, rs); + kvmppc_emulate_dec(vcpu); + break; + + case SPRN_SPRG0: + vcpu->arch.shared->sprg0 = kvmppc_get_gpr(vcpu, rs); + break; + case SPRN_SPRG1: + vcpu->arch.shared->sprg1 = kvmppc_get_gpr(vcpu, rs); + break; + case SPRN_SPRG2: + vcpu->arch.shared->sprg2 = kvmppc_get_gpr(vcpu, rs); + break; + case SPRN_SPRG3: + vcpu->arch.shared->sprg3 = kvmppc_get_gpr(vcpu, rs); + break; + + default: + emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs); + if (emulated == EMULATE_FAIL) + printk("mtspr: unknown spr %x\n", sprn); + break; + } + kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS); + break; + + case OP_31_XOP_DCBI: + /* Do nothing. The guest is performing dcbi because + * hardware DMA is not snooped by the dcache, but + * emulated DMA either goes through the dcache as + * normal writes, or the host kernel has handled dcache + * coherence. */ + break; + + case OP_31_XOP_LWBRX: + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0); + break; + + case OP_31_XOP_TLBSYNC: + break; + + case OP_31_XOP_STWBRX: + rs = get_rs(inst); + ra = get_ra(inst); + rb = get_rb(inst); + + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 4, 0); + break; + + case OP_31_XOP_LHBRX: + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); + break; + + case OP_31_XOP_STHBRX: + rs = get_rs(inst); + ra = get_ra(inst); + rb = get_rb(inst); + + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 2, 0); + break; + + default: + /* Attempt core-specific emulation below. */ + emulated = EMULATE_FAIL; + } + break; + + case OP_LWZ: + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); + break; + + case OP_LWZU: + ra = get_ra(inst); + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); + break; + + case OP_LBZ: + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); + break; + + case OP_LBZU: + ra = get_ra(inst); + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); + break; + + case OP_STW: + rs = get_rs(inst); + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 4, 1); + break; + + case OP_STWU: + ra = get_ra(inst); + rs = get_rs(inst); + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 4, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); + break; + + case OP_STB: + rs = get_rs(inst); + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 1, 1); + break; + + case OP_STBU: + ra = get_ra(inst); + rs = get_rs(inst); + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 1, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); + break; + + case OP_LHZ: + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + break; + + case OP_LHZU: + ra = get_ra(inst); + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); + break; + + case OP_LHA: + rt = get_rt(inst); + emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); + break; + + case OP_LHAU: + ra = get_ra(inst); + rt = get_rt(inst); + emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); + break; + + case OP_STH: + rs = get_rs(inst); + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 2, 1); + break; + + case OP_STHU: + ra = get_ra(inst); + rs = get_rs(inst); + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 2, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); + break; + + default: + emulated = EMULATE_FAIL; + } + + if (emulated == EMULATE_FAIL) { + emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance); + if (emulated == EMULATE_AGAIN) { + advance = 0; + } else if (emulated == EMULATE_FAIL) { + advance = 0; + printk(KERN_ERR "Couldn't emulate instruction 0x%08x " + "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst)); + kvmppc_core_queue_program(vcpu, 0); + } + } + + trace_kvm_ppc_instr(inst, kvmppc_get_pc(vcpu), emulated); + + /* Advance past emulated instruction. */ + if (advance) + kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4); + + return emulated; +} diff --git a/arch/powerpc/kvm/fpu.S b/arch/powerpc/kvm/fpu.S new file mode 100644 index 00000000..bf68d597 --- /dev/null +++ b/arch/powerpc/kvm/fpu.S @@ -0,0 +1,283 @@ +/* + * FPU helper code to use FPU operations from inside the kernel + * + * Copyright (C) 2010 Alexander Graf (agraf@suse.de) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <asm/reg.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> +#include <asm/cputable.h> +#include <asm/cache.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> + +/* Instructions operating on single parameters */ + +/* + * Single operation with one input operand + * + * R3 = (double*)&fpscr + * R4 = (short*)&result + * R5 = (short*)¶m1 + */ +#define FPS_ONE_IN(name) \ +_GLOBAL(fps_ ## name); \ + lfd 0,0(r3); /* load up fpscr value */ \ + MTFSF_L(0); \ + lfs 0,0(r5); \ + \ + name 0,0; \ + \ + stfs 0,0(r4); \ + mffs 0; \ + stfd 0,0(r3); /* save new fpscr value */ \ + blr + +/* + * Single operation with two input operands + * + * R3 = (double*)&fpscr + * R4 = (short*)&result + * R5 = (short*)¶m1 + * R6 = (short*)¶m2 + */ +#define FPS_TWO_IN(name) \ +_GLOBAL(fps_ ## name); \ + lfd 0,0(r3); /* load up fpscr value */ \ + MTFSF_L(0); \ + lfs 0,0(r5); \ + lfs 1,0(r6); \ + \ + name 0,0,1; \ + \ + stfs 0,0(r4); \ + mffs 0; \ + stfd 0,0(r3); /* save new fpscr value */ \ + blr + +/* + * Single operation with three input operands + * + * R3 = (double*)&fpscr + * R4 = (short*)&result + * R5 = (short*)¶m1 + * R6 = (short*)¶m2 + * R7 = (short*)¶m3 + */ +#define FPS_THREE_IN(name) \ +_GLOBAL(fps_ ## name); \ + lfd 0,0(r3); /* load up fpscr value */ \ + MTFSF_L(0); \ + lfs 0,0(r5); \ + lfs 1,0(r6); \ + lfs 2,0(r7); \ + \ + name 0,0,1,2; \ + \ + stfs 0,0(r4); \ + mffs 0; \ + stfd 0,0(r3); /* save new fpscr value */ \ + blr + +FPS_ONE_IN(fres) +FPS_ONE_IN(frsqrte) +FPS_ONE_IN(fsqrts) +FPS_TWO_IN(fadds) +FPS_TWO_IN(fdivs) +FPS_TWO_IN(fmuls) +FPS_TWO_IN(fsubs) +FPS_THREE_IN(fmadds) +FPS_THREE_IN(fmsubs) +FPS_THREE_IN(fnmadds) +FPS_THREE_IN(fnmsubs) +FPS_THREE_IN(fsel) + + +/* Instructions operating on double parameters */ + +/* + * Beginning of double instruction processing + * + * R3 = (double*)&fpscr + * R4 = (u32*)&cr + * R5 = (double*)&result + * R6 = (double*)¶m1 + * R7 = (double*)¶m2 [load_two] + * R8 = (double*)¶m3 [load_three] + * LR = instruction call function + */ +fpd_load_three: + lfd 2,0(r8) /* load param3 */ +fpd_load_two: + lfd 1,0(r7) /* load param2 */ +fpd_load_one: + lfd 0,0(r6) /* load param1 */ +fpd_load_none: + lfd 3,0(r3) /* load up fpscr value */ + MTFSF_L(3) + lwz r6, 0(r4) /* load cr */ + mtcr r6 + blr + +/* + * End of double instruction processing + * + * R3 = (double*)&fpscr + * R4 = (u32*)&cr + * R5 = (double*)&result + * LR = caller of instruction call function + */ +fpd_return: + mfcr r6 + stfd 0,0(r5) /* save result */ + mffs 0 + stfd 0,0(r3) /* save new fpscr value */ + stw r6,0(r4) /* save new cr value */ + blr + +/* + * Double operation with no input operand + * + * R3 = (double*)&fpscr + * R4 = (u32*)&cr + * R5 = (double*)&result + */ +#define FPD_NONE_IN(name) \ +_GLOBAL(fpd_ ## name); \ + mflr r12; \ + bl fpd_load_none; \ + mtlr r12; \ + \ + name. 0; /* call instruction */ \ + b fpd_return + +/* + * Double operation with one input operand + * + * R3 = (double*)&fpscr + * R4 = (u32*)&cr + * R5 = (double*)&result + * R6 = (double*)¶m1 + */ +#define FPD_ONE_IN(name) \ +_GLOBAL(fpd_ ## name); \ + mflr r12; \ + bl fpd_load_one; \ + mtlr r12; \ + \ + name. 0,0; /* call instruction */ \ + b fpd_return + +/* + * Double operation with two input operands + * + * R3 = (double*)&fpscr + * R4 = (u32*)&cr + * R5 = (double*)&result + * R6 = (double*)¶m1 + * R7 = (double*)¶m2 + * R8 = (double*)¶m3 + */ +#define FPD_TWO_IN(name) \ +_GLOBAL(fpd_ ## name); \ + mflr r12; \ + bl fpd_load_two; \ + mtlr r12; \ + \ + name. 0,0,1; /* call instruction */ \ + b fpd_return + +/* + * CR Double operation with two input operands + * + * R3 = (double*)&fpscr + * R4 = (u32*)&cr + * R5 = (double*)¶m1 + * R6 = (double*)¶m2 + * R7 = (double*)¶m3 + */ +#define FPD_TWO_IN_CR(name) \ +_GLOBAL(fpd_ ## name); \ + lfd 1,0(r6); /* load param2 */ \ + lfd 0,0(r5); /* load param1 */ \ + lfd 3,0(r3); /* load up fpscr value */ \ + MTFSF_L(3); \ + lwz r6, 0(r4); /* load cr */ \ + mtcr r6; \ + \ + name 0,0,1; /* call instruction */ \ + mfcr r6; \ + mffs 0; \ + stfd 0,0(r3); /* save new fpscr value */ \ + stw r6,0(r4); /* save new cr value */ \ + blr + +/* + * Double operation with three input operands + * + * R3 = (double*)&fpscr + * R4 = (u32*)&cr + * R5 = (double*)&result + * R6 = (double*)¶m1 + * R7 = (double*)¶m2 + * R8 = (double*)¶m3 + */ +#define FPD_THREE_IN(name) \ +_GLOBAL(fpd_ ## name); \ + mflr r12; \ + bl fpd_load_three; \ + mtlr r12; \ + \ + name. 0,0,1,2; /* call instruction */ \ + b fpd_return + +FPD_ONE_IN(fsqrts) +FPD_ONE_IN(frsqrtes) +FPD_ONE_IN(fres) +FPD_ONE_IN(frsp) +FPD_ONE_IN(fctiw) +FPD_ONE_IN(fctiwz) +FPD_ONE_IN(fsqrt) +FPD_ONE_IN(fre) +FPD_ONE_IN(frsqrte) +FPD_ONE_IN(fneg) +FPD_ONE_IN(fabs) +FPD_TWO_IN(fadds) +FPD_TWO_IN(fsubs) +FPD_TWO_IN(fdivs) +FPD_TWO_IN(fmuls) +FPD_TWO_IN_CR(fcmpu) +FPD_TWO_IN(fcpsgn) +FPD_TWO_IN(fdiv) +FPD_TWO_IN(fadd) +FPD_TWO_IN(fmul) +FPD_TWO_IN_CR(fcmpo) +FPD_TWO_IN(fsub) +FPD_THREE_IN(fmsubs) +FPD_THREE_IN(fmadds) +FPD_THREE_IN(fnmsubs) +FPD_THREE_IN(fnmadds) +FPD_THREE_IN(fsel) +FPD_THREE_IN(fmsub) +FPD_THREE_IN(fmadd) +FPD_THREE_IN(fnmsub) +FPD_THREE_IN(fnmadd) + +_GLOBAL(kvm_cvt_fd) + lfs 0,0(r3) + stfd 0,0(r4) + blr + +_GLOBAL(kvm_cvt_df) + lfd 0,0(r3) + stfs 0,0(r4) + blr diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c new file mode 100644 index 00000000..00d7e345 --- /dev/null +++ b/arch/powerpc/kvm/powerpc.c @@ -0,0 +1,818 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2007 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> + */ + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/kvm_host.h> +#include <linux/vmalloc.h> +#include <linux/hrtimer.h> +#include <linux/fs.h> +#include <linux/slab.h> +#include <asm/cputable.h> +#include <asm/uaccess.h> +#include <asm/kvm_ppc.h> +#include <asm/tlbflush.h> +#include <asm/cputhreads.h> +#include "timing.h" +#include "../mm/mmu_decl.h" + +#define CREATE_TRACE_POINTS +#include "trace.h" + +int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) +{ + return !(v->arch.shared->msr & MSR_WE) || + !!(v->arch.pending_exceptions) || + v->requests; +} + +int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) +{ + int nr = kvmppc_get_gpr(vcpu, 11); + int r; + unsigned long __maybe_unused param1 = kvmppc_get_gpr(vcpu, 3); + unsigned long __maybe_unused param2 = kvmppc_get_gpr(vcpu, 4); + unsigned long __maybe_unused param3 = kvmppc_get_gpr(vcpu, 5); + unsigned long __maybe_unused param4 = kvmppc_get_gpr(vcpu, 6); + unsigned long r2 = 0; + + if (!(vcpu->arch.shared->msr & MSR_SF)) { + /* 32 bit mode */ + param1 &= 0xffffffff; + param2 &= 0xffffffff; + param3 &= 0xffffffff; + param4 &= 0xffffffff; + } + + switch (nr) { + case HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE: + { + vcpu->arch.magic_page_pa = param1; + vcpu->arch.magic_page_ea = param2; + + r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7; + + r = HC_EV_SUCCESS; + break; + } + case HC_VENDOR_KVM | KVM_HC_FEATURES: + r = HC_EV_SUCCESS; +#if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500) + /* XXX Missing magic page on 44x */ + r2 |= (1 << KVM_FEATURE_MAGIC_PAGE); +#endif + + /* Second return value is in r4 */ + break; + default: + r = HC_EV_UNIMPLEMENTED; + break; + } + + kvmppc_set_gpr(vcpu, 4, r2); + + return r; +} + +int kvmppc_sanity_check(struct kvm_vcpu *vcpu) +{ + int r = false; + + /* We have to know what CPU to virtualize */ + if (!vcpu->arch.pvr) + goto out; + + /* PAPR only works with book3s_64 */ + if ((vcpu->arch.cpu_type != KVM_CPU_3S_64) && vcpu->arch.papr_enabled) + goto out; + +#ifdef CONFIG_KVM_BOOK3S_64_HV + /* HV KVM can only do PAPR mode for now */ + if (!vcpu->arch.papr_enabled) + goto out; +#endif + + r = true; + +out: + vcpu->arch.sane = r; + return r ? 0 : -EINVAL; +} + +int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) +{ + enum emulation_result er; + int r; + + er = kvmppc_emulate_instruction(run, vcpu); + switch (er) { + case EMULATE_DONE: + /* Future optimization: only reload non-volatiles if they were + * actually modified. */ + r = RESUME_GUEST_NV; + break; + case EMULATE_DO_MMIO: + run->exit_reason = KVM_EXIT_MMIO; + /* We must reload nonvolatiles because "update" load/store + * instructions modify register state. */ + /* Future optimization: only reload non-volatiles if they were + * actually modified. */ + r = RESUME_HOST_NV; + break; + case EMULATE_FAIL: + /* XXX Deliver Program interrupt to guest. */ + printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__, + kvmppc_get_last_inst(vcpu)); + r = RESUME_HOST; + break; + default: + BUG(); + } + + return r; +} + +int kvm_arch_hardware_enable(void *garbage) +{ + return 0; +} + +void kvm_arch_hardware_disable(void *garbage) +{ +} + +int kvm_arch_hardware_setup(void) +{ + return 0; +} + +void kvm_arch_hardware_unsetup(void) +{ +} + +void kvm_arch_check_processor_compat(void *rtn) +{ + *(int *)rtn = kvmppc_core_check_processor_compat(); +} + +int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) +{ + if (type) + return -EINVAL; + + return kvmppc_core_init_vm(kvm); +} + +void kvm_arch_destroy_vm(struct kvm *kvm) +{ + unsigned int i; + struct kvm_vcpu *vcpu; + + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_arch_vcpu_free(vcpu); + + mutex_lock(&kvm->lock); + for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) + kvm->vcpus[i] = NULL; + + atomic_set(&kvm->online_vcpus, 0); + + kvmppc_core_destroy_vm(kvm); + + mutex_unlock(&kvm->lock); +} + +void kvm_arch_sync_events(struct kvm *kvm) +{ +} + +int kvm_dev_ioctl_check_extension(long ext) +{ + int r; + + switch (ext) { +#ifdef CONFIG_BOOKE + case KVM_CAP_PPC_BOOKE_SREGS: +#else + case KVM_CAP_PPC_SEGSTATE: + case KVM_CAP_PPC_HIOR: + case KVM_CAP_PPC_PAPR: +#endif + case KVM_CAP_PPC_UNSET_IRQ: + case KVM_CAP_PPC_IRQ_LEVEL: + case KVM_CAP_ENABLE_CAP: + case KVM_CAP_ONE_REG: + r = 1; + break; +#ifndef CONFIG_KVM_BOOK3S_64_HV + case KVM_CAP_PPC_PAIRED_SINGLES: + case KVM_CAP_PPC_OSI: + case KVM_CAP_PPC_GET_PVINFO: +#ifdef CONFIG_KVM_E500 + case KVM_CAP_SW_TLB: +#endif + r = 1; + break; + case KVM_CAP_COALESCED_MMIO: + r = KVM_COALESCED_MMIO_PAGE_OFFSET; + break; +#endif +#ifdef CONFIG_KVM_BOOK3S_64_HV + case KVM_CAP_SPAPR_TCE: + r = 1; + break; + case KVM_CAP_PPC_SMT: + r = threads_per_core; + break; + case KVM_CAP_PPC_RMA: + r = 1; + /* PPC970 requires an RMA */ + if (cpu_has_feature(CPU_FTR_ARCH_201)) + r = 2; + break; + case KVM_CAP_SYNC_MMU: + r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0; + break; +#endif + case KVM_CAP_NR_VCPUS: + /* + * Recommending a number of CPUs is somewhat arbitrary; we + * return the number of present CPUs for -HV (since a host + * will have secondary threads "offline"), and for other KVM + * implementations just count online CPUs. + */ +#ifdef CONFIG_KVM_BOOK3S_64_HV + r = num_present_cpus(); +#else + r = num_online_cpus(); +#endif + break; + case KVM_CAP_MAX_VCPUS: + r = KVM_MAX_VCPUS; + break; + default: + r = 0; + break; + } + return r; + +} + +long kvm_arch_dev_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + return -EINVAL; +} + +void kvm_arch_free_memslot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ +} + +int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +{ + return 0; +} + +int kvm_arch_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, + struct kvm_memory_slot old, + struct kvm_userspace_memory_region *mem, + int user_alloc) +{ + return kvmppc_core_prepare_memory_region(kvm, mem); +} + +void kvm_arch_commit_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old, + int user_alloc) +{ + kvmppc_core_commit_memory_region(kvm, mem); +} + + +void kvm_arch_flush_shadow(struct kvm *kvm) +{ +} + +struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) +{ + struct kvm_vcpu *vcpu; + vcpu = kvmppc_core_vcpu_create(kvm, id); + if (!IS_ERR(vcpu)) { + vcpu->arch.wqp = &vcpu->wq; + kvmppc_create_vcpu_debugfs(vcpu, id); + } + return vcpu; +} + +void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) +{ + /* Make sure we're not using the vcpu anymore */ + hrtimer_cancel(&vcpu->arch.dec_timer); + tasklet_kill(&vcpu->arch.tasklet); + + kvmppc_remove_vcpu_debugfs(vcpu); + kvmppc_core_vcpu_free(vcpu); +} + +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + kvm_arch_vcpu_free(vcpu); +} + +int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) +{ + return kvmppc_core_pending_dec(vcpu); +} + +/* + * low level hrtimer wake routine. Because this runs in hardirq context + * we schedule a tasklet to do the real work. + */ +enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer) +{ + struct kvm_vcpu *vcpu; + + vcpu = container_of(timer, struct kvm_vcpu, arch.dec_timer); + tasklet_schedule(&vcpu->arch.tasklet); + + return HRTIMER_NORESTART; +} + +int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) +{ + hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); + tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu); + vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; + vcpu->arch.dec_expires = ~(u64)0; + +#ifdef CONFIG_KVM_EXIT_TIMING + mutex_init(&vcpu->arch.exit_timing_lock); +#endif + + return 0; +} + +void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) +{ + kvmppc_mmu_destroy(vcpu); +} + +void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ +#ifdef CONFIG_BOOKE + /* + * vrsave (formerly usprg0) isn't used by Linux, but may + * be used by the guest. + * + * On non-booke this is associated with Altivec and + * is handled by code in book3s.c. + */ + mtspr(SPRN_VRSAVE, vcpu->arch.vrsave); +#endif + kvmppc_core_vcpu_load(vcpu, cpu); + vcpu->cpu = smp_processor_id(); +} + +void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) +{ + kvmppc_core_vcpu_put(vcpu); +#ifdef CONFIG_BOOKE + vcpu->arch.vrsave = mfspr(SPRN_VRSAVE); +#endif + vcpu->cpu = -1; +} + +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) +{ + return -EINVAL; +} + +static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, + struct kvm_run *run) +{ + kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, run->dcr.data); +} + +static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, + struct kvm_run *run) +{ + u64 uninitialized_var(gpr); + + if (run->mmio.len > sizeof(gpr)) { + printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); + return; + } + + if (vcpu->arch.mmio_is_bigendian) { + switch (run->mmio.len) { + case 8: gpr = *(u64 *)run->mmio.data; break; + case 4: gpr = *(u32 *)run->mmio.data; break; + case 2: gpr = *(u16 *)run->mmio.data; break; + case 1: gpr = *(u8 *)run->mmio.data; break; + } + } else { + /* Convert BE data from userland back to LE. */ + switch (run->mmio.len) { + case 4: gpr = ld_le32((u32 *)run->mmio.data); break; + case 2: gpr = ld_le16((u16 *)run->mmio.data); break; + case 1: gpr = *(u8 *)run->mmio.data; break; + } + } + + if (vcpu->arch.mmio_sign_extend) { + switch (run->mmio.len) { +#ifdef CONFIG_PPC64 + case 4: + gpr = (s64)(s32)gpr; + break; +#endif + case 2: + gpr = (s64)(s16)gpr; + break; + case 1: + gpr = (s64)(s8)gpr; + break; + } + } + + kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); + + switch (vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) { + case KVM_MMIO_REG_GPR: + kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); + break; + case KVM_MMIO_REG_FPR: + vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; + break; +#ifdef CONFIG_PPC_BOOK3S + case KVM_MMIO_REG_QPR: + vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; + break; + case KVM_MMIO_REG_FQPR: + vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; + vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; + break; +#endif + default: + BUG(); + } +} + +int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int rt, unsigned int bytes, int is_bigendian) +{ + if (bytes > sizeof(run->mmio.data)) { + printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, + run->mmio.len); + } + + run->mmio.phys_addr = vcpu->arch.paddr_accessed; + run->mmio.len = bytes; + run->mmio.is_write = 0; + + vcpu->arch.io_gpr = rt; + vcpu->arch.mmio_is_bigendian = is_bigendian; + vcpu->mmio_needed = 1; + vcpu->mmio_is_write = 0; + vcpu->arch.mmio_sign_extend = 0; + + return EMULATE_DO_MMIO; +} + +/* Same as above, but sign extends */ +int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int rt, unsigned int bytes, int is_bigendian) +{ + int r; + + r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian); + vcpu->arch.mmio_sign_extend = 1; + + return r; +} + +int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, + u64 val, unsigned int bytes, int is_bigendian) +{ + void *data = run->mmio.data; + + if (bytes > sizeof(run->mmio.data)) { + printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, + run->mmio.len); + } + + run->mmio.phys_addr = vcpu->arch.paddr_accessed; + run->mmio.len = bytes; + run->mmio.is_write = 1; + vcpu->mmio_needed = 1; + vcpu->mmio_is_write = 1; + + /* Store the value at the lowest bytes in 'data'. */ + if (is_bigendian) { + switch (bytes) { + case 8: *(u64 *)data = val; break; + case 4: *(u32 *)data = val; break; + case 2: *(u16 *)data = val; break; + case 1: *(u8 *)data = val; break; + } + } else { + /* Store LE value into 'data'. */ + switch (bytes) { + case 4: st_le32(data, val); break; + case 2: st_le16(data, val); break; + case 1: *(u8 *)data = val; break; + } + } + + return EMULATE_DO_MMIO; +} + +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + int r; + sigset_t sigsaved; + + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + + if (vcpu->mmio_needed) { + if (!vcpu->mmio_is_write) + kvmppc_complete_mmio_load(vcpu, run); + vcpu->mmio_needed = 0; + } else if (vcpu->arch.dcr_needed) { + if (!vcpu->arch.dcr_is_write) + kvmppc_complete_dcr_load(vcpu, run); + vcpu->arch.dcr_needed = 0; + } else if (vcpu->arch.osi_needed) { + u64 *gprs = run->osi.gprs; + int i; + + for (i = 0; i < 32; i++) + kvmppc_set_gpr(vcpu, i, gprs[i]); + vcpu->arch.osi_needed = 0; + } else if (vcpu->arch.hcall_needed) { + int i; + + kvmppc_set_gpr(vcpu, 3, run->papr_hcall.ret); + for (i = 0; i < 9; ++i) + kvmppc_set_gpr(vcpu, 4 + i, run->papr_hcall.args[i]); + vcpu->arch.hcall_needed = 0; + } + + r = kvmppc_vcpu_run(run, vcpu); + + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &sigsaved, NULL); + + return r; +} + +void kvm_vcpu_kick(struct kvm_vcpu *vcpu) +{ + int me; + int cpu = vcpu->cpu; + + me = get_cpu(); + if (waitqueue_active(vcpu->arch.wqp)) { + wake_up_interruptible(vcpu->arch.wqp); + vcpu->stat.halt_wakeup++; + } else if (cpu != me && cpu != -1) { + smp_send_reschedule(vcpu->cpu); + } + put_cpu(); +} + +int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) +{ + if (irq->irq == KVM_INTERRUPT_UNSET) { + kvmppc_core_dequeue_external(vcpu, irq); + return 0; + } + + kvmppc_core_queue_external(vcpu, irq); + kvm_vcpu_kick(vcpu); + + return 0; +} + +static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, + struct kvm_enable_cap *cap) +{ + int r; + + if (cap->flags) + return -EINVAL; + + switch (cap->cap) { + case KVM_CAP_PPC_OSI: + r = 0; + vcpu->arch.osi_enabled = true; + break; + case KVM_CAP_PPC_PAPR: + r = 0; + vcpu->arch.papr_enabled = true; + break; +#ifdef CONFIG_KVM_E500 + case KVM_CAP_SW_TLB: { + struct kvm_config_tlb cfg; + void __user *user_ptr = (void __user *)(uintptr_t)cap->args[0]; + + r = -EFAULT; + if (copy_from_user(&cfg, user_ptr, sizeof(cfg))) + break; + + r = kvm_vcpu_ioctl_config_tlb(vcpu, &cfg); + break; + } +#endif + default: + r = -EINVAL; + break; + } + + if (!r) + r = kvmppc_sanity_check(vcpu); + + return r; +} + +int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; +} + +long kvm_arch_vcpu_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm_vcpu *vcpu = filp->private_data; + void __user *argp = (void __user *)arg; + long r; + + switch (ioctl) { + case KVM_INTERRUPT: { + struct kvm_interrupt irq; + r = -EFAULT; + if (copy_from_user(&irq, argp, sizeof(irq))) + goto out; + r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); + goto out; + } + + case KVM_ENABLE_CAP: + { + struct kvm_enable_cap cap; + r = -EFAULT; + if (copy_from_user(&cap, argp, sizeof(cap))) + goto out; + r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); + break; + } + + case KVM_SET_ONE_REG: + case KVM_GET_ONE_REG: + { + struct kvm_one_reg reg; + r = -EFAULT; + if (copy_from_user(®, argp, sizeof(reg))) + goto out; + if (ioctl == KVM_SET_ONE_REG) + r = kvm_vcpu_ioctl_set_one_reg(vcpu, ®); + else + r = kvm_vcpu_ioctl_get_one_reg(vcpu, ®); + break; + } + +#ifdef CONFIG_KVM_E500 + case KVM_DIRTY_TLB: { + struct kvm_dirty_tlb dirty; + r = -EFAULT; + if (copy_from_user(&dirty, argp, sizeof(dirty))) + goto out; + r = kvm_vcpu_ioctl_dirty_tlb(vcpu, &dirty); + break; + } +#endif + + default: + r = -EINVAL; + } + +out: + return r; +} + +int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) +{ + return VM_FAULT_SIGBUS; +} + +static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo) +{ + u32 inst_lis = 0x3c000000; + u32 inst_ori = 0x60000000; + u32 inst_nop = 0x60000000; + u32 inst_sc = 0x44000002; + u32 inst_imm_mask = 0xffff; + + /* + * The hypercall to get into KVM from within guest context is as + * follows: + * + * lis r0, r0, KVM_SC_MAGIC_R0@h + * ori r0, KVM_SC_MAGIC_R0@l + * sc + * nop + */ + pvinfo->hcall[0] = inst_lis | ((KVM_SC_MAGIC_R0 >> 16) & inst_imm_mask); + pvinfo->hcall[1] = inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask); + pvinfo->hcall[2] = inst_sc; + pvinfo->hcall[3] = inst_nop; + + return 0; +} + +long kvm_arch_vm_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + void __user *argp = (void __user *)arg; + long r; + + switch (ioctl) { + case KVM_PPC_GET_PVINFO: { + struct kvm_ppc_pvinfo pvinfo; + memset(&pvinfo, 0, sizeof(pvinfo)); + r = kvm_vm_ioctl_get_pvinfo(&pvinfo); + if (copy_to_user(argp, &pvinfo, sizeof(pvinfo))) { + r = -EFAULT; + goto out; + } + + break; + } +#ifdef CONFIG_KVM_BOOK3S_64_HV + case KVM_CREATE_SPAPR_TCE: { + struct kvm_create_spapr_tce create_tce; + struct kvm *kvm = filp->private_data; + + r = -EFAULT; + if (copy_from_user(&create_tce, argp, sizeof(create_tce))) + goto out; + r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce); + goto out; + } + + case KVM_ALLOCATE_RMA: { + struct kvm *kvm = filp->private_data; + struct kvm_allocate_rma rma; + + r = kvm_vm_ioctl_allocate_rma(kvm, &rma); + if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma))) + r = -EFAULT; + break; + } +#endif /* CONFIG_KVM_BOOK3S_64_HV */ + + default: + r = -ENOTTY; + } + +out: + return r; +} + +int kvm_arch_init(void *opaque) +{ + return 0; +} + +void kvm_arch_exit(void) +{ +} diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c new file mode 100644 index 00000000..07b6110a --- /dev/null +++ b/arch/powerpc/kvm/timing.c @@ -0,0 +1,246 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> + */ + +#include <linux/kvm_host.h> +#include <linux/fs.h> +#include <linux/seq_file.h> +#include <linux/debugfs.h> +#include <linux/uaccess.h> +#include <linux/module.h> + +#include <asm/time.h> +#include <asm-generic/div64.h> + +#include "timing.h" + +void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) +{ + int i; + + /* Take a lock to avoid concurrent updates */ + mutex_lock(&vcpu->arch.exit_timing_lock); + + vcpu->arch.last_exit_type = 0xDEAD; + for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { + vcpu->arch.timing_count_type[i] = 0; + vcpu->arch.timing_max_duration[i] = 0; + vcpu->arch.timing_min_duration[i] = 0xFFFFFFFF; + vcpu->arch.timing_sum_duration[i] = 0; + vcpu->arch.timing_sum_quad_duration[i] = 0; + } + vcpu->arch.timing_last_exit = 0; + vcpu->arch.timing_exit.tv64 = 0; + vcpu->arch.timing_last_enter.tv64 = 0; + + mutex_unlock(&vcpu->arch.exit_timing_lock); +} + +static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type) +{ + u64 old; + + mutex_lock(&vcpu->arch.exit_timing_lock); + + vcpu->arch.timing_count_type[type]++; + + /* sum */ + old = vcpu->arch.timing_sum_duration[type]; + vcpu->arch.timing_sum_duration[type] += duration; + if (unlikely(old > vcpu->arch.timing_sum_duration[type])) { + printk(KERN_ERR"%s - wrap adding sum of durations" + " old %lld new %lld type %d exit # of type %d\n", + __func__, old, vcpu->arch.timing_sum_duration[type], + type, vcpu->arch.timing_count_type[type]); + } + + /* square sum */ + old = vcpu->arch.timing_sum_quad_duration[type]; + vcpu->arch.timing_sum_quad_duration[type] += (duration*duration); + if (unlikely(old > vcpu->arch.timing_sum_quad_duration[type])) { + printk(KERN_ERR"%s - wrap adding sum of squared durations" + " old %lld new %lld type %d exit # of type %d\n", + __func__, old, + vcpu->arch.timing_sum_quad_duration[type], + type, vcpu->arch.timing_count_type[type]); + } + + /* set min/max */ + if (unlikely(duration < vcpu->arch.timing_min_duration[type])) + vcpu->arch.timing_min_duration[type] = duration; + if (unlikely(duration > vcpu->arch.timing_max_duration[type])) + vcpu->arch.timing_max_duration[type] = duration; + + mutex_unlock(&vcpu->arch.exit_timing_lock); +} + +void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) +{ + u64 exit = vcpu->arch.timing_last_exit; + u64 enter = vcpu->arch.timing_last_enter.tv64; + + /* save exit time, used next exit when the reenter time is known */ + vcpu->arch.timing_last_exit = vcpu->arch.timing_exit.tv64; + + if (unlikely(vcpu->arch.last_exit_type == 0xDEAD || exit == 0)) + return; /* skip incomplete cycle (e.g. after reset) */ + + /* update statistics for average and standard deviation */ + add_exit_timing(vcpu, (enter - exit), vcpu->arch.last_exit_type); + /* enter -> timing_last_exit is time spent in guest - log this too */ + add_exit_timing(vcpu, (vcpu->arch.timing_last_exit - enter), + TIMEINGUEST); +} + +static const char *kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = { + [MMIO_EXITS] = "MMIO", + [DCR_EXITS] = "DCR", + [SIGNAL_EXITS] = "SIGNAL", + [ITLB_REAL_MISS_EXITS] = "ITLBREAL", + [ITLB_VIRT_MISS_EXITS] = "ITLBVIRT", + [DTLB_REAL_MISS_EXITS] = "DTLBREAL", + [DTLB_VIRT_MISS_EXITS] = "DTLBVIRT", + [SYSCALL_EXITS] = "SYSCALL", + [ISI_EXITS] = "ISI", + [DSI_EXITS] = "DSI", + [EMULATED_INST_EXITS] = "EMULINST", + [EMULATED_MTMSRWE_EXITS] = "EMUL_WAIT", + [EMULATED_WRTEE_EXITS] = "EMUL_WRTEE", + [EMULATED_MTSPR_EXITS] = "EMUL_MTSPR", + [EMULATED_MFSPR_EXITS] = "EMUL_MFSPR", + [EMULATED_MTMSR_EXITS] = "EMUL_MTMSR", + [EMULATED_MFMSR_EXITS] = "EMUL_MFMSR", + [EMULATED_TLBSX_EXITS] = "EMUL_TLBSX", + [EMULATED_TLBWE_EXITS] = "EMUL_TLBWE", + [EMULATED_RFI_EXITS] = "EMUL_RFI", + [DEC_EXITS] = "DEC", + [EXT_INTR_EXITS] = "EXTINT", + [HALT_WAKEUP] = "HALT", + [USR_PR_INST] = "USR_PR_INST", + [FP_UNAVAIL] = "FP_UNAVAIL", + [DEBUG_EXITS] = "DEBUG", + [TIMEINGUEST] = "TIMEINGUEST" +}; + +static int kvmppc_exit_timing_show(struct seq_file *m, void *private) +{ + struct kvm_vcpu *vcpu = m->private; + int i; + u64 min, max, sum, sum_quad; + + seq_printf(m, "%s", "type count min max sum sum_squared\n"); + + + for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { + + min = vcpu->arch.timing_min_duration[i]; + do_div(min, tb_ticks_per_usec); + max = vcpu->arch.timing_max_duration[i]; + do_div(max, tb_ticks_per_usec); + sum = vcpu->arch.timing_sum_duration[i]; + do_div(sum, tb_ticks_per_usec); + sum_quad = vcpu->arch.timing_sum_quad_duration[i]; + do_div(sum_quad, tb_ticks_per_usec); + + seq_printf(m, "%12s %10d %10lld %10lld %20lld %20lld\n", + kvm_exit_names[i], + vcpu->arch.timing_count_type[i], + min, + max, + sum, + sum_quad); + + } + return 0; +} + +/* Write 'c' to clear the timing statistics. */ +static ssize_t kvmppc_exit_timing_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + int err = -EINVAL; + char c; + + if (count > 1) { + goto done; + } + + if (get_user(c, user_buf)) { + err = -EFAULT; + goto done; + } + + if (c == 'c') { + struct seq_file *seqf = file->private_data; + struct kvm_vcpu *vcpu = seqf->private; + /* Write does not affect our buffers previously generated with + * show. seq_file is locked here to prevent races of init with + * a show call */ + mutex_lock(&seqf->lock); + kvmppc_init_timing_stats(vcpu); + mutex_unlock(&seqf->lock); + err = count; + } + +done: + return err; +} + +static int kvmppc_exit_timing_open(struct inode *inode, struct file *file) +{ + return single_open(file, kvmppc_exit_timing_show, inode->i_private); +} + +static const struct file_operations kvmppc_exit_timing_fops = { + .owner = THIS_MODULE, + .open = kvmppc_exit_timing_open, + .read = seq_read, + .write = kvmppc_exit_timing_write, + .llseek = seq_lseek, + .release = single_release, +}; + +void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id) +{ + static char dbg_fname[50]; + struct dentry *debugfs_file; + + snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing", + current->pid, id); + debugfs_file = debugfs_create_file(dbg_fname, 0666, + kvm_debugfs_dir, vcpu, + &kvmppc_exit_timing_fops); + + if (!debugfs_file) { + printk(KERN_ERR"%s: error creating debugfs file %s\n", + __func__, dbg_fname); + return; + } + + vcpu->arch.debugfs_exit_timing = debugfs_file; +} + +void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.debugfs_exit_timing) { + debugfs_remove(vcpu->arch.debugfs_exit_timing); + vcpu->arch.debugfs_exit_timing = NULL; + } +} diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h new file mode 100644 index 00000000..8167d42a --- /dev/null +++ b/arch/powerpc/kvm/timing.h @@ -0,0 +1,106 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> + */ + +#ifndef __POWERPC_KVM_EXITTIMING_H__ +#define __POWERPC_KVM_EXITTIMING_H__ + +#include <linux/kvm_host.h> +#include <asm/kvm_host.h> + +#ifdef CONFIG_KVM_EXIT_TIMING +void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu); +void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu); +void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id); +void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu); + +static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) +{ + vcpu->arch.last_exit_type = type; +} + +#else +/* if exit timing is not configured there is no need to build the c file */ +static inline void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) {} +static inline void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) {} +static inline void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, + unsigned int id) {} +static inline void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) {} +static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {} +#endif /* CONFIG_KVM_EXIT_TIMING */ + +/* account the exit in kvm_stats */ +static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type) +{ + /* type has to be known at build time for optimization */ + + /* The BUILD_BUG_ON below breaks in funny ways, commented out + * for now ... -BenH + BUILD_BUG_ON(!__builtin_constant_p(type)); + */ + switch (type) { + case EXT_INTR_EXITS: + vcpu->stat.ext_intr_exits++; + break; + case DEC_EXITS: + vcpu->stat.dec_exits++; + break; + case EMULATED_INST_EXITS: + vcpu->stat.emulated_inst_exits++; + break; + case DCR_EXITS: + vcpu->stat.dcr_exits++; + break; + case DSI_EXITS: + vcpu->stat.dsi_exits++; + break; + case ISI_EXITS: + vcpu->stat.isi_exits++; + break; + case SYSCALL_EXITS: + vcpu->stat.syscall_exits++; + break; + case DTLB_REAL_MISS_EXITS: + vcpu->stat.dtlb_real_miss_exits++; + break; + case DTLB_VIRT_MISS_EXITS: + vcpu->stat.dtlb_virt_miss_exits++; + break; + case MMIO_EXITS: + vcpu->stat.mmio_exits++; + break; + case ITLB_REAL_MISS_EXITS: + vcpu->stat.itlb_real_miss_exits++; + break; + case ITLB_VIRT_MISS_EXITS: + vcpu->stat.itlb_virt_miss_exits++; + break; + case SIGNAL_EXITS: + vcpu->stat.signal_exits++; + break; + } +} + +/* wrapper to set exit time and account for it in kvm_stats */ +static inline void kvmppc_account_exit(struct kvm_vcpu *vcpu, int type) +{ + kvmppc_set_exit_type(vcpu, type); + kvmppc_account_exit_stat(vcpu, type); +} + +#endif /* __POWERPC_KVM_EXITTIMING_H__ */ diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h new file mode 100644 index 00000000..877186b7 --- /dev/null +++ b/arch/powerpc/kvm/trace.h @@ -0,0 +1,403 @@ +#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KVM_H + +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace + +/* + * Tracepoint for guest mode entry. + */ +TRACE_EVENT(kvm_ppc_instr, + TP_PROTO(unsigned int inst, unsigned long _pc, unsigned int emulate), + TP_ARGS(inst, _pc, emulate), + + TP_STRUCT__entry( + __field( unsigned int, inst ) + __field( unsigned long, pc ) + __field( unsigned int, emulate ) + ), + + TP_fast_assign( + __entry->inst = inst; + __entry->pc = _pc; + __entry->emulate = emulate; + ), + + TP_printk("inst %u pc 0x%lx emulate %u\n", + __entry->inst, __entry->pc, __entry->emulate) +); + +TRACE_EVENT(kvm_stlb_inval, + TP_PROTO(unsigned int stlb_index), + TP_ARGS(stlb_index), + + TP_STRUCT__entry( + __field( unsigned int, stlb_index ) + ), + + TP_fast_assign( + __entry->stlb_index = stlb_index; + ), + + TP_printk("stlb_index %u", __entry->stlb_index) +); + +TRACE_EVENT(kvm_stlb_write, + TP_PROTO(unsigned int victim, unsigned int tid, unsigned int word0, + unsigned int word1, unsigned int word2), + TP_ARGS(victim, tid, word0, word1, word2), + + TP_STRUCT__entry( + __field( unsigned int, victim ) + __field( unsigned int, tid ) + __field( unsigned int, word0 ) + __field( unsigned int, word1 ) + __field( unsigned int, word2 ) + ), + + TP_fast_assign( + __entry->victim = victim; + __entry->tid = tid; + __entry->word0 = word0; + __entry->word1 = word1; + __entry->word2 = word2; + ), + + TP_printk("victim %u tid %u w0 %u w1 %u w2 %u", + __entry->victim, __entry->tid, __entry->word0, + __entry->word1, __entry->word2) +); + +TRACE_EVENT(kvm_gtlb_write, + TP_PROTO(unsigned int gtlb_index, unsigned int tid, unsigned int word0, + unsigned int word1, unsigned int word2), + TP_ARGS(gtlb_index, tid, word0, word1, word2), + + TP_STRUCT__entry( + __field( unsigned int, gtlb_index ) + __field( unsigned int, tid ) + __field( unsigned int, word0 ) + __field( unsigned int, word1 ) + __field( unsigned int, word2 ) + ), + + TP_fast_assign( + __entry->gtlb_index = gtlb_index; + __entry->tid = tid; + __entry->word0 = word0; + __entry->word1 = word1; + __entry->word2 = word2; + ), + + TP_printk("gtlb_index %u tid %u w0 %u w1 %u w2 %u", + __entry->gtlb_index, __entry->tid, __entry->word0, + __entry->word1, __entry->word2) +); + + +/************************************************************************* + * Book3S trace points * + *************************************************************************/ + +#ifdef CONFIG_KVM_BOOK3S_PR + +TRACE_EVENT(kvm_book3s_exit, + TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu), + TP_ARGS(exit_nr, vcpu), + + TP_STRUCT__entry( + __field( unsigned int, exit_nr ) + __field( unsigned long, pc ) + __field( unsigned long, msr ) + __field( unsigned long, dar ) + __field( unsigned long, srr1 ) + ), + + TP_fast_assign( + struct kvmppc_book3s_shadow_vcpu *svcpu; + __entry->exit_nr = exit_nr; + __entry->pc = kvmppc_get_pc(vcpu); + __entry->dar = kvmppc_get_fault_dar(vcpu); + __entry->msr = vcpu->arch.shared->msr; + svcpu = svcpu_get(vcpu); + __entry->srr1 = svcpu->shadow_srr1; + svcpu_put(svcpu); + ), + + TP_printk("exit=0x%x | pc=0x%lx | msr=0x%lx | dar=0x%lx | srr1=0x%lx", + __entry->exit_nr, __entry->pc, __entry->msr, __entry->dar, + __entry->srr1) +); + +TRACE_EVENT(kvm_book3s_reenter, + TP_PROTO(int r, struct kvm_vcpu *vcpu), + TP_ARGS(r, vcpu), + + TP_STRUCT__entry( + __field( unsigned int, r ) + __field( unsigned long, pc ) + ), + + TP_fast_assign( + __entry->r = r; + __entry->pc = kvmppc_get_pc(vcpu); + ), + + TP_printk("reentry r=%d | pc=0x%lx", __entry->r, __entry->pc) +); + +#ifdef CONFIG_PPC_BOOK3S_64 + +TRACE_EVENT(kvm_book3s_64_mmu_map, + TP_PROTO(int rflags, ulong hpteg, ulong va, pfn_t hpaddr, + struct kvmppc_pte *orig_pte), + TP_ARGS(rflags, hpteg, va, hpaddr, orig_pte), + + TP_STRUCT__entry( + __field( unsigned char, flag_w ) + __field( unsigned char, flag_x ) + __field( unsigned long, eaddr ) + __field( unsigned long, hpteg ) + __field( unsigned long, va ) + __field( unsigned long long, vpage ) + __field( unsigned long, hpaddr ) + ), + + TP_fast_assign( + __entry->flag_w = ((rflags & HPTE_R_PP) == 3) ? '-' : 'w'; + __entry->flag_x = (rflags & HPTE_R_N) ? '-' : 'x'; + __entry->eaddr = orig_pte->eaddr; + __entry->hpteg = hpteg; + __entry->va = va; + __entry->vpage = orig_pte->vpage; + __entry->hpaddr = hpaddr; + ), + + TP_printk("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx", + __entry->flag_w, __entry->flag_x, __entry->eaddr, + __entry->hpteg, __entry->va, __entry->vpage, __entry->hpaddr) +); + +#endif /* CONFIG_PPC_BOOK3S_64 */ + +TRACE_EVENT(kvm_book3s_mmu_map, + TP_PROTO(struct hpte_cache *pte), + TP_ARGS(pte), + + TP_STRUCT__entry( + __field( u64, host_va ) + __field( u64, pfn ) + __field( ulong, eaddr ) + __field( u64, vpage ) + __field( ulong, raddr ) + __field( int, flags ) + ), + + TP_fast_assign( + __entry->host_va = pte->host_va; + __entry->pfn = pte->pfn; + __entry->eaddr = pte->pte.eaddr; + __entry->vpage = pte->pte.vpage; + __entry->raddr = pte->pte.raddr; + __entry->flags = (pte->pte.may_read ? 0x4 : 0) | + (pte->pte.may_write ? 0x2 : 0) | + (pte->pte.may_execute ? 0x1 : 0); + ), + + TP_printk("Map: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]", + __entry->host_va, __entry->pfn, __entry->eaddr, + __entry->vpage, __entry->raddr, __entry->flags) +); + +TRACE_EVENT(kvm_book3s_mmu_invalidate, + TP_PROTO(struct hpte_cache *pte), + TP_ARGS(pte), + + TP_STRUCT__entry( + __field( u64, host_va ) + __field( u64, pfn ) + __field( ulong, eaddr ) + __field( u64, vpage ) + __field( ulong, raddr ) + __field( int, flags ) + ), + + TP_fast_assign( + __entry->host_va = pte->host_va; + __entry->pfn = pte->pfn; + __entry->eaddr = pte->pte.eaddr; + __entry->vpage = pte->pte.vpage; + __entry->raddr = pte->pte.raddr; + __entry->flags = (pte->pte.may_read ? 0x4 : 0) | + (pte->pte.may_write ? 0x2 : 0) | + (pte->pte.may_execute ? 0x1 : 0); + ), + + TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]", + __entry->host_va, __entry->pfn, __entry->eaddr, + __entry->vpage, __entry->raddr, __entry->flags) +); + +TRACE_EVENT(kvm_book3s_mmu_flush, + TP_PROTO(const char *type, struct kvm_vcpu *vcpu, unsigned long long p1, + unsigned long long p2), + TP_ARGS(type, vcpu, p1, p2), + + TP_STRUCT__entry( + __field( int, count ) + __field( unsigned long long, p1 ) + __field( unsigned long long, p2 ) + __field( const char *, type ) + ), + + TP_fast_assign( + __entry->count = to_book3s(vcpu)->hpte_cache_count; + __entry->p1 = p1; + __entry->p2 = p2; + __entry->type = type; + ), + + TP_printk("Flush %d %sPTEs: %llx - %llx", + __entry->count, __entry->type, __entry->p1, __entry->p2) +); + +TRACE_EVENT(kvm_book3s_slb_found, + TP_PROTO(unsigned long long gvsid, unsigned long long hvsid), + TP_ARGS(gvsid, hvsid), + + TP_STRUCT__entry( + __field( unsigned long long, gvsid ) + __field( unsigned long long, hvsid ) + ), + + TP_fast_assign( + __entry->gvsid = gvsid; + __entry->hvsid = hvsid; + ), + + TP_printk("%llx -> %llx", __entry->gvsid, __entry->hvsid) +); + +TRACE_EVENT(kvm_book3s_slb_fail, + TP_PROTO(u16 sid_map_mask, unsigned long long gvsid), + TP_ARGS(sid_map_mask, gvsid), + + TP_STRUCT__entry( + __field( unsigned short, sid_map_mask ) + __field( unsigned long long, gvsid ) + ), + + TP_fast_assign( + __entry->sid_map_mask = sid_map_mask; + __entry->gvsid = gvsid; + ), + + TP_printk("%x/%x: %llx", __entry->sid_map_mask, + SID_MAP_MASK - __entry->sid_map_mask, __entry->gvsid) +); + +TRACE_EVENT(kvm_book3s_slb_map, + TP_PROTO(u16 sid_map_mask, unsigned long long gvsid, + unsigned long long hvsid), + TP_ARGS(sid_map_mask, gvsid, hvsid), + + TP_STRUCT__entry( + __field( unsigned short, sid_map_mask ) + __field( unsigned long long, guest_vsid ) + __field( unsigned long long, host_vsid ) + ), + + TP_fast_assign( + __entry->sid_map_mask = sid_map_mask; + __entry->guest_vsid = gvsid; + __entry->host_vsid = hvsid; + ), + + TP_printk("%x: %llx -> %llx", __entry->sid_map_mask, + __entry->guest_vsid, __entry->host_vsid) +); + +TRACE_EVENT(kvm_book3s_slbmte, + TP_PROTO(u64 slb_vsid, u64 slb_esid), + TP_ARGS(slb_vsid, slb_esid), + + TP_STRUCT__entry( + __field( u64, slb_vsid ) + __field( u64, slb_esid ) + ), + + TP_fast_assign( + __entry->slb_vsid = slb_vsid; + __entry->slb_esid = slb_esid; + ), + + TP_printk("%llx, %llx", __entry->slb_vsid, __entry->slb_esid) +); + +#endif /* CONFIG_PPC_BOOK3S */ + + +/************************************************************************* + * Book3E trace points * + *************************************************************************/ + +#ifdef CONFIG_BOOKE + +TRACE_EVENT(kvm_booke206_stlb_write, + TP_PROTO(__u32 mas0, __u32 mas8, __u32 mas1, __u64 mas2, __u64 mas7_3), + TP_ARGS(mas0, mas8, mas1, mas2, mas7_3), + + TP_STRUCT__entry( + __field( __u32, mas0 ) + __field( __u32, mas8 ) + __field( __u32, mas1 ) + __field( __u64, mas2 ) + __field( __u64, mas7_3 ) + ), + + TP_fast_assign( + __entry->mas0 = mas0; + __entry->mas8 = mas8; + __entry->mas1 = mas1; + __entry->mas2 = mas2; + __entry->mas7_3 = mas7_3; + ), + + TP_printk("mas0=%x mas8=%x mas1=%x mas2=%llx mas7_3=%llx", + __entry->mas0, __entry->mas8, __entry->mas1, + __entry->mas2, __entry->mas7_3) +); + +TRACE_EVENT(kvm_booke206_gtlb_write, + TP_PROTO(__u32 mas0, __u32 mas1, __u64 mas2, __u64 mas7_3), + TP_ARGS(mas0, mas1, mas2, mas7_3), + + TP_STRUCT__entry( + __field( __u32, mas0 ) + __field( __u32, mas1 ) + __field( __u64, mas2 ) + __field( __u64, mas7_3 ) + ), + + TP_fast_assign( + __entry->mas0 = mas0; + __entry->mas1 = mas1; + __entry->mas2 = mas2; + __entry->mas7_3 = mas7_3; + ), + + TP_printk("mas0=%x mas1=%x mas2=%llx mas7_3=%llx", + __entry->mas0, __entry->mas1, + __entry->mas2, __entry->mas7_3) +); + +#endif + +#endif /* _TRACE_KVM_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> |