summaryrefslogtreecommitdiff
path: root/arch/powerpc/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r--arch/powerpc/kvm/44x.c180
-rw-r--r--arch/powerpc/kvm/44x_emulate.c183
-rw-r--r--arch/powerpc/kvm/44x_tlb.c524
-rw-r--r--arch/powerpc/kvm/44x_tlb.h86
-rw-r--r--arch/powerpc/kvm/Kconfig134
-rw-r--r--arch/powerpc/kvm/Makefile95
-rw-r--r--arch/powerpc/kvm/book3s.c486
-rw-r--r--arch/powerpc/kvm/book3s_32_mmu.c419
-rw-r--r--arch/powerpc/kvm/book3s_32_mmu_host.c393
-rw-r--r--arch/powerpc/kvm/book3s_32_sr.S143
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu.c536
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_host.c330
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c1027
-rw-r--r--arch/powerpc/kvm/book3s_64_slb.S167
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c73
-rw-r--r--arch/powerpc/kvm/book3s_emulate.c592
-rw-r--r--arch/powerpc/kvm/book3s_exports.c35
-rw-r--r--arch/powerpc/kvm/book3s_hv.c1437
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c230
-rw-r--r--arch/powerpc/kvm/book3s_hv_interrupts.S170
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c817
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S1712
-rw-r--r--arch/powerpc/kvm/book3s_interrupts.S201
-rw-r--r--arch/powerpc/kvm/book3s_mmu_hpte.c352
-rw-r--r--arch/powerpc/kvm/book3s_paired_singles.c1270
-rw-r--r--arch/powerpc/kvm/book3s_pr.c1176
-rw-r--r--arch/powerpc/kvm/book3s_pr_papr.c158
-rw-r--r--arch/powerpc/kvm/book3s_rmhandlers.S242
-rw-r--r--arch/powerpc/kvm/book3s_segment.S367
-rw-r--r--arch/powerpc/kvm/booke.c1023
-rw-r--r--arch/powerpc/kvm/booke.h74
-rw-r--r--arch/powerpc/kvm/booke_emulate.c272
-rw-r--r--arch/powerpc/kvm/booke_interrupts.S487
-rw-r--r--arch/powerpc/kvm/e500.c264
-rw-r--r--arch/powerpc/kvm/e500_emulate.c217
-rw-r--r--arch/powerpc/kvm/e500_tlb.c1392
-rw-r--r--arch/powerpc/kvm/e500_tlb.h174
-rw-r--r--arch/powerpc/kvm/emulate.c542
-rw-r--r--arch/powerpc/kvm/fpu.S283
-rw-r--r--arch/powerpc/kvm/powerpc.c818
-rw-r--r--arch/powerpc/kvm/timing.c246
-rw-r--r--arch/powerpc/kvm/timing.h106
-rw-r--r--arch/powerpc/kvm/trace.h403
43 files changed, 19836 insertions, 0 deletions
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
new file mode 100644
index 00000000..7b612a76
--- /dev/null
+++ b/arch/powerpc/kvm/44x.c
@@ -0,0 +1,180 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/export.h>
+
+#include <asm/reg.h>
+#include <asm/cputable.h>
+#include <asm/tlbflush.h>
+#include <asm/kvm_44x.h>
+#include <asm/kvm_ppc.h>
+
+#include "44x_tlb.h"
+
+/* Core-specific vcpu load hook for 44x: forwards to kvmppc_44x_tlb_load().
+ * The 'cpu' argument is not used by this implementation. */
+void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ kvmppc_44x_tlb_load(vcpu);
+}
+
+/* Core-specific vcpu put hook for 44x: forwards to kvmppc_44x_tlb_put(). */
+void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+{
+ kvmppc_44x_tlb_put(vcpu);
+}
+
+/*
+ * Check that the host CPU is one this KVM flavour can run on.
+ *
+ * Returns 0 when the first six characters of cur_cpu_spec->platform are
+ * "ppc440", and -ENOTSUPP for any other platform string.
+ */
+int kvmppc_core_check_processor_compat(void)
+{
+	if (strncmp(cur_cpu_spec->platform, "ppc440", 6) != 0)
+		return -ENOTSUPP;
+
+	return 0;
+}
+
+/*
+ * Put a freshly created 440 vcpu into its initial state.
+ *
+ * Two guest TLB entries are pre-populated, the guest's view of CCR1 is
+ * seeded from the host register, every shadow reference is marked as not
+ * belonging to any guest TLB entry, and the CPU type is recorded.
+ *
+ * Always returns 0.
+ */
+int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	struct kvmppc_44x_tlbe *first = &vcpu_44x->guest_tlb[0];
+	struct kvmppc_44x_tlbe *second = &vcpu_44x->guest_tlb[1];
+	int slot;
+
+	/* Guest TLB entry 0: valid 16M mapping of effective address 0 onto
+	 * real address 0, supervisor read/write/execute. */
+	first->tid = 0;
+	first->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
+	first->word1 = 0;
+	first->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR;
+
+	/* Guest TLB entry 1: valid 4K identity mapping at 0xef600000,
+	 * supervisor read/write/execute with the I and G attribute bits. */
+	second->tid = 0;
+	second->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID;
+	second->word1 = 0xef600000;
+	second->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR
+			| PPC44x_TLB_I | PPC44x_TLB_G;
+
+	/* Since the guest can directly access the timebase, it must know the
+	 * real timebase frequency. Accordingly, it must see the state of
+	 * CCR1[TCS]. */
+	/* XXX CCR1 doesn't exist on all 440 SoCs. */
+	vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
+
+	/* -1 means "this shadow slot backs no guest TLB entry". */
+	for (slot = 0; slot < ARRAY_SIZE(vcpu_44x->shadow_refs); slot++)
+		vcpu_44x->shadow_refs[slot].gtlb_index = -1;
+
+	vcpu->arch.cpu_type = KVM_CPU_440;
+
+	return 0;
+}
+
+/*
+ * Translate a guest address through the guest TLB.
+ *
+ * 'linear_address' is actually an encoding of AS|PID|EADDR: the effective
+ * address is taken from the low bits, the PID from bits 32-39 and the
+ * address space from bit 40.
+ *
+ * On a guest TLB hit tr->physical_address is filled in and tr->valid is set
+ * to 1; on a miss tr->valid is set to 0.  The return value is always 0.
+ */
+int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
+                               struct kvm_translation *tr)
+{
+	gva_t eaddr = tr->linear_address;
+	u8 pid = (tr->linear_address >> 32) & 0xff;
+	u8 as = (tr->linear_address >> 40) & 0x1;
+	int gtlb_index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as);
+
+	if (gtlb_index != -1) {
+		tr->physical_address = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr);
+		/* XXX what does "writeable" and "usermode" even mean? */
+		tr->valid = 1;
+	} else {
+		tr->valid = 0;
+	}
+
+	return 0;
+}
+
+/* Fill 'sregs' for this core; on 44x only kvmppc_get_sregs_ivor() is
+ * invoked. */
+void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ kvmppc_get_sregs_ivor(vcpu, sregs);
+}
+
+/* Apply 'sregs' for this core; on 44x only kvmppc_set_sregs_ivor() is
+ * invoked and its return value is passed straight back to the caller. */
+int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ return kvmppc_set_sregs_ivor(vcpu, sregs);
+}
+
+/*
+ * Allocate and initialise a 440 vcpu.
+ *
+ * The containing struct kvmppc_vcpu_44x comes zeroed from kvm_vcpu_cache,
+ * the embedded generic vcpu is set up with kvm_vcpu_init(), and one zeroed
+ * page is allocated for vcpu->arch.shared.
+ *
+ * Returns the new vcpu on success.  On failure everything acquired so far
+ * is released and an ERR_PTR()-encoded negative errno is returned; NULL is
+ * never returned, so callers can rely on IS_ERR().
+ */
+struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x;
+	struct kvm_vcpu *vcpu;
+	int err;
+
+	vcpu_44x = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+	if (!vcpu_44x) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	vcpu = &vcpu_44x->vcpu;
+	err = kvm_vcpu_init(vcpu, kvm, id);
+	if (err)
+		goto free_vcpu;
+
+	vcpu->arch.shared = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+	if (!vcpu->arch.shared) {
+		/* 'err' is 0 after the successful kvm_vcpu_init(); without an
+		 * explicit code here ERR_PTR(0) == NULL would be returned and
+		 * mistaken for a valid vcpu by IS_ERR() callers. */
+		err = -ENOMEM;
+		goto uninit_vcpu;
+	}
+
+	return vcpu;
+
+uninit_vcpu:
+	kvm_vcpu_uninit(vcpu);
+free_vcpu:
+	kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
+out:
+	return ERR_PTR(err);
+}
+
+/* Tear down a vcpu: free the vcpu->arch.shared page, uninitialise the
+ * generic vcpu, then return the containing kvmppc_vcpu_44x to
+ * kvm_vcpu_cache. */
+void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+
+ free_page((unsigned long)vcpu->arch.shared);
+ kvm_vcpu_uninit(vcpu);
+ kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
+}
+
+/*
+ * Module entry point: run the common BookE initialisation and, if that
+ * succeeds, call kvm_init() with sizeof(struct kvmppc_vcpu_44x)
+ * (presumably the per-vcpu allocation size -- confirm against kvm_init()).
+ *
+ * Returns 0 on success or the first non-zero error code encountered.
+ *
+ * NOTE(review): a kvm_init() failure is returned without undoing
+ * kvmppc_booke_init() -- confirm whether cleanup is needed on that path.
+ */
+static int __init kvmppc_44x_init(void)
+{
+	int r = kvmppc_booke_init();
+
+	if (!r)
+		r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE);
+
+	return r;
+}
+
+/* Module exit point: only kvmppc_booke_exit() is called here.
+ * NOTE(review): presumably that routine also unregisters from the generic
+ * KVM core -- confirm, since no kvm_exit() call is visible in this file. */
+static void __exit kvmppc_44x_exit(void)
+{
+ kvmppc_booke_exit();
+}
+
+/* Register the module's init and exit routines. */
+module_init(kvmppc_44x_init);
+module_exit(kvmppc_44x_exit);
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
new file mode 100644
index 00000000..549bb2c9
--- /dev/null
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -0,0 +1,183 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <asm/kvm_ppc.h>
+#include <asm/dcr.h>
+#include <asm/dcr-regs.h>
+#include <asm/disassemble.h>
+#include <asm/kvm_44x.h>
+#include "timing.h"
+
+#include "booke.h"
+#include "44x_tlb.h"
+
+/* Extended opcode values (as returned by get_xop()) of the primary-opcode-31
+ * instructions that kvmppc_core_emulate_op() handles itself. */
+#define XOP_MFDCR 323
+#define XOP_MTDCR 451
+#define XOP_TLBSX 914
+#define XOP_ICCCI 966
+#define XOP_TLBWE 978
+
+/*
+ * Emulate one trapped guest instruction that is specific to the 440 core.
+ *
+ * Only primary opcode 31 is decoded here, and within it the extended
+ * opcodes mfdcr, mtdcr, tlbwe, tlbsx and iccci.  Whenever the result is
+ * EMULATE_FAIL -- unknown opcode, or a failure reported by one of the TLB
+ * helpers -- the instruction is handed to kvmppc_booke_emulate_op().
+ *
+ * @run:     kvm_run area; its dcr fields are filled in when a DCR access
+ *           is not handled in the kernel.
+ * @vcpu:    vcpu that executed the instruction.
+ * @inst:    the raw instruction word.
+ * @advance: not touched here; only forwarded to kvmppc_booke_emulate_op().
+ *
+ * Returns EMULATE_DONE, EMULATE_DO_DCR, or whatever the TLB helpers or
+ * kvmppc_booke_emulate_op() return.
+ */
+int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int inst, int *advance)
+{
+ int emulated = EMULATE_DONE;
+ int dcrn;
+ int ra;
+ int rb;
+ int rc;
+ int rs;
+ int rt;
+ int ws;
+
+ switch (get_op(inst)) {
+ case 31:
+ switch (get_xop(inst)) {
+
+ case XOP_MFDCR:
+ dcrn = get_dcrn(inst);
+ rt = get_rt(inst);
+
+ /* The guest may access CPR0 registers to determine the timebase
+ * frequency, and it must know the real host frequency because it
+ * can directly access the timebase registers.
+ *
+ * It would be possible to emulate those accesses in userspace,
+ * but userspace can really only figure out the end frequency.
+ * We could decompose that into the factors that compute it, but
+ * that's tricky math, and it's easier to just report the real
+ * CPR0 values.
+ */
+ switch (dcrn) {
+ case DCRN_CPR0_CONFIG_ADDR:
+ /* Return the address value the guest last wrote (see mtdcr). */
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr);
+ break;
+ case DCRN_CPR0_CONFIG_DATA:
+ /* Program the guest's address into the host register and read
+ * the data register with interrupts off, so the address/data
+ * pair is accessed without an interrupt in between. */
+ local_irq_disable();
+ mtdcr(DCRN_CPR0_CONFIG_ADDR,
+ vcpu->arch.cpr0_cfgaddr);
+ kvmppc_set_gpr(vcpu, rt,
+ mfdcr(DCRN_CPR0_CONFIG_DATA));
+ local_irq_enable();
+ break;
+ default:
+ /* Any other DCR read: describe it in 'run', remember the target
+ * GPR in io_gpr, and report EMULATE_DO_DCR. */
+ run->dcr.dcrn = dcrn;
+ run->dcr.data = 0;
+ run->dcr.is_write = 0;
+ vcpu->arch.io_gpr = rt;
+ vcpu->arch.dcr_needed = 1;
+ kvmppc_account_exit(vcpu, DCR_EXITS);
+ emulated = EMULATE_DO_DCR;
+ }
+
+ break;
+
+ case XOP_MTDCR:
+ dcrn = get_dcrn(inst);
+ rs = get_rs(inst);
+
+ /* emulate some access in kernel */
+ switch (dcrn) {
+ case DCRN_CPR0_CONFIG_ADDR:
+ /* Only latched in the vcpu; the host register is written later,
+ * when the guest reads DCRN_CPR0_CONFIG_DATA. */
+ vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs);
+ break;
+ default:
+ /* Any other DCR write: describe it in 'run' and report
+ * EMULATE_DO_DCR. */
+ run->dcr.dcrn = dcrn;
+ run->dcr.data = kvmppc_get_gpr(vcpu, rs);
+ run->dcr.is_write = 1;
+ vcpu->arch.dcr_needed = 1;
+ kvmppc_account_exit(vcpu, DCR_EXITS);
+ emulated = EMULATE_DO_DCR;
+ }
+
+ break;
+
+ case XOP_TLBWE:
+ ra = get_ra(inst);
+ rs = get_rs(inst);
+ ws = get_ws(inst);
+ emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws);
+ break;
+
+ case XOP_TLBSX:
+ rt = get_rt(inst);
+ ra = get_ra(inst);
+ rb = get_rb(inst);
+ rc = get_rc(inst);
+ emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc);
+ break;
+
+ case XOP_ICCCI:
+ /* Nothing to do: reported as emulated (EMULATE_DONE). */
+ break;
+
+ default:
+ emulated = EMULATE_FAIL;
+ }
+
+ break;
+
+ default:
+ emulated = EMULATE_FAIL;
+ }
+
+ /* Not handled above: let the generic BookE emulation try. */
+ if (emulated == EMULATE_FAIL)
+ emulated = kvmppc_booke_emulate_op(run, vcpu, inst, advance);
+
+ return emulated;
+}
+
+/*
+ * Emulate a guest mtspr from GPR 'rs' to SPR 'sprn'.
+ *
+ * PID, MMUCR, CCR0 and CCR1 are handled here: PID goes through
+ * kvmppc_set_pid(), the others are stored in the vcpu.  Every other SPR is
+ * delegated to kvmppc_booke_emulate_mtspr().
+ *
+ * Returns EMULATE_DONE for the SPRs handled here, otherwise the delegate's
+ * result.
+ */
+int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+{
+	switch (sprn) {
+	case SPRN_PID:
+		kvmppc_set_pid(vcpu, kvmppc_get_gpr(vcpu, rs));
+		break;
+	case SPRN_MMUCR:
+		vcpu->arch.mmucr = kvmppc_get_gpr(vcpu, rs);
+		break;
+	case SPRN_CCR0:
+		vcpu->arch.ccr0 = kvmppc_get_gpr(vcpu, rs);
+		break;
+	case SPRN_CCR1:
+		vcpu->arch.ccr1 = kvmppc_get_gpr(vcpu, rs);
+		break;
+	default:
+		/* Not a 440-specific SPR: defer to the generic BookE code. */
+		return kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
+	}
+
+	return EMULATE_DONE;
+}
+
+/*
+ * Emulate a guest mfspr from SPR 'sprn' into GPR 'rt'.
+ *
+ * PID, MMUCR, CCR0 and CCR1 are answered from the values stored in the
+ * vcpu.  Every other SPR is delegated to kvmppc_booke_emulate_mfspr().
+ *
+ * Returns EMULATE_DONE for the SPRs handled here, otherwise the delegate's
+ * result.
+ */
+int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+{
+	switch (sprn) {
+	case SPRN_PID:
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.pid);
+		break;
+	case SPRN_MMUCR:
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.mmucr);
+		break;
+	case SPRN_CCR0:
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr0);
+		break;
+	case SPRN_CCR1:
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr1);
+		break;
+	default:
+		/* Not a 440-specific SPR: defer to the generic BookE code. */
+		return kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
+	}
+
+	return EMULATE_DONE;
+}
+
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
new file mode 100644
index 00000000..33aa715d
--- /dev/null
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -0,0 +1,524 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2007
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/highmem.h>
+
+#include <asm/tlbflush.h>
+#include <asm/mmu-44x.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_44x.h>
+#include "timing.h"
+
+#include "44x_tlb.h"
+#include "trace.h"
+
+/* Fallback page-size encoding for "native" shadow mappings, used only when
+ * the included headers do not already define PPC44x_TLBE_SIZE. */
+#ifndef PPC44x_TLBE_SIZE
+#define PPC44x_TLBE_SIZE PPC44x_TLB_4K
+#endif
+
+/* Size of a 4K page and the mask that clears the offset within one. */
+#define PAGE_SIZE_4K (1<<12)
+#define PAGE_MASK_4K (~(PAGE_SIZE_4K - 1))
+
+/* Groups of TLB attribute bits: the U0-U3 bits, the user-mode permission
+ * bits (UX/UR/UW) and the supervisor-mode permission bits (SX/SR/SW). */
+#define PPC44x_TLB_UATTR_MASK \
+ (PPC44x_TLB_U0|PPC44x_TLB_U1|PPC44x_TLB_U2|PPC44x_TLB_U3)
+#define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW)
+#define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW)
+
+#ifdef DEBUG
+/*
+ * Debug aid (built only when DEBUG is defined): print a table of every
+ * guest TLB entry of 'vcpu' whose word0 has PPC44x_TLB_VALID set.
+ */
+void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	int slot;
+
+	printk("vcpu %d TLB dump:\n", vcpu->vcpu_id);
+	printk("| %2s | %3s | %8s | %8s | %8s |\n",
+	       "nr", "tid", "word0", "word1", "word2");
+
+	for (slot = 0; slot < ARRAY_SIZE(vcpu_44x->guest_tlb); slot++) {
+		struct kvmppc_44x_tlbe *entry = &vcpu_44x->guest_tlb[slot];
+
+		/* Invalid entries are left out of the table. */
+		if (!(entry->word0 & PPC44x_TLB_VALID))
+			continue;
+
+		printk(" G%2d | %02X | %08X | %08X | %08X |\n",
+		       slot, entry->tid, entry->word0, entry->word1,
+		       entry->word2);
+	}
+}
+#endif
+
+/* Invalidate hardware TLB entry 'index' by writing the index value itself
+ * into word 0 of that entry with tlbwe. */
+static inline void kvmppc_44x_tlbie(unsigned int index)
+{
+ /* 0 <= index < 64, so the V bit is clear and we can use the index as
+ * word0. */
+ asm volatile(
+ "tlbwe %[index], %[index], 0\n"
+ :
+ : [index] "r"(index)
+ );
+}
+
+/*
+ * Read hardware TLB entry 'index' into '*tlbe'.
+ *
+ * Words 0, 1 and 2 are fetched with tlbre.  The TID is taken from the low
+ * eight bits of MMUCR, read right after word 0 (presumably because tlbre of
+ * word 0 deposits the entry's TID there -- confirm against the 440 manual).
+ *
+ * word0, tid and word1 are written before the last instruction that reads
+ * 'index', so they carry the early-clobber modifier ("=&r"); without it
+ * the compiler may place one of them in the register holding 'index' and
+ * the later tlbre instructions would read the wrong entry.  "andi." updates
+ * CR0, hence the "cc" clobber.
+ */
+static inline void kvmppc_44x_tlbre(unsigned int index,
+                                    struct kvmppc_44x_tlbe *tlbe)
+{
+	asm volatile(
+		"tlbre %[word0], %[index], 0\n"
+		"mfspr %[tid], %[sprn_mmucr]\n"
+		"andi. %[tid], %[tid], 0xff\n"
+		"tlbre %[word1], %[index], 1\n"
+		"tlbre %[word2], %[index], 2\n"
+		: [word0] "=&r"(tlbe->word0),
+		  [word1] "=&r"(tlbe->word1),
+		  [word2] "=r"(tlbe->word2),
+		  [tid]   "=&r"(tlbe->tid)
+		: [index] "r"(index),
+		  [sprn_mmucr] "i"(SPRN_MMUCR)
+		: "cc"
+	);
+}
+
+/*
+ * Write '*stlbe' into hardware TLB entry 'index'.
+ *
+ * MMUCR is read, its low eight bits are replaced with stlbe->tid (rlwimi
+ * with mask 0xff) and the result is written back before the three tlbwe
+ * instructions store words 0, 1 and 2 (presumably tlbwe takes the entry's
+ * TID from that MMUCR field -- confirm against the 440 manual).  'tmp' is
+ * an early-clobber scratch register for the MMUCR update.
+ */
+static inline void kvmppc_44x_tlbwe(unsigned int index,
+ struct kvmppc_44x_tlbe *stlbe)
+{
+ unsigned long tmp;
+
+ asm volatile(
+ "mfspr %[tmp], %[sprn_mmucr]\n"
+ "rlwimi %[tmp], %[tid], 0, 0xff\n"
+ "mtspr %[sprn_mmucr], %[tmp]\n"
+ "tlbwe %[word0], %[index], 0\n"
+ "tlbwe %[word1], %[index], 1\n"
+ "tlbwe %[word2], %[index], 2\n"
+ : [tmp] "=&r"(tmp)
+ : [word0] "r"(stlbe->word0),
+ [word1] "r"(stlbe->word1),
+ [word2] "r"(stlbe->word2),
+ [tid] "r"(stlbe->tid),
+ [index] "r"(index),
+ [sprn_mmucr] "i"(SPRN_MMUCR)
+ );
+}
+
+/*
+ * Derive the attribute word of a shadow (host) TLB entry from the guest's.
+ *
+ * @attrib:   attribute word of the guest TLB entry.
+ * @usermode: non-zero when the guest is currently in user mode.
+ *
+ * Only the guest's permission and U0-U3 bits are kept.  When the guest is
+ * in supervisor mode its supervisor permissions replace the user
+ * permissions (shifted left by 3).  Host supervisor access is always
+ * granted and the M bit is always set.
+ */
+static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode)
+{
+	u32 shadow = attrib & (PPC44x_TLB_PERM_MASK | PPC44x_TLB_UATTR_MASK);
+
+	if (!usermode) {
+		/* Guest supervisor code must be able to use the mapping, so
+		 * express the guest's supervisor permissions as user
+		 * permissions. */
+		shadow &= ~PPC44x_TLB_USER_PERM_MASK;
+		shadow |= (shadow & PPC44x_TLB_SUPER_PERM_MASK) << 3;
+	}
+
+	/* The host must always be able to access this memory. */
+	shadow |= PPC44x_TLB_SX | PPC44x_TLB_SR | PPC44x_TLB_SW;
+
+	/* WIMGE = 0b00100 */
+	return shadow | PPC44x_TLB_M;
+}
+
+/*
+ * Re-install this vcpu's saved shadow TLB entries into the hardware TLB.
+ *
+ * Slots 0..tlb_44x_hwater are scanned; only saved entries that are both
+ * valid (V set) and have TS set are written back, each to its own slot.
+ */
+void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	int slot;
+
+	for (slot = 0; slot <= tlb_44x_hwater; slot++) {
+		struct kvmppc_44x_tlbe *saved = &vcpu_44x->shadow_tlb[slot];
+
+		if (!get_tlb_v(saved) || !get_tlb_ts(saved))
+			continue;
+
+		kvmppc_44x_tlbwe(slot, saved);
+	}
+}
+
+/* Flag shadow TLB slot 'i' as modified; kvmppc_44x_tlb_put() re-reads
+ * flagged slots from the hardware TLB. */
+static void kvmppc_44x_tlbe_set_modified(struct kvmppc_vcpu_44x *vcpu_44x,
+ unsigned int i)
+{
+ vcpu_44x->shadow_tlb_mod[i] = 1;
+}
+
+/*
+ * Save the hardware TLB into the vcpu and invalidate all guest mappings.
+ *
+ * For every slot 0..tlb_44x_hwater: a slot flagged in shadow_tlb_mod is
+ * first read back from hardware into shadow_tlb; then, if the saved entry
+ * is valid and has TS set, the hardware entry is invalidated.
+ */
+void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	int slot;
+
+	for (slot = 0; slot <= tlb_44x_hwater; slot++) {
+		struct kvmppc_44x_tlbe *saved = &vcpu_44x->shadow_tlb[slot];
+
+		if (vcpu_44x->shadow_tlb_mod[slot])
+			kvmppc_44x_tlbre(slot, saved);
+
+		if (get_tlb_v(saved)) {
+			if (get_tlb_ts(saved))
+				kvmppc_44x_tlbie(slot);
+		}
+	}
+}
+
+
+/*
+ * Search the guest TLB for an entry translating 'eaddr'.
+ *
+ * An entry matches when 'eaddr' lies within its effective address range,
+ * its TID is either 0 or equal to 'pid', it is valid, and its TS bit equals
+ * 'as'.
+ *
+ * Returns the index of the first matching entry, or -1 when none matches.
+ */
+int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
+                         unsigned int as)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	int idx;
+
+	/* XXX Replace loop with fancy data structures. */
+	for (idx = 0; idx < ARRAY_SIZE(vcpu_44x->guest_tlb); idx++) {
+		struct kvmppc_44x_tlbe *entry = &vcpu_44x->guest_tlb[idx];
+		unsigned int tid;
+
+		/* Address outside the range covered by this entry. */
+		if (eaddr < get_tlb_eaddr(entry) || eaddr > get_tlb_end(entry))
+			continue;
+
+		/* TID 0 matches every PID. */
+		tid = get_tlb_tid(entry);
+		if (tid != 0 && tid != pid)
+			continue;
+
+		if (!get_tlb_v(entry) || get_tlb_ts(entry) != as)
+			continue;
+
+		return idx;
+	}
+
+	return -1;
+}
+
+/*
+ * Translate 'eaddr' to a guest physical address using guest TLB entry
+ * 'gtlb_index': the entry's real page address is combined with the offset
+ * of 'eaddr' inside the entry's page.  The index is not range-checked.
+ */
+gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index,
+                       gva_t eaddr)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	struct kvmppc_44x_tlbe *entry = &vcpu_44x->guest_tlb[gtlb_index];
+	unsigned int offset_mask = get_tlb_bytes(entry) - 1;
+	gva_t offset = eaddr & offset_mask;
+
+	return get_tlb_raddr(entry) | offset;
+}
+
+/*
+ * Guest TLB lookup for an instruction fetch: the address space is taken
+ * from MSR[IS] and the PID from the vcpu.  Returns the guest TLB index or
+ * -1 on a miss.
+ */
+int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
+{
+	unsigned int as = (vcpu->arch.shared->msr & MSR_IS) ? 1 : 0;
+
+	return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
+}
+
+/*
+ * Guest TLB lookup for a data access: the address space is taken from
+ * MSR[DS] and the PID from the vcpu.  Returns the guest TLB index or -1 on
+ * a miss.
+ */
+int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
+{
+	unsigned int as = (vcpu->arch.shared->msr & MSR_DS) ? 1 : 0;
+
+	return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
+}
+
+/* Instruction TLB miss hook: intentionally empty on 44x. */
+void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu)
+{
+}
+
+/* Data TLB miss hook: intentionally empty on 44x. */
+void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu)
+{
+}
+
+/*
+ * Drop the shadow mapping held in slot 'stlb_index', if there is one.
+ *
+ * A slot with no page recorded is left untouched.  Otherwise the hardware
+ * TLB entry is invalidated, the page reference is released (marked dirty
+ * when the mapping was writeable), and the slot's page pointer is cleared.
+ * The slot's gtlb_index and tid fields are not reset here.
+ */
+static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,
+ unsigned int stlb_index)
+{
+ struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[stlb_index];
+
+ if (!ref->page)
+ return;
+
+ /* Discard from the TLB. */
+ /* Note: we could actually invalidate a host mapping, if the host overwrote
+ * this TLB entry since we inserted a guest mapping. */
+ kvmppc_44x_tlbie(stlb_index);
+
+ /* Now release the page. */
+ if (ref->writeable)
+ kvm_release_page_dirty(ref->page);
+ else
+ kvm_release_page_clean(ref->page);
+
+ ref->page = NULL;
+
+ /* XXX set tlb_44x_index to stlb_index? */
+
+ trace_kvm_stlb_inval(stlb_index);
+}
+
+/*
+ * Release every shadow mapping of 'vcpu' by walking slots
+ * 0..tlb_44x_hwater and dropping each one.
+ */
+void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	int slot = 0;
+
+	while (slot <= tlb_44x_hwater) {
+		kvmppc_44x_shadow_release(vcpu_44x, slot);
+		slot++;
+	}
+}
+
+/**
+ * kvmppc_mmu_map -- create a host mapping for guest memory
+ *
+ * @vcpu:       vcpu the mapping is created for
+ * @gvaddr:     guest virtual (effective) address to map
+ * @gpaddr:     guest physical address backing @gvaddr
+ * @gtlb_index: index of the guest TLB entry this mapping is derived from
+ *
+ * If the guest wanted a larger page than the host supports, only the first
+ * host page is mapped here and the rest are demand faulted.
+ *
+ * If the guest wanted a smaller page than the host page size, we map only the
+ * guest-size page (i.e. not a full host page mapping).
+ *
+ * Caller must ensure that the specified guest TLB entry is safe to insert into
+ * the shadow TLB.
+ *
+ * If the guest page cannot be obtained, an error is logged and nothing is
+ * mapped; the round-robin victim index has already been advanced by then.
+ */
+void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
+ unsigned int gtlb_index)
+{
+ struct kvmppc_44x_tlbe stlbe;
+ struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+ struct kvmppc_44x_tlbe *gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
+ struct kvmppc_44x_shadow_ref *ref;
+ struct page *new_page;
+ hpa_t hpaddr;
+ gfn_t gfn;
+ u32 asid = gtlbe->tid;
+ u32 flags = gtlbe->word2;
+ u32 max_bytes = get_tlb_bytes(gtlbe);
+ unsigned int victim;
+
+ /* Select TLB entry to clobber. Indirectly guard against races with the TLB
+ * miss handler by disabling interrupts. */
+ /* Round-robin: advance tlb_44x_index, wrapping to 0 past tlb_44x_hwater. */
+ local_irq_disable();
+ victim = ++tlb_44x_index;
+ if (victim > tlb_44x_hwater)
+ victim = 0;
+ tlb_44x_index = victim;
+ local_irq_enable();
+
+ /* Get reference to new page. */
+ gfn = gpaddr >> PAGE_SHIFT;
+ new_page = gfn_to_page(vcpu->kvm, gfn);
+ if (is_error_page(new_page)) {
+ printk(KERN_ERR "Couldn't get guest page for gfn %llx!\n",
+ (unsigned long long)gfn);
+ kvm_release_page_clean(new_page);
+ return;
+ }
+ hpaddr = page_to_phys(new_page);
+
+ /* Invalidate any previous shadow mappings. */
+ kvmppc_44x_shadow_release(vcpu_44x, victim);
+
+ /* XXX Make sure (va, size) doesn't overlap any other
+ * entries. 440x6 user manual says the result would be
+ * "undefined." */
+
+ /* XXX what about AS? */
+
+ /* Force TS=1 for all guest mappings. */
+ stlbe.word0 = PPC44x_TLB_VALID | PPC44x_TLB_TS;
+
+ if (max_bytes >= PAGE_SIZE) {
+ /* Guest mapping is larger than or equal to host page size. We can use
+ * a "native" host mapping. */
+ stlbe.word0 |= (gvaddr & PAGE_MASK) | PPC44x_TLBE_SIZE;
+ } else {
+ /* Guest mapping is smaller than host page size. We must restrict the
+ * size of the mapping to be at most the smaller of the two, but for
+ * simplicity we fall back to a 4K mapping (this is probably what the
+ * guest is using anyways). */
+ stlbe.word0 |= (gvaddr & PAGE_MASK_4K) | PPC44x_TLB_4K;
+
+ /* 'hpaddr' is a host page, which is larger than the mapping we're
+ * inserting here. To compensate, we must add the in-page offset to the
+ * sub-page. */
+ hpaddr |= gpaddr & (PAGE_MASK ^ PAGE_MASK_4K);
+ }
+
+ /* word1: low 32 address bits (offset bits masked off) plus bits 32-35 of
+ * the host physical address in the low nibble. */
+ stlbe.word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf);
+ stlbe.word2 = kvmppc_44x_tlb_shadow_attrib(flags,
+ vcpu->arch.shared->msr & MSR_PR);
+ /* Shadow TID is 1 when the low byte of the guest TID is 0, else 0. */
+ stlbe.tid = !(asid & 0xff);
+
+ /* Keep track of the reference so we can properly release it later. */
+ ref = &vcpu_44x->shadow_refs[victim];
+ ref->page = new_page;
+ ref->gtlb_index = gtlb_index;
+ ref->writeable = !!(stlbe.word2 & PPC44x_TLB_UW);
+ ref->tid = stlbe.tid;
+
+ /* Insert shadow mapping into hardware TLB. */
+ kvmppc_44x_tlbe_set_modified(vcpu_44x, victim);
+ kvmppc_44x_tlbwe(victim, &stlbe);
+ trace_kvm_stlb_write(victim, stlbe.tid, stlbe.word0, stlbe.word1,
+ stlbe.word2);
+}
+
+/*
+ * For guest TLB entry 'gtlb_index', invalidate every host (shadow) TLB
+ * mapping derived from it and release the host pages: each shadow slot
+ * whose recorded gtlb_index matches is released.
+ */
+static void kvmppc_44x_invalidate(struct kvm_vcpu *vcpu,
+                                  unsigned int gtlb_index)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	int slot;
+
+	for (slot = 0; slot < ARRAY_SIZE(vcpu_44x->shadow_refs); slot++) {
+		if (vcpu_44x->shadow_refs[slot].gtlb_index != gtlb_index)
+			continue;
+
+		kvmppc_44x_shadow_release(vcpu_44x, slot);
+	}
+}
+
+/* MSR-change hook: sets shadow_pid to 1 when the guest MSR has PR clear
+ * (supervisor mode) and to 0 when PR is set (user mode).  'old_msr' is not
+ * used. */
+void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
+{
+ int usermode = vcpu->arch.shared->msr & MSR_PR;
+
+ vcpu->arch.shadow_pid = !usermode;
+}
+
+/*
+ * Change the guest PID to 'new_pid'.
+ *
+ * Nothing happens when the PID is unchanged.  Otherwise the new value is
+ * recorded and every shadow slot whose recorded TID is 0 is released.
+ */
+void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	int slot;
+
+	if (unlikely(new_pid == vcpu->arch.pid))
+		return;
+
+	vcpu->arch.pid = new_pid;
+
+	/* Guest userspace runs with TID=0 mappings and PID=0, to make sure it
+	 * can't access guest kernel mappings (TID=1). When we switch to a new
+	 * guest PID, which will also use host PID=0, we must discard the old guest
+	 * userspace mappings. */
+	for (slot = 0; slot < ARRAY_SIZE(vcpu_44x->shadow_refs); slot++) {
+		if (vcpu_44x->shadow_refs[slot].tid != 0)
+			continue;
+
+		kvmppc_44x_shadow_release(vcpu_44x, slot);
+	}
+}
+
+/*
+ * Decide whether guest TLB entry 'tlbe' may be shadowed in the host TLB.
+ *
+ * Returns 1 only when the entry is valid, its TS bit matches the guest's
+ * current MSR[IS], and its real address falls inside a memslot; returns 0
+ * otherwise.
+ */
+static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
+                             const struct kvmppc_44x_tlbe *tlbe)
+{
+	unsigned int guest_as;
+	gpa_t gpa;
+
+	if (!get_tlb_v(tlbe))
+		return 0;
+
+	/* Does it match current guest AS? */
+	/* XXX what about IS != DS? */
+	guest_as = !!(vcpu->arch.shared->msr & MSR_IS);
+	if (get_tlb_ts(tlbe) != guest_as)
+		return 0;
+
+	/* A mapping with no memslot behind it is not for RAM. */
+	gpa = get_tlb_raddr(tlbe);
+	return gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT) ? 1 : 0;
+}
+
+/*
+ * Emulate the guest's tlbwe instruction.
+ *
+ * @vcpu: vcpu executing the instruction
+ * @ra:   GPR number holding the guest TLB index
+ * @rs:   GPR number holding the value to write
+ * @ws:   word select (PPC44x_TLB_PAGEID, PPC44x_TLB_XLAT or
+ *        PPC44x_TLB_ATTRIB)
+ *
+ * Shadow mappings derived from a currently valid entry are invalidated
+ * before the entry is updated.  Writing the PAGEID word also latches the
+ * TID from MMUCR[STID].  If the updated entry is safe to shadow, a host
+ * mapping for its first page is created straight away.
+ *
+ * Returns EMULATE_DONE on success, or EMULATE_FAIL when the index is out of
+ * range or @ws is not one of the three word selects.
+ */
+int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	struct kvmppc_44x_tlbe *tlbe;
+	unsigned int gtlb_index;
+
+	gtlb_index = kvmppc_get_gpr(vcpu, ra);
+	if (gtlb_index >= KVM44x_GUEST_TLB_SIZE) {
+		/* The index is unsigned and guest-controlled: print it with
+		 * %u so large values are not shown as negative numbers. */
+		printk("%s: index %u\n", __func__, gtlb_index);
+		kvmppc_dump_vcpu(vcpu);
+		return EMULATE_FAIL;
+	}
+
+	tlbe = &vcpu_44x->guest_tlb[gtlb_index];
+
+	/* Invalidate shadow mappings for the about-to-be-clobbered TLB entry. */
+	if (tlbe->word0 & PPC44x_TLB_VALID)
+		kvmppc_44x_invalidate(vcpu, gtlb_index);
+
+	switch (ws) {
+	case PPC44x_TLB_PAGEID:
+		tlbe->tid = get_mmucr_stid(vcpu);
+		tlbe->word0 = kvmppc_get_gpr(vcpu, rs);
+		break;
+
+	case PPC44x_TLB_XLAT:
+		tlbe->word1 = kvmppc_get_gpr(vcpu, rs);
+		break;
+
+	case PPC44x_TLB_ATTRIB:
+		tlbe->word2 = kvmppc_get_gpr(vcpu, rs);
+		break;
+
+	default:
+		return EMULATE_FAIL;
+	}
+
+	if (tlbe_is_host_safe(vcpu, tlbe)) {
+		gva_t eaddr;
+		gpa_t gpaddr;
+		u32 bytes;
+
+		eaddr = get_tlb_eaddr(tlbe);
+		gpaddr = get_tlb_raddr(tlbe);
+
+		/* Use the advertised page size to mask effective and real addrs. */
+		bytes = get_tlb_bytes(tlbe);
+		eaddr &= ~(bytes - 1);
+		gpaddr &= ~(bytes - 1);
+
+		kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index);
+	}
+
+	trace_kvm_gtlb_write(gtlb_index, tlbe->tid, tlbe->word0, tlbe->word1,
+			     tlbe->word2);
+
+	kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
+	return EMULATE_DONE;
+}
+
+/*
+ * Emulate the guest's tlbsx instruction.
+ *
+ * The effective address is GPR[rb], plus GPR[ra] when 'ra' is non-zero.
+ * The guest TLB is searched using the address space and PID taken from
+ * MMUCR (STS and STID).  The resulting index, or -1 on a miss, is written
+ * to GPR[rt].  When 'rc' is non-zero, CR bit 0x20000000 is set on a hit and
+ * cleared on a miss.
+ *
+ * Always returns EMULATE_DONE.
+ */
+int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc)
+{
+	unsigned int as = get_mmucr_sts(vcpu);
+	unsigned int pid = get_mmucr_stid(vcpu);
+	u32 ea = kvmppc_get_gpr(vcpu, rb);
+	int gtlb_index;
+
+	if (ra)
+		ea += kvmppc_get_gpr(vcpu, ra);
+
+	gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
+
+	if (rc) {
+		u32 cr = kvmppc_get_cr(vcpu);
+
+		if (gtlb_index >= 0)
+			cr |= 0x20000000;
+		else
+			cr &= ~0x20000000;
+
+		kvmppc_set_cr(vcpu, cr);
+	}
+
+	kvmppc_set_gpr(vcpu, rt, gtlb_index);
+
+	kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
+	return EMULATE_DONE;
+}
diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h
new file mode 100644
index 00000000..a9ff80e5
--- /dev/null
+++ b/arch/powerpc/kvm/44x_tlb.h
@@ -0,0 +1,86 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2007
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#ifndef __KVM_POWERPC_TLB_H__
+#define __KVM_POWERPC_TLB_H__
+
+#include <linux/kvm_host.h>
+#include <asm/mmu-44x.h>
+
+/* Guest TLB lookup by effective address, PID and address space. */
+extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr,
+ unsigned int pid, unsigned int as);
+
+/* Emulation of the guest's tlbsx and tlbwe instructions; the u8 arguments
+ * are the register / word-select fields decoded from the instruction. */
+extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb,
+ u8 rc);
+extern int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws);
+
+/* TLB helper functions */
+/* Page-size field of a TLB entry: bits 4-7 of word0. */
+static inline unsigned int get_tlb_size(const struct kvmppc_44x_tlbe *tlbe)
+{
+ return (tlbe->word0 >> 4) & 0xf;
+}
+
+/* Effective (virtual) base address of a TLB entry: word0 with its low ten
+ * bits cleared. */
+static inline gva_t get_tlb_eaddr(const struct kvmppc_44x_tlbe *tlbe)
+{
+ return tlbe->word0 & 0xfffffc00;
+}
+
+/*
+ * Size in bytes of the page mapped by a TLB entry: 1K * 4^size, where
+ * 'size' is the 4-bit page-size field of word0.
+ *
+ * The shift is done in gva_t rather than int: the size field comes from the
+ * guest and can be as large as 15, for which a signed int shift overflows
+ * (undefined behaviour).  With an unsigned type each shift is well defined
+ * and bits that do not fit in gva_t are simply discarded.
+ */
+static inline gva_t get_tlb_bytes(const struct kvmppc_44x_tlbe *tlbe)
+{
+	unsigned int pgsize = get_tlb_size(tlbe);
+
+	return (gva_t)1 << 10 << (pgsize << 1);
+}
+
+/* Last effective address covered by a TLB entry (inclusive): base address
+ * plus page size minus one. */
+static inline gva_t get_tlb_end(const struct kvmppc_44x_tlbe *tlbe)
+{
+ return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1;
+}
+
+/*
+ * Real (physical) address of a TLB entry, assembled from word1: its low
+ * nibble supplies address bits 32-35, and the word with its low ten bits
+ * cleared supplies address bits 10-31.
+ */
+static inline u64 get_tlb_raddr(const struct kvmppc_44x_tlbe *tlbe)
+{
+	u64 word1 = tlbe->word1;
+	u64 upper = (word1 & 0xf) << 32;
+	u64 lower = word1 & 0xfffffc00;
+
+	return upper | lower;
+}
+
+/* TID of a TLB entry: low eight bits of the 'tid' field. */
+static inline unsigned int get_tlb_tid(const struct kvmppc_44x_tlbe *tlbe)
+{
+ return tlbe->tid & 0xff;
+}
+
+/* TS (address space) bit of a TLB entry: bit 8 of word0, returned as 0/1. */
+static inline unsigned int get_tlb_ts(const struct kvmppc_44x_tlbe *tlbe)
+{
+ return (tlbe->word0 >> 8) & 0x1;
+}
+
+/* V (valid) bit of a TLB entry: bit 9 of word0, returned as 0/1. */
+static inline unsigned int get_tlb_v(const struct kvmppc_44x_tlbe *tlbe)
+{
+ return (tlbe->word0 >> 9) & 0x1;
+}
+
+/* STID field of the guest's MMUCR: its low eight bits. */
+static inline unsigned int get_mmucr_stid(const struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.mmucr & 0xff;
+}
+
+/* STS bit of the guest's MMUCR: bit 16, returned as 0/1. */
+static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu)
+{
+ return (vcpu->arch.mmucr >> 16) & 0x1;
+}
+
+#endif /* __KVM_POWERPC_TLB_H__ */
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
new file mode 100644
index 00000000..8f64709a
--- /dev/null
+++ b/arch/powerpc/kvm/Kconfig
@@ -0,0 +1,134 @@
+#
+# KVM configuration
+#
+
+source "virt/kvm/Kconfig"
+
+menuconfig VIRTUALIZATION
+ bool "Virtualization"
+ ---help---
+ Say Y here to get to see options for using your Linux host to run
+ other operating systems inside virtual machines (guests).
+ This option alone does not add any kernel code.
+
+ If you say N, all options in this submenu will be skipped and
+ disabled.
+
+if VIRTUALIZATION
+
+config KVM
+ bool
+ select PREEMPT_NOTIFIERS
+ select ANON_INODES
+
+config KVM_BOOK3S_HANDLER
+ bool
+
+config KVM_BOOK3S_32_HANDLER
+ bool
+ select KVM_BOOK3S_HANDLER
+ select KVM_MMIO
+
+config KVM_BOOK3S_64_HANDLER
+ bool
+ select KVM_BOOK3S_HANDLER
+
+config KVM_BOOK3S_PR
+ bool
+ select KVM_MMIO
+
+config KVM_BOOK3S_32
+ tristate "KVM support for PowerPC book3s_32 processors"
+ depends on EXPERIMENTAL && PPC_BOOK3S_32 && !SMP && !PTE_64BIT
+ select KVM
+ select KVM_BOOK3S_32_HANDLER
+ select KVM_BOOK3S_PR
+ ---help---
+ Support running unmodified book3s_32 guest kernels
+ in virtual machines on book3s_32 host processors.
+
+ This module provides access to the hardware capabilities through
+ a character device node named /dev/kvm.
+
+ If unsure, say N.
+
+config KVM_BOOK3S_64
+ tristate "KVM support for PowerPC book3s_64 processors"
+ depends on EXPERIMENTAL && PPC_BOOK3S_64
+ select KVM_BOOK3S_64_HANDLER
+ select KVM
+ ---help---
+ Support running unmodified book3s_64 and book3s_32 guest kernels
+ in virtual machines on book3s_64 host processors.
+
+ This module provides access to the hardware capabilities through
+ a character device node named /dev/kvm.
+
+ If unsure, say N.
+
+config KVM_BOOK3S_64_HV
+ bool "KVM support for POWER7 and PPC970 using hypervisor mode in host"
+ depends on KVM_BOOK3S_64
+ select MMU_NOTIFIER
+ ---help---
+ Support running unmodified book3s_64 guest kernels in
+ virtual machines on POWER7 and PPC970 processors that have
+ hypervisor mode available to the host.
+
+ If you say Y here, KVM will use the hardware virtualization
+ facilities of POWER7 (and later) processors, meaning that
+ guest operating systems will run at full hardware speed
+ using supervisor and user modes. However, this also means
+ that KVM is not usable under PowerVM (pHyp), is only usable
+ on POWER7 (or later) processors and PPC970-family processors,
+ and cannot emulate a different processor from the host processor.
+
+ If unsure, say N.
+
+config KVM_BOOK3S_64_PR
+ def_bool y
+ depends on KVM_BOOK3S_64 && !KVM_BOOK3S_64_HV
+ select KVM_BOOK3S_PR
+
+config KVM_440
+ bool "KVM support for PowerPC 440 processors"
+ depends on EXPERIMENTAL && 44x
+ select KVM
+ select KVM_MMIO
+ ---help---
+ Support running unmodified 440 guest kernels in virtual machines on
+ 440 host processors.
+
+ This module provides access to the hardware capabilities through
+ a character device node named /dev/kvm.
+
+ If unsure, say N.
+
+config KVM_EXIT_TIMING
+ bool "Detailed exit timing"
+ depends on KVM_440 || KVM_E500
+ ---help---
+ Calculate elapsed time for every exit/enter cycle. A per-vcpu
+ report is available in debugfs kvm/vm#_vcpu#_timing.
+ The overhead is relatively small, however it is not recommended for
+ production environments.
+
+ If unsure, say N.
+
+config KVM_E500
+ bool "KVM support for PowerPC E500 processors"
+ depends on EXPERIMENTAL && E500
+ select KVM
+ select KVM_MMIO
+ ---help---
+ Support running unmodified E500 guest kernels in virtual machines on
+ E500 host processors.
+
+ This module provides access to the hardware capabilities through
+ a character device node named /dev/kvm.
+
+ If unsure, say N.
+
+source drivers/vhost/Kconfig
+
+endif # VIRTUALIZATION
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
new file mode 100644
index 00000000..3688aeec
--- /dev/null
+++ b/arch/powerpc/kvm/Makefile
@@ -0,0 +1,95 @@
+#
+# Makefile for Kernel-based Virtual Machine module
+#
+
+# Treat warnings as errors in this directory when CONFIG_PPC_WERROR is set.
+subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+
+# Let sources here include headers from virt/kvm and from this directory.
+ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
+
+# Objects shared by the 440, e500 and book3s_32 flavours: the generic KVM
+# code from virt/kvm (powerpc.o and emulate.o are appended further down).
+common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
+
+CFLAGS_44x_tlb.o := -I.
+CFLAGS_e500_tlb.o := -I.
+CFLAGS_emulate.o := -I.
+
+common-objs-y += powerpc.o emulate.o
+obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o
+obj-$(CONFIG_KVM_BOOK3S_HANDLER) += book3s_exports.o
+
+AFLAGS_booke_interrupts.o := -I$(obj)
+
+# BookE 440 flavour of kvm.o
+kvm-440-objs := \
+ $(common-objs-y) \
+ booke.o \
+ booke_emulate.o \
+ booke_interrupts.o \
+ 44x.o \
+ 44x_tlb.o \
+ 44x_emulate.o
+kvm-objs-$(CONFIG_KVM_440) := $(kvm-440-objs)
+
+# BookE e500 flavour of kvm.o
+kvm-e500-objs := \
+ $(common-objs-y) \
+ booke.o \
+ booke_emulate.o \
+ booke_interrupts.o \
+ e500.o \
+ e500_tlb.o \
+ e500_emulate.o
+kvm-objs-$(CONFIG_KVM_E500) := $(kvm-e500-objs)
+
+# Book3S 64-bit, PR variant: objects that go into kvm.o ...
+kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
+ ../../../virt/kvm/coalesced_mmio.o \
+ fpu.o \
+ book3s_paired_singles.o \
+ book3s_pr.o \
+ book3s_pr_papr.o \
+ book3s_emulate.o \
+ book3s_interrupts.o \
+ book3s_mmu_hpte.o \
+ book3s_64_mmu_host.o \
+ book3s_64_mmu.o \
+ book3s_32_mmu.o
+# ... and objects that are added to obj-y (see the last line of this file).
+kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
+ book3s_rmhandlers.o
+
+# Book3S 64-bit, HV variant: same split between kvm.o and obj-y objects.
+kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
+ book3s_hv.o \
+ book3s_hv_interrupts.o \
+ book3s_64_mmu_hv.o
+kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
+ book3s_hv_rmhandlers.o \
+ book3s_hv_rm_mmu.o \
+ book3s_64_vio_hv.o \
+ book3s_hv_builtin.o
+
+# Book3S 64-bit kvm.o: generic core plus whichever variant list is enabled.
+# Note that this list does not use common-objs-y; coalesced_mmio.o is only
+# pulled in through the PR list above.
+kvm-book3s_64-module-objs := \
+ ../../../virt/kvm/kvm_main.o \
+ powerpc.o \
+ emulate.o \
+ book3s.o \
+ $(kvm-book3s_64-objs-y)
+
+kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs)
+
+# Book3S 32-bit flavour of kvm.o
+kvm-book3s_32-objs := \
+ $(common-objs-y) \
+ fpu.o \
+ book3s_paired_singles.o \
+ book3s.o \
+ book3s_pr.o \
+ book3s_emulate.o \
+ book3s_interrupts.o \
+ book3s_mmu_hpte.o \
+ book3s_32_mmu_host.o \
+ book3s_32_mmu.o
+kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
+
+# kvm.o is built from whichever flavour list was selected as =m or =y.
+kvm-objs := $(kvm-objs-m) $(kvm-objs-y)
+
+obj-$(CONFIG_KVM_440) += kvm.o
+obj-$(CONFIG_KVM_E500) += kvm.o
+obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o
+obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o
+
+# The book3s_64 "builtin" objects are always added to obj-y.
+obj-y += $(kvm-book3s_64-builtin-objs-y)
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
new file mode 100644
index 00000000..7d54f4ed
--- /dev/null
+++ b/arch/powerpc/kvm/book3s.c
@@ -0,0 +1,486 @@
+/*
+ * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
+ *
+ * Authors:
+ * Alexander Graf <agraf@suse.de>
+ * Kevin Wolf <mail@kevin-wolf.de>
+ *
+ * Description:
+ * This file is derived from arch/powerpc/kvm/44x.c,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+
+#include <asm/reg.h>
+#include <asm/cputable.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu_context.h>
+#include <asm/page.h>
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <linux/vmalloc.h>
+#include <linux/highmem.h>
+
+#include "trace.h"
+
+#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
+
+/* #define EXIT_DEBUG */
+
+/*
+ * Table of per-vcpu statistics counters.  Each entry pairs a short name with
+ * the offset of the matching counter inside struct kvm_vcpu and the
+ * KVM_STAT_VCPU kind (both supplied by VCPU_STAT()).  The list ends with a
+ * NULL-named sentinel entry.
+ */
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+ { "exits", VCPU_STAT(sum_exits) },
+ { "mmio", VCPU_STAT(mmio_exits) },
+ { "sig", VCPU_STAT(signal_exits) },
+ { "sysc", VCPU_STAT(syscall_exits) },
+ { "inst_emu", VCPU_STAT(emulated_inst_exits) },
+ { "dec", VCPU_STAT(dec_exits) },
+ { "ext_intr", VCPU_STAT(ext_intr_exits) },
+ { "queue_intr", VCPU_STAT(queue_intr) },
+ { "halt_wakeup", VCPU_STAT(halt_wakeup) },
+ { "pf_storage", VCPU_STAT(pf_storage) },
+ { "sp_storage", VCPU_STAT(sp_storage) },
+ { "pf_instruc", VCPU_STAT(pf_instruc) },
+ { "sp_instruc", VCPU_STAT(sp_instruc) },
+ { "ld", VCPU_STAT(ld) },
+ { "ld_slow", VCPU_STAT(ld_slow) },
+ { "st", VCPU_STAT(st) },
+ { "st_slow", VCPU_STAT(st_slow) },
+ { NULL }
+};
+
+/* Intentionally empty: this file switches no host debug state. */
+void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
+{
+}
+
+/* Intentionally empty: this file switches no guest debug state. */
+void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
+{
+}
+
+/*
+ * Redirect the guest to interrupt vector @vec.
+ *
+ * The current guest PC is stored in SRR0 and the current MSR, OR-ed with
+ * @flags, in SRR1.  The PC is then set to the interrupt base offset plus
+ * @vec, and finally the MMU backend's reset_msr() hook is called.
+ */
+void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
+{
+ /* SRR0/SRR1 must capture PC and MSR before either is overwritten below */
+ vcpu->arch.shared->srr0 = kvmppc_get_pc(vcpu);
+ vcpu->arch.shared->srr1 = vcpu->arch.shared->msr | flags;
+ kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec);
+ vcpu->arch.mmu.reset_msr(vcpu);
+}
+
+/*
+ * Translate an interrupt vector offset into its BOOK3S_IRQPRIO_* bit number.
+ * Vectors without a mapping yield BOOK3S_IRQPRIO_MAX.
+ */
+static int kvmppc_book3s_vec2irqprio(unsigned int vec)
+{
+	switch (vec) {
+	case 0x100:
+		return BOOK3S_IRQPRIO_SYSTEM_RESET;
+	case 0x200:
+		return BOOK3S_IRQPRIO_MACHINE_CHECK;
+	case 0x300:
+		return BOOK3S_IRQPRIO_DATA_STORAGE;
+	case 0x380:
+		return BOOK3S_IRQPRIO_DATA_SEGMENT;
+	case 0x400:
+		return BOOK3S_IRQPRIO_INST_STORAGE;
+	case 0x480:
+		return BOOK3S_IRQPRIO_INST_SEGMENT;
+	case 0x500:
+		return BOOK3S_IRQPRIO_EXTERNAL;
+	case 0x501:
+		return BOOK3S_IRQPRIO_EXTERNAL_LEVEL;
+	case 0x600:
+		return BOOK3S_IRQPRIO_ALIGNMENT;
+	case 0x700:
+		return BOOK3S_IRQPRIO_PROGRAM;
+	case 0x800:
+		return BOOK3S_IRQPRIO_FP_UNAVAIL;
+	case 0x900:
+		return BOOK3S_IRQPRIO_DECREMENTER;
+	case 0xc00:
+		return BOOK3S_IRQPRIO_SYSCALL;
+	case 0xd00:
+		return BOOK3S_IRQPRIO_DEBUG;
+	case 0xf20:
+		return BOOK3S_IRQPRIO_ALTIVEC;
+	case 0xf40:
+		return BOOK3S_IRQPRIO_VSX;
+	default:
+		return BOOK3S_IRQPRIO_MAX;
+	}
+}
+
+/*
+ * Drop the pending exception that belongs to interrupt vector @vec and
+ * report the before/after pending masks via kvmppc_update_int_pending().
+ */
+static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
+					  unsigned int vec)
+{
+	unsigned long *pending = &vcpu->arch.pending_exceptions;
+	unsigned long before = *pending;
+
+	clear_bit(kvmppc_book3s_vec2irqprio(vec), pending);
+
+	kvmppc_update_int_pending(vcpu, *pending, before);
+}
+
+/*
+ * Mark the exception belonging to interrupt vector @vec as pending for @vcpu
+ * and bump the queue_intr statistic.  Nothing is delivered here.
+ */
+void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
+{
+ vcpu->stat.queue_intr++;
+
+ set_bit(kvmppc_book3s_vec2irqprio(vec),
+ &vcpu->arch.pending_exceptions);
+#ifdef EXIT_DEBUG
+ printk(KERN_INFO "Queueing interrupt %x\n", vec);
+#endif
+}
+
+
+/*
+ * Raise a program interrupt in the guest.  Unlike the other queue helpers
+ * this does not set a pending bit: the interrupt is injected immediately,
+ * with @flags OR-ed into the SRR1 value.
+ */
+void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
+{
+ /* might as well deliver this straight away */
+ kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_PROGRAM, flags);
+}
+
+/* Mark a decrementer interrupt as pending for @vcpu. */
+void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
+{
+ kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
+}
+
+/* Return non-zero if a decrementer interrupt is currently pending. */
+int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
+{
+ return test_bit(BOOK3S_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
+}
+
+/* Clear a pending decrementer interrupt for @vcpu. */
+void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
+{
+ kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
+}
+
+/*
+ * Queue an external interrupt.  A KVM_INTERRUPT_SET_LEVEL request is queued
+ * under the level-triggered vector, everything else under the plain
+ * external vector.
+ */
+void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
+				struct kvm_interrupt *irq)
+{
+	bool level = (irq->irq == KVM_INTERRUPT_SET_LEVEL);
+
+	kvmppc_book3s_queue_irqprio(vcpu,
+				    level ? BOOK3S_INTERRUPT_EXTERNAL_LEVEL
+					  : BOOK3S_INTERRUPT_EXTERNAL);
+}
+
+/*
+ * Clear any pending external interrupt, both the plain and the
+ * level-triggered variant.  @irq is not inspected.
+ */
+void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
+ struct kvm_interrupt *irq)
+{
+ kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
+ kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+}
+
+/*
+ * Try to deliver the exception identified by @priority to the guest.
+ *
+ * Decrementer and external interrupts are only delivered while the guest
+ * has MSR_EE set and is not inside a critical section; every other known
+ * priority is delivered unconditionally.  Unknown priorities are logged and
+ * not delivered.
+ *
+ * Returns 1 if the interrupt was injected, 0 otherwise.
+ */
+int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
+{
+	bool crit = kvmppc_critical_section(vcpu);
+	bool maskable = false;
+	int deliver;
+	int vec = 0;
+
+	/* Step 1: map the priority to its vector, noting maskable sources */
+	switch (priority) {
+	case BOOK3S_IRQPRIO_DECREMENTER:
+		maskable = true;
+		vec = BOOK3S_INTERRUPT_DECREMENTER;
+		break;
+	case BOOK3S_IRQPRIO_EXTERNAL:
+	case BOOK3S_IRQPRIO_EXTERNAL_LEVEL:
+		maskable = true;
+		vec = BOOK3S_INTERRUPT_EXTERNAL;
+		break;
+	case BOOK3S_IRQPRIO_SYSTEM_RESET:
+		vec = BOOK3S_INTERRUPT_SYSTEM_RESET;
+		break;
+	case BOOK3S_IRQPRIO_MACHINE_CHECK:
+		vec = BOOK3S_INTERRUPT_MACHINE_CHECK;
+		break;
+	case BOOK3S_IRQPRIO_DATA_STORAGE:
+		vec = BOOK3S_INTERRUPT_DATA_STORAGE;
+		break;
+	case BOOK3S_IRQPRIO_INST_STORAGE:
+		vec = BOOK3S_INTERRUPT_INST_STORAGE;
+		break;
+	case BOOK3S_IRQPRIO_DATA_SEGMENT:
+		vec = BOOK3S_INTERRUPT_DATA_SEGMENT;
+		break;
+	case BOOK3S_IRQPRIO_INST_SEGMENT:
+		vec = BOOK3S_INTERRUPT_INST_SEGMENT;
+		break;
+	case BOOK3S_IRQPRIO_ALIGNMENT:
+		vec = BOOK3S_INTERRUPT_ALIGNMENT;
+		break;
+	case BOOK3S_IRQPRIO_PROGRAM:
+		vec = BOOK3S_INTERRUPT_PROGRAM;
+		break;
+	case BOOK3S_IRQPRIO_VSX:
+		vec = BOOK3S_INTERRUPT_VSX;
+		break;
+	case BOOK3S_IRQPRIO_ALTIVEC:
+		vec = BOOK3S_INTERRUPT_ALTIVEC;
+		break;
+	case BOOK3S_IRQPRIO_FP_UNAVAIL:
+		vec = BOOK3S_INTERRUPT_FP_UNAVAIL;
+		break;
+	case BOOK3S_IRQPRIO_SYSCALL:
+		vec = BOOK3S_INTERRUPT_SYSCALL;
+		break;
+	case BOOK3S_IRQPRIO_DEBUG:
+		vec = BOOK3S_INTERRUPT_TRACE;
+		break;
+	case BOOK3S_IRQPRIO_PERFORMANCE_MONITOR:
+		vec = BOOK3S_INTERRUPT_PERFMON;
+		break;
+	default:
+		printk(KERN_ERR "KVM: Unknown interrupt: 0x%x\n", priority);
+		return 0;
+	}
+
+	/* Step 2: maskable sources need MSR_EE and no critical section */
+	deliver = !maskable ||
+		  ((vcpu->arch.shared->msr & MSR_EE) && !crit);
+
+#if 0
+	printk(KERN_INFO "Deliver interrupt 0x%x? %x\n", vec, deliver);
+#endif
+
+	if (!deliver)
+		return 0;
+
+	kvmppc_inject_interrupt(vcpu, vec, 0);
+	return 1;
+}
+
+/*
+ * Tell whether a pending bit should be cleared once its interrupt has been
+ * issued.  Two sources stay pending after delivery: the decrementer (cleared
+ * by mtdec) and the level-triggered external interrupt (cleared by
+ * userspace).
+ */
+static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority)
+{
+	if (priority == BOOK3S_IRQPRIO_DECREMENTER ||
+	    priority == BOOK3S_IRQPRIO_EXTERNAL_LEVEL)
+		return false;
+
+	return true;
+}
+
+/*
+ * Deliver at most one pending exception before the guest is entered.
+ *
+ * Pending bits are scanned from the lowest bit number upwards.  The scan
+ * stops at the first exception that is both delivered and of a kind that is
+ * cleared on delivery (see clear_irqprio()); exceptions that cannot be
+ * delivered right now, or that stay pending after delivery, do not stop the
+ * scan.  Afterwards the guest is told about the resulting pending state.
+ */
+void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
+{
+	unsigned long *pending = &vcpu->arch.pending_exceptions;
+	unsigned long old_pending = vcpu->arch.pending_exceptions;
+	const unsigned long nbits = BITS_PER_BYTE * sizeof(*pending);
+	unsigned int priority;
+
+#ifdef EXIT_DEBUG
+	if (vcpu->arch.pending_exceptions)
+		printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions);
+#endif
+	/*
+	 * Use find_first_bit() rather than __ffs(): the latter is undefined
+	 * when no bit is set, which is the common case here.  With nothing
+	 * pending find_first_bit() returns nbits, so the loop is skipped.
+	 */
+	priority = find_first_bit(pending, nbits);
+	while (priority < BOOK3S_IRQPRIO_MAX) {
+		if (kvmppc_book3s_irqprio_deliver(vcpu, priority) &&
+		    clear_irqprio(vcpu, priority)) {
+			clear_bit(priority, &vcpu->arch.pending_exceptions);
+			break;
+		}
+
+		priority = find_next_bit(pending, nbits, priority + 1);
+	}
+
+	/* Tell the guest about our interrupt status */
+	kvmppc_update_int_pending(vcpu, *pending, old_pending);
+}
+
+/*
+ * Resolve guest frame @gfn to a host page frame number.
+ *
+ * If a magic page is configured and @gfn addresses it (compared under
+ * KVM_PAM), the pfn of the page holding vcpu->arch.shared is returned with
+ * an extra page reference taken.  All other frames go through gfn_to_pfn().
+ */
+pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	ulong mp_pa = vcpu->arch.magic_page_pa;
+	ulong shared_page;
+	pfn_t pfn;
+
+	/* Ordinary guest page: use the generic lookup */
+	if (likely(!mp_pa) ||
+	    likely(((gfn << PAGE_SHIFT) & KVM_PAM) !=
+		   ((mp_pa & PAGE_MASK) & KVM_PAM)))
+		return gfn_to_pfn(vcpu->kvm, gfn);
+
+	/* Magic page override */
+	shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK;
+	pfn = (pfn_t)virt_to_phys((void*)shared_page) >> PAGE_SHIFT;
+	get_page(pfn_to_page(pfn));
+
+	return pfn;
+}
+
+/*
+ * Translate effective address @eaddr into @pte.
+ *
+ * When the guest has relocation enabled for the access type (MSR_DR for
+ * data, MSR_IR for instructions) the MMU backend's xlate() hook does the
+ * work and its result is returned.  Otherwise the address is treated as a
+ * real address: an identity mapping with full permissions is filled in and
+ * 0 is returned.
+ */
+static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data,
+			struct kvmppc_pte *pte)
+{
+	if (vcpu->arch.shared->msr & (data ? MSR_DR : MSR_IR))
+		return vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data);
+
+	/* Real mode */
+	pte->eaddr = eaddr;
+	pte->raddr = eaddr & KVM_PAM;
+	pte->vpage = VSID_REAL | eaddr >> 12;
+	pte->may_read = true;
+	pte->may_write = true;
+	pte->may_execute = true;
+
+	return 0;
+}
+
+/* Sentinel host virtual address returned when no usable mapping exists. */
+static hva_t kvmppc_bad_hva(void)
+{
+ return PAGE_OFFSET;
+}
+
+/*
+ * Turn the real address in @pte into a host virtual address.
+ *
+ * @read selects which permission is required: may_read for a read,
+ * may_write otherwise.  Returns kvmppc_bad_hva() if the permission is
+ * missing or the guest frame has no host mapping.
+ */
+static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte,
+			       bool read)
+{
+	bool allowed = read ? pte->may_read : pte->may_write;
+	hva_t hpage;
+
+	if (!allowed)
+		return kvmppc_bad_hva();
+
+	hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
+	if (kvm_is_error_hva(hpage))
+		return kvmppc_bad_hva();
+
+	/* Re-attach the offset within the page */
+	return hpage | (pte->raddr & ~PAGE_MASK);
+}
+
+/*
+ * Store @size bytes from @ptr to guest effective address *@eaddr.
+ *
+ * On a successful translation *@eaddr is replaced by the real address.
+ *
+ * Returns -ENOENT if the address cannot be translated, -EPERM if the
+ * mapping is not writable, EMULATE_DO_MMIO if the write to guest memory
+ * fails, and EMULATE_DONE on success.
+ * NOTE(review): the return value mixes negative errno values with EMULATE_*
+ * codes; callers must handle both.
+ */
+int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
+ bool data)
+{
+ struct kvmppc_pte pte;
+
+ vcpu->stat.st++;
+
+ if (kvmppc_xlate(vcpu, *eaddr, data, &pte))
+ return -ENOENT;
+
+ *eaddr = pte.raddr;
+
+ if (!pte.may_write)
+ return -EPERM;
+
+ if (kvm_write_guest(vcpu->kvm, pte.raddr, ptr, size))
+ return EMULATE_DO_MMIO;
+
+ return EMULATE_DONE;
+}
+
+/*
+ * Load @size bytes from guest effective address *@eaddr into @ptr.
+ *
+ * On a successful translation *@eaddr is replaced by the real address.
+ *
+ * Returns -ENOENT if the address cannot be translated, EMULATE_DO_MMIO if
+ * the page is unreadable, unmapped, or the copy fails, and EMULATE_DONE on
+ * success.
+ */
+int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
+	      bool data)
+{
+	struct kvmppc_pte pte;
+	hva_t hva;
+
+	vcpu->stat.ld++;
+
+	if (kvmppc_xlate(vcpu, *eaddr, data, &pte))
+		return -ENOENT;
+
+	*eaddr = pte.raddr;
+
+	hva = kvmppc_pte_to_hva(vcpu, &pte, true);
+	if (kvm_is_error_hva(hva))
+		return EMULATE_DO_MMIO;
+
+	if (copy_from_user(ptr, (void __user *)hva, size)) {
+		printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva);
+		return EMULATE_DO_MMIO;
+	}
+
+	return EMULATE_DONE;
+}
+
+/* No per-vcpu setup is done here; always reports success. */
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
+/*
+ * Copy the guest's general register state into @regs: PC, CR, CTR, LR, XER,
+ * MSR, SRR0/1, PID, SPRG0-7 and all GPRs.  Always returns 0.
+ */
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ int i;
+
+ regs->pc = kvmppc_get_pc(vcpu);
+ regs->cr = kvmppc_get_cr(vcpu);
+ regs->ctr = kvmppc_get_ctr(vcpu);
+ regs->lr = kvmppc_get_lr(vcpu);
+ regs->xer = kvmppc_get_xer(vcpu);
+ regs->msr = vcpu->arch.shared->msr;
+ regs->srr0 = vcpu->arch.shared->srr0;
+ regs->srr1 = vcpu->arch.shared->srr1;
+ regs->pid = vcpu->arch.pid;
+ regs->sprg0 = vcpu->arch.shared->sprg0;
+ regs->sprg1 = vcpu->arch.shared->sprg1;
+ regs->sprg2 = vcpu->arch.shared->sprg2;
+ regs->sprg3 = vcpu->arch.shared->sprg3;
+ regs->sprg4 = vcpu->arch.shared->sprg4;
+ regs->sprg5 = vcpu->arch.shared->sprg5;
+ regs->sprg6 = vcpu->arch.shared->sprg6;
+ regs->sprg7 = vcpu->arch.shared->sprg7;
+
+ for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
+ regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
+
+ return 0;
+}
+
+/*
+ * Load the guest's general register state from @regs: PC, CR, CTR, LR, XER,
+ * MSR (through kvmppc_set_msr()), SRR0/1, SPRG0-7 and all GPRs.  regs->pid
+ * is not applied.  Always returns 0.
+ */
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ int i;
+
+ kvmppc_set_pc(vcpu, regs->pc);
+ kvmppc_set_cr(vcpu, regs->cr);
+ kvmppc_set_ctr(vcpu, regs->ctr);
+ kvmppc_set_lr(vcpu, regs->lr);
+ kvmppc_set_xer(vcpu, regs->xer);
+ kvmppc_set_msr(vcpu, regs->msr);
+ vcpu->arch.shared->srr0 = regs->srr0;
+ vcpu->arch.shared->srr1 = regs->srr1;
+ vcpu->arch.shared->sprg0 = regs->sprg0;
+ vcpu->arch.shared->sprg1 = regs->sprg1;
+ vcpu->arch.shared->sprg2 = regs->sprg2;
+ vcpu->arch.shared->sprg3 = regs->sprg3;
+ vcpu->arch.shared->sprg4 = regs->sprg4;
+ vcpu->arch.shared->sprg5 = regs->sprg5;
+ vcpu->arch.shared->sprg6 = regs->sprg6;
+ vcpu->arch.shared->sprg7 = regs->sprg7;
+
+ for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
+ kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
+
+ return 0;
+}
+
+/* Reading FPU state is not implemented; always fails with -ENOTSUPP. */
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ return -ENOTSUPP;
+}
+
+/* Writing FPU state is not implemented; always fails with -ENOTSUPP. */
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ return -ENOTSUPP;
+}
+
+/*
+ * Address translation on behalf of userspace is not implemented.
+ * NOTE(review): this returns 0 (success) without touching @tr.
+ */
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+ struct kvm_translation *tr)
+{
+ return 0;
+}
+
+/*
+ * Queue a decrementer interrupt for a vcpu and kick it.
+ * @data is the vcpu pointer cast to unsigned long (presumably because this
+ * is registered as a timer/tasklet-style callback -- confirm at the caller).
+ */
+void kvmppc_decrementer_func(unsigned long data)
+{
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
+
+ kvmppc_core_queue_dec(vcpu);
+ kvm_vcpu_kick(vcpu);
+}
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
new file mode 100644
index 00000000..c8cefdd1
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -0,0 +1,419 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/highmem.h>
+
+#include <asm/tlbflush.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+
+/* #define DEBUG_MMU */
+/* #define DEBUG_MMU_PTE */
+/* #define DEBUG_MMU_PTE_IP 0xfff14c40 */
+
+#ifdef DEBUG_MMU
+#define dprintk(X...) printk(KERN_INFO X)
+#else
+#define dprintk(X...) do { } while(0)
+#endif
+
+#ifdef DEBUG_MMU_PTE
+#define dprintk_pte(X...) printk(KERN_INFO X)
+#else
+#define dprintk_pte(X...) do { } while(0)
+#endif
+
+#define PTEG_FLAG_ACCESSED 0x00000100
+#define PTEG_FLAG_DIRTY 0x00000080
+#ifndef SID_SHIFT
+#define SID_SHIFT 28
+#endif
+
+/*
+ * Debug filter: when DEBUG_MMU_PTE_IP is defined, only report true while the
+ * guest PC equals that address; otherwise always true.
+ */
+static inline bool check_debug_ip(struct kvm_vcpu *vcpu)
+{
+#ifdef DEBUG_MMU_PTE_IP
+ return vcpu->arch.pc == DEBUG_MMU_PTE_IP;
+#else
+ return true;
+#endif
+}
+
+/* Extract the VSID field (low 28 bits) from a raw segment register value. */
+static inline u32 sr_vsid(u32 sr_raw)
+{
+ return sr_raw & 0x0fffffff;
+}
+
+/* A segment register counts as valid when its top bit (0x80000000) is clear. */
+static inline bool sr_valid(u32 sr_raw)
+{
+	return !(sr_raw & 0x80000000);
+}
+
+/* Ks bit (0x40000000) of a raw segment register value. */
+static inline bool sr_ks(u32 sr_raw)
+{
+	return (sr_raw & 0x40000000) != 0;
+}
+
+/* Kp bit (0x20000000) of a raw segment register value. */
+static inline bool sr_kp(u32 sr_raw)
+{
+	return (sr_raw & 0x20000000) != 0;
+}
+
+/* N (no-execute) bit (0x10000000) of a raw segment register value. */
+static inline bool sr_nx(u32 sr_raw)
+{
+	return (sr_raw & 0x10000000) != 0;
+}
+
+static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
+ struct kvmppc_pte *pte, bool data);
+static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
+ u64 *vsid);
+
+/*
+ * Return the guest segment register covering @eaddr; the top four address
+ * bits select one of the 16 entries.
+ */
+static u32 find_sr(struct kvm_vcpu *vcpu, gva_t eaddr)
+{
+ return vcpu->arch.shared->sr[(eaddr >> 28) & 0xf];
+}
+
+/*
+ * Compute the virtual page number for @eaddr.
+ *
+ * If a BAT translation accepts the address, the vpage it produced is used.
+ * Otherwise the result is the VSID from esid_to_vsid() shifted left by 16,
+ * combined with the 16-bit page index taken from bits 12..27 of @eaddr.
+ */
+static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
+ bool data)
+{
+ u64 vsid;
+ struct kvmppc_pte pte;
+
+ if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &pte, data))
+ return pte.vpage;
+
+ kvmppc_mmu_book3s_32_esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);
+ return (((u64)eaddr >> 12) & 0xffff) | (vsid << 16);
+}
+
+/* reset_msr hook for the 32-bit MMU: the guest MSR is set to 0. */
+static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu)
+{
+ kvmppc_set_msr(vcpu, 0);
+}
+
+/*
+ * Locate the guest PTE group for @eaddr and return its host virtual address.
+ *
+ * The hash is (VSID of @sre XOR page index) << 6, bit-inverted for the
+ * secondary group (@primary false), then masked with a mask derived from
+ * the low 9 bits of the guest SDR1.  OR-ing it with the upper 16 bits of
+ * SDR1 gives the guest address of the group, which is translated with
+ * gfn_to_hva().  If that fails the error hva is returned unchanged.
+ */
+static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3s,
+ u32 sre, gva_t eaddr,
+ bool primary)
+{
+ u32 page, hash, pteg, htabmask;
+ hva_t r;
+
+ page = (eaddr & 0x0FFFFFFF) >> 12;
+ htabmask = ((vcpu_book3s->sdr1 & 0x1FF) << 16) | 0xFFC0;
+
+ hash = ((sr_vsid(sre) ^ page) << 6);
+ if (!primary)
+ hash = ~hash;
+ hash &= htabmask;
+
+ pteg = (vcpu_book3s->sdr1 & 0xffff0000) | hash;
+
+ dprintk("MMU: pc=0x%lx eaddr=0x%lx sdr1=0x%llx pteg=0x%x vsid=0x%x\n",
+ kvmppc_get_pc(&vcpu_book3s->vcpu), eaddr, vcpu_book3s->sdr1, pteg,
+ sr_vsid(sre));
+
+ r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT);
+ if (kvm_is_error_hva(r))
+ return r;
+ /* Re-attach the offset of the group within its page */
+ return r | (pteg & ~PAGE_MASK);
+}
+
+/*
+ * Build the first PTE word that a matching entry for @eaddr must contain:
+ * bits 22..27 of the address, the VSID of @sre shifted left by 7, 0x40 when
+ * looking in the secondary group, and 0x80000000 (presumably the PTE valid
+ * bit -- the host code builds the same layout with PTE_V/PTE_SEC).
+ */
+static u32 kvmppc_mmu_book3s_32_get_ptem(u32 sre, gva_t eaddr, bool primary)
+{
+ return ((eaddr & 0x0fffffff) >> 22) | (sr_vsid(sre) << 7) |
+ (primary ? 0 : 0x40) | 0x80000000;
+}
+
+/*
+ * Try to translate @eaddr through the guest's BAT registers.
+ *
+ * The data or instruction BAT array is chosen by @data.  A BAT is skipped
+ * unless it is enabled for the current privilege level (vp when MSR_PR is
+ * set, vs otherwise).  On an address match the pte is filled in, but the
+ * match is only accepted if the BAT is both readable and writable;
+ * read-only BATs are deliberately rejected for now.
+ *
+ * Returns 0 with @pte filled in on success, -ENOENT if no BAT was accepted.
+ * Note that @pte may have been partially written even on -ENOENT.
+ */
+static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
+ struct kvmppc_pte *pte, bool data)
+{
+ struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+ struct kvmppc_bat *bat;
+ int i;
+
+ for (i = 0; i < 8; i++) {
+ if (data)
+ bat = &vcpu_book3s->dbat[i];
+ else
+ bat = &vcpu_book3s->ibat[i];
+
+ if (vcpu->arch.shared->msr & MSR_PR) {
+ if (!bat->vp)
+ continue;
+ } else {
+ if (!bat->vs)
+ continue;
+ }
+
+ if (check_debug_ip(vcpu))
+ {
+ dprintk_pte("%cBAT %02d: 0x%lx - 0x%x (0x%x)\n",
+ data ? 'd' : 'i', i, eaddr, bat->bepi,
+ bat->bepi_mask);
+ }
+ if ((eaddr & bat->bepi_mask) == bat->bepi) {
+ u64 vsid;
+ kvmppc_mmu_book3s_32_esid_to_vsid(vcpu,
+ eaddr >> SID_SHIFT, &vsid);
+ vsid <<= 16;
+ pte->vpage = (((u64)eaddr >> 12) & 0xffff) | vsid;
+
+ /* Real address: BAT base plus the offset inside the block */
+ pte->raddr = bat->brpn | (eaddr & ~bat->bepi_mask);
+ /* pp == 0: no access, pp == 1: read-only, pp > 1: read/write */
+ pte->may_read = bat->pp;
+ pte->may_write = bat->pp > 1;
+ pte->may_execute = true;
+ if (!pte->may_read) {
+ printk(KERN_INFO "BAT is not readable!\n");
+ continue;
+ }
+ if (!pte->may_write) {
+ /* let's treat r/o BATs as not-readable for now */
+ dprintk_pte("BAT is read-only!\n");
+ continue;
+ }
+
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+/*
+ * Translate @eaddr by searching one guest PTE group (primary or secondary,
+ * chosen by @primary) of the guest's hashed page table.
+ *
+ * The whole group (8 PTEs of two 32-bit words each) is copied from guest
+ * memory and searched for an entry whose first word equals the expected
+ * match word.  Access rights come from the PTE's pp bits, extended by a
+ * key bit taken from the segment register (Kp in user mode, Ks otherwise).
+ * On a hit the accessed/dirty flags are set in the copy and, if that
+ * changed anything, the group is written back to the guest.
+ *
+ * Returns 0 with @pte filled in, or -ENOENT if no usable entry was found.
+ */
+static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
+ struct kvmppc_pte *pte, bool data,
+ bool primary)
+{
+ struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+ u32 sre;
+ hva_t ptegp;
+ u32 pteg[16];
+ u32 ptem = 0;
+ int i;
+ int found = 0;
+
+ sre = find_sr(vcpu, eaddr);
+
+ dprintk_pte("SR 0x%lx: vsid=0x%x, raw=0x%x\n", eaddr >> 28,
+ sr_vsid(sre), sre);
+
+ pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data);
+
+ ptegp = kvmppc_mmu_book3s_32_get_pteg(vcpu_book3s, sre, eaddr, primary);
+ if (kvm_is_error_hva(ptegp)) {
+ printk(KERN_INFO "KVM: Invalid PTEG!\n");
+ goto no_page_found;
+ }
+
+ ptem = kvmppc_mmu_book3s_32_get_ptem(sre, eaddr, primary);
+
+ if(copy_from_user(pteg, (void __user *)ptegp, sizeof(pteg))) {
+ printk(KERN_ERR "KVM: Can't copy data from 0x%lx!\n", ptegp);
+ goto no_page_found;
+ }
+
+ /* Each PTE is two words: pteg[i] is the match word, pteg[i+1] the data */
+ for (i=0; i<16; i+=2) {
+ if (ptem == pteg[i]) {
+ u8 pp;
+
+ pte->raddr = (pteg[i+1] & ~(0xFFFULL)) | (eaddr & 0xFFF);
+ pp = pteg[i+1] & 3;
+
+ /* Fold the applicable segment key into bit 2 of pp */
+ if ((sr_kp(sre) && (vcpu->arch.shared->msr & MSR_PR)) ||
+ (sr_ks(sre) && !(vcpu->arch.shared->msr & MSR_PR)))
+ pp |= 4;
+
+ pte->may_write = false;
+ pte->may_read = false;
+ pte->may_execute = true;
+ switch (pp) {
+ case 0:
+ case 1:
+ case 2:
+ case 6:
+ pte->may_write = true;
+ /* deliberate fall through: writable implies readable */
+ case 3:
+ case 5:
+ case 7:
+ pte->may_read = true;
+ break;
+ }
+
+ /* pp == 4 grants nothing; keep looking in this group */
+ if ( !pte->may_read )
+ continue;
+
+ dprintk_pte("MMU: Found PTE -> %x %x - %x\n",
+ pteg[i], pteg[i+1], pp);
+ found = 1;
+ break;
+ }
+ }
+
+ /* Update PTE C and A bits, so the guest's swapper knows we used the
+ page */
+ if (found) {
+ u32 oldpte = pteg[i+1];
+
+ if (pte->may_read)
+ pteg[i+1] |= PTEG_FLAG_ACCESSED;
+ if (pte->may_write)
+ pteg[i+1] |= PTEG_FLAG_DIRTY;
+ else
+ dprintk_pte("KVM: Mapping read-only page!\n");
+
+ /* Write back into the PTEG */
+ /* NOTE(review): the copy_to_user() result is ignored, and the whole
+ group is written back rather than just the changed word. */
+ if (pteg[i+1] != oldpte)
+ copy_to_user((void __user *)ptegp, pteg, sizeof(pteg));
+
+ return 0;
+ }
+
+no_page_found:
+
+ /* NOTE(review): when reached through one of the early gotos, pteg[] has
+ not been filled in; the dump below is only compiled with DEBUG_MMU_PTE. */
+ if (check_debug_ip(vcpu)) {
+ dprintk_pte("KVM MMU: No PTE found (sdr1=0x%llx ptegp=0x%lx)\n",
+ to_book3s(vcpu)->sdr1, ptegp);
+ for (i=0; i<16; i+=2) {
+ dprintk_pte(" %02d: 0x%x - 0x%x (0x%x)\n",
+ i, pteg[i], pteg[i+1], ptem);
+ }
+ }
+
+ return -ENOENT;
+}
+
+/*
+ * Translate guest effective address @eaddr into @pte (xlate hook of the
+ * 32-bit MMU).
+ *
+ * Supervisor-mode accesses to the magic page, if one is configured, are
+ * mapped straight to the magic page's physical address with full
+ * permissions.  Everything else is tried through the BATs first, then the
+ * primary and finally the secondary PTE group.
+ *
+ * Returns 0 on success or a negative value if no translation exists.
+ */
+static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
+				      struct kvmppc_pte *pte, bool data)
+{
+	int r;
+	ulong mp_ea = vcpu->arch.magic_page_ea;
+
+	pte->eaddr = eaddr;
+
+	/* Magic page override */
+	if (unlikely(mp_ea) &&
+	    unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) &&
+	    !(vcpu->arch.shared->msr & MSR_PR)) {
+		pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data);
+		/*
+		 * The offset within the page must come from the effective
+		 * address; pte->raddr has not been written yet at this point
+		 * and may hold whatever the caller's pte contained.
+		 */
+		pte->raddr = vcpu->arch.magic_page_pa | (eaddr & 0xfff);
+		pte->raddr &= KVM_PAM;
+		pte->may_execute = true;
+		pte->may_read = true;
+		pte->may_write = true;
+
+		return 0;
+	}
+
+	r = kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, pte, data);
+	if (r < 0)
+		r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, true);
+	if (r < 0)
+		r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, false);
+
+	return r;
+}
+
+
+/* Read guest segment register @srnum.  @srnum is not range-checked here. */
+static u32 kvmppc_mmu_book3s_32_mfsrin(struct kvm_vcpu *vcpu, u32 srnum)
+{
+ return vcpu->arch.shared->sr[srnum];
+}
+
+/*
+ * Write guest segment register @srnum and remap the host segment that
+ * covers the corresponding address range.  @srnum is not range-checked here.
+ */
+static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum,
+ ulong value)
+{
+ vcpu->arch.shared->sr[srnum] = value;
+ kvmppc_mmu_map_segment(vcpu, srnum << SID_SHIFT);
+}
+
+/*
+ * tlbie hook: flush shadow PTEs matching @ea under the mask 0x0FFFF000.
+ * @large is ignored.
+ */
+static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool large)
+{
+ kvmppc_mmu_pte_flush(vcpu, ea, 0x0FFFF000);
+}
+
+/*
+ * Compute the VSID used internally for guest segment @esid and store it in
+ * *@vsid.
+ *
+ * The result depends on which of MSR_IR/MSR_DR are set: separate VSID_REAL*
+ * name spaces are used when translation is fully or partly off, the guest
+ * segment register's VSID is used when both are on and the segment is
+ * valid, and VSID_BAT otherwise.  VSID_PR is OR-ed in while the guest runs
+ * in user mode (MSR_PR).  Always returns 0.
+ */
+static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
+ u64 *vsid)
+{
+ ulong ea = esid << SID_SHIFT;
+ u32 sr;
+ u64 gvsid = esid;
+
+ /* sr is only assigned here; the DR|IR case below relies on that */
+ if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
+ sr = find_sr(vcpu, ea);
+ if (sr_valid(sr))
+ gvsid = sr_vsid(sr);
+ }
+
+ /* In case we only have one of MSR_IR or MSR_DR set, let's put
+ that in the real-mode context (and hope RM doesn't access
+ high memory) */
+ switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
+ case 0:
+ *vsid = VSID_REAL | esid;
+ break;
+ case MSR_IR:
+ *vsid = VSID_REAL_IR | gvsid;
+ break;
+ case MSR_DR:
+ *vsid = VSID_REAL_DR | gvsid;
+ break;
+ case MSR_DR|MSR_IR:
+ if (sr_valid(sr))
+ *vsid = sr_vsid(sr);
+ else
+ *vsid = VSID_BAT | gvsid;
+ break;
+ default:
+ BUG();
+ }
+
+ if (vcpu->arch.shared->msr & MSR_PR)
+ *vsid |= VSID_PR;
+
+ return 0;
+}
+
+/* is_dcbz32 hook: the 32-bit MMU always answers true. */
+static bool kvmppc_mmu_book3s_32_is_dcbz32(struct kvm_vcpu *vcpu)
+{
+ return true;
+}
+
+
+/*
+ * Install the 32-bit MMU callbacks into vcpu->arch.mmu.  The SLB hooks are
+ * set to NULL because this MMU model provides no SLB operations.
+ */
+void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_mmu *mmu = &vcpu->arch.mmu;
+
+ mmu->mtsrin = kvmppc_mmu_book3s_32_mtsrin;
+ mmu->mfsrin = kvmppc_mmu_book3s_32_mfsrin;
+ mmu->xlate = kvmppc_mmu_book3s_32_xlate;
+ mmu->reset_msr = kvmppc_mmu_book3s_32_reset_msr;
+ mmu->tlbie = kvmppc_mmu_book3s_32_tlbie;
+ mmu->esid_to_vsid = kvmppc_mmu_book3s_32_esid_to_vsid;
+ mmu->ea_to_vp = kvmppc_mmu_book3s_32_ea_to_vp;
+ mmu->is_dcbz32 = kvmppc_mmu_book3s_32_is_dcbz32;
+
+ mmu->slbmte = NULL;
+ mmu->slbmfee = NULL;
+ mmu->slbmfev = NULL;
+ mmu->slbie = NULL;
+ mmu->slbia = NULL;
+}
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
new file mode 100644
index 00000000..f922c29b
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -0,0 +1,393 @@
+/*
+ * Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved.
+ *
+ * Authors:
+ * Alexander Graf <agraf@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu-hash32.h>
+#include <asm/machdep.h>
+#include <asm/mmu_context.h>
+#include <asm/hw_irq.h>
+
+/* #define DEBUG_MMU */
+/* #define DEBUG_SR */
+
+#ifdef DEBUG_MMU
+#define dprintk_mmu(a, ...) printk(KERN_INFO a, __VA_ARGS__)
+#else
+#define dprintk_mmu(a, ...) do { } while(0)
+#endif
+
+#ifdef DEBUG_SR
+#define dprintk_sr(a, ...) printk(KERN_INFO a, __VA_ARGS__)
+#else
+#define dprintk_sr(a, ...) do { } while(0)
+#endif
+
+#if PAGE_SHIFT != 12
+#error Unknown page size
+#endif
+
+#ifdef CONFIG_SMP
+#error XXX need to grab mmu_hash_lock
+#endif
+
+#ifdef CONFIG_PTE_64BIT
+#error Only 32 bit pages are supported for now
+#endif
+
+static ulong htab;
+static u32 htabmask;
+
+/*
+ * Remove one shadow mapping from the host: zero the first word of the host
+ * PTE recorded in pte->slot, then invalidate the TLB entry for the mapping's
+ * effective address.
+ */
+void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
+{
+ volatile u32 *pteg;
+
+ /* Remove from host HTAB */
+ pteg = (u32*)pte->slot;
+ pteg[0] = 0;
+
+ /* And make sure it's gone from the TLB too */
+ asm volatile ("sync");
+ asm volatile ("tlbie %0" : : "r" (pte->pte.eaddr) : "memory");
+ asm volatile ("sync");
+ asm volatile ("tlbsync");
+}
+
+/*
+ * Hash a guest VSID down to an index into the gvsid->hvsid map (512 entries
+ * are kept), so lookups need no search loop.  The VSID is cut into eight
+ * SID_MAP_BITS-wide chunks which are XOR-ed together.
+ */
+static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid)
+{
+	u64 hash = 0;
+	int chunk;
+
+	for (chunk = 0; chunk < 8; chunk++)
+		hash ^= (gvsid >> (SID_MAP_BITS * chunk)) & SID_MAP_MASK;
+
+	return (u16)hash;
+}
+
+
+/*
+ * Look up the map entry for guest VSID @gvsid (VSID_PR is added while the
+ * guest runs in user mode).  An entry can live in one of two slots: the
+ * hashed index or its mirror, SID_MAP_MASK - index.  Returns NULL if
+ * neither slot holds @gvsid.
+ */
+static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)
+{
+	struct kvmppc_sid_map *map;
+	u16 slot[2];
+	int i;
+
+	if (vcpu->arch.shared->msr & MSR_PR)
+		gvsid |= VSID_PR;
+
+	slot[0] = kvmppc_sid_hash(vcpu, gvsid);
+	slot[1] = SID_MAP_MASK - slot[0];
+
+	for (i = 0; i < 2; i++) {
+		map = &to_book3s(vcpu)->sid_map[slot[i]];
+		if (map->guest_vsid == gvsid) {
+			dprintk_sr("SR: Searching 0x%llx -> 0x%llx\n",
+				    gvsid, map->host_vsid);
+			return map;
+		}
+	}
+
+	dprintk_sr("SR: Searching 0x%llx -> not found\n", gvsid);
+	return NULL;
+}
+
+/*
+ * Return a pointer to the host PTE group for (@vsid, @eaddr).
+ *
+ * The hash is (vsid XOR page index) << 6, bit-inverted for the secondary
+ * group, masked with the file-level htabmask and OR-ed into the file-level
+ * htab base address (both set up by kvmppc_mmu_init()).
+ */
+static u32 *kvmppc_mmu_get_pteg(struct kvm_vcpu *vcpu, u32 vsid, u32 eaddr,
+ bool primary)
+{
+ u32 page, hash;
+ ulong pteg = htab;
+
+ page = (eaddr & ~ESID_MASK) >> 12;
+
+ hash = ((vsid ^ page) << 6);
+ if (!primary)
+ hash = ~hash;
+
+ hash &= htabmask;
+
+ pteg |= hash;
+
+ dprintk_mmu("htab: %lx | hash: %x | htabmask: %x | pteg: %lx\n",
+ htab, hash, htabmask, pteg);
+
+ return (u32*)pteg;
+}
+
+extern char etext[];
+
+/*
+ * Install a host hash-table entry for the guest translation @orig_pte.
+ *
+ * Steps:
+ *  1. resolve the guest real address to a host physical address;
+ *  2. find (or create, via kvmppc_mmu_map_segment()) the host VSID that
+ *     shadows the guest segment;
+ *  3. pick a slot: starting with the secondary group, take the first slot
+ *     whose valid bit is clear; once a whole group has been scanned without
+ *     success, switch to the other group and overwrite its slot 0;
+ *  4. write the PTE with interrupts disabled and record it in the shadow
+ *     PTE cache.
+ *
+ * Returns 0 on success, -EINVAL if the guest page cannot be resolved.
+ */
+int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
+{
+	pfn_t hpaddr;
+	u64 va;
+	u64 vsid;
+	struct kvmppc_sid_map *map;
+	volatile u32 *pteg;
+	u32 eaddr = orig_pte->eaddr;
+	u32 pteg0, pteg1;
+	register int rr = 0;
+	bool primary = false;
+	bool evict = false;
+	struct hpte_cache *pte;
+	int r = 0;
+
+	/* Get host physical address for gpa */
+	hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
+	if (is_error_pfn(hpaddr)) {
+		/* Report the gfn that was actually looked up */
+		printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n",
+				 (unsigned long)(orig_pte->raddr >> PAGE_SHIFT));
+		r = -EINVAL;
+		goto out;
+	}
+	hpaddr <<= PAGE_SHIFT;
+
+	/* and write the mapping ea -> hpa into the pt */
+	vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid);
+	map = find_sid_vsid(vcpu, vsid);
+	if (!map) {
+		kvmppc_mmu_map_segment(vcpu, eaddr);
+		map = find_sid_vsid(vcpu, vsid);
+	}
+	BUG_ON(!map);
+
+	vsid = map->host_vsid;
+	va = (vsid << SID_SHIFT) | (eaddr & ~ESID_MASK);
+
+next_pteg:
+	/* A full group (8 PTEs = 16 words) was scanned: flip and evict */
+	if (rr == 16) {
+		primary = !primary;
+		evict = true;
+		rr = 0;
+	}
+
+	pteg = kvmppc_mmu_get_pteg(vcpu, vsid, eaddr, primary);
+
+	/* not evicting yet */
+	if (!evict && (pteg[rr] & PTE_V)) {
+		rr += 2;
+		goto next_pteg;
+	}
+
+	dprintk_mmu("KVM: old PTEG: %p (%d)\n", pteg, rr);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[0], pteg[1]);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[2], pteg[3]);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[4], pteg[5]);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[6], pteg[7]);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[8], pteg[9]);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[10], pteg[11]);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[12], pteg[13]);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[14], pteg[15]);
+
+	pteg0 = ((eaddr & 0x0fffffff) >> 22) | (vsid << 7) | PTE_V |
+		(primary ? 0 : PTE_SEC);
+	pteg1 = hpaddr | PTE_M | PTE_R | PTE_C;
+
+	if (orig_pte->may_write) {
+		pteg1 |= PP_RWRW;
+		mark_page_dirty(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT);
+	} else {
+		pteg1 |= PP_RWRX;
+	}
+
+	local_irq_disable();
+
+	/*
+	 * Clear the old first word before writing the new pair, and write
+	 * the second word before the first, so the first word (the one
+	 * carrying PTE_V) is stored last.
+	 */
+	if (pteg[rr]) {
+		pteg[rr] = 0;
+		asm volatile ("sync");
+	}
+	pteg[rr + 1] = pteg1;
+	pteg[rr] = pteg0;
+	asm volatile ("sync");
+
+	local_irq_enable();
+
+	dprintk_mmu("KVM: new PTEG: %p\n", pteg);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[0], pteg[1]);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[2], pteg[3]);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[4], pteg[5]);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[6], pteg[7]);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[8], pteg[9]);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[10], pteg[11]);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[12], pteg[13]);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[14], pteg[15]);
+
+
+	/* Now tell our Shadow PTE code about the new page */
+
+	pte = kvmppc_mmu_hpte_cache_next(vcpu);
+
+	dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%llx (0x%llx) -> %lx\n",
+		    orig_pte->may_write ? 'w' : '-',
+		    orig_pte->may_execute ? 'x' : '-',
+		    orig_pte->eaddr, (ulong)pteg, va,
+		    orig_pte->vpage, hpaddr);
+
+	pte->slot = (ulong)&pteg[rr];
+	pte->host_va = va;
+	pte->pte = *orig_pte;
+	pte->pfn = hpaddr >> PAGE_SHIFT;
+
+	kvmppc_mmu_hpte_cache_map(vcpu, pte);
+
+out:
+	return r;
+}
+
+/*
+ * Allocate a host VSID for guest VSID @gvsid and record the pair in the
+ * vcpu's sid_map (VSID_PR is added while the guest runs in user mode).
+ *
+ * Successive calls alternate between the hashed slot and its mirror
+ * (SID_MAP_MASK - hash).  The toggle is a function-level static, so it is
+ * shared by every caller rather than kept per vcpu.  When the host VSID
+ * pool is used up, the whole map is cleared and all shadow PTEs and
+ * segments are flushed before the pool is reused from the start.
+ *
+ * Returns the (overwritten) map entry.
+ */
+static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
+{
+ struct kvmppc_sid_map *map;
+ struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+ u16 sid_map_mask;
+ static int backwards_map = 0;
+
+ if (vcpu->arch.shared->msr & MSR_PR)
+ gvsid |= VSID_PR;
+
+ /* We might get collisions that trap in preceding order, so let's
+ map them differently */
+
+ sid_map_mask = kvmppc_sid_hash(vcpu, gvsid);
+ if (backwards_map)
+ sid_map_mask = SID_MAP_MASK - sid_map_mask;
+
+ map = &to_book3s(vcpu)->sid_map[sid_map_mask];
+
+ /* Make sure we're taking the other map next time */
+ backwards_map = !backwards_map;
+
+ /* Uh-oh ... out of mappings. Let's flush! */
+ if (vcpu_book3s->vsid_next >= VSID_POOL_SIZE) {
+ vcpu_book3s->vsid_next = 0;
+ memset(vcpu_book3s->sid_map, 0,
+ sizeof(struct kvmppc_sid_map) * SID_MAP_NUM);
+ kvmppc_mmu_pte_flush(vcpu, 0, 0);
+ kvmppc_mmu_flush_segments(vcpu);
+ }
+ /* map still points into sid_map[]; the memset above only zeroed it */
+ map->host_vsid = vcpu_book3s->vsid_pool[vcpu_book3s->vsid_next];
+ vcpu_book3s->vsid_next++;
+
+ map->guest_vsid = gvsid;
+ map->valid = true;
+
+ return map;
+}
+
+/*
+ * Set up the shadow segment register that covers @eaddr.
+ *
+ * The guest VSID for the segment is obtained from the MMU backend, mapped
+ * to a host VSID (creating a map entry if needed) and written, with SR_KP
+ * set, into the shadow vcpu's sr[] slot.  If the backend reports an error
+ * the slot is set to SR_INVALID instead.
+ *
+ * Returns 0 on success, -ENOENT if the segment was invalidated.
+ */
+int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
+{
+ u32 esid = eaddr >> SID_SHIFT;
+ u64 gvsid;
+ u32 sr;
+ struct kvmppc_sid_map *map;
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ int r = 0;
+
+ if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) {
+ /* Invalidate an entry */
+ svcpu->sr[esid] = SR_INVALID;
+ r = -ENOENT;
+ goto out;
+ }
+
+ map = find_sid_vsid(vcpu, gvsid);
+ if (!map)
+ map = create_sid_map(vcpu, gvsid);
+
+ map->guest_esid = esid;
+ sr = map->host_vsid | SR_KP;
+ svcpu->sr[esid] = sr;
+
+ dprintk_sr("MMU: mtsr %d, 0x%x\n", esid, sr);
+
+out:
+ /* Every path must release the shadow vcpu taken by svcpu_get() */
+ svcpu_put(svcpu);
+ return r;
+}
+
+/* Mark every shadow segment register of @vcpu as SR_INVALID. */
+void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
+{
+ int i;
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+
+ dprintk_sr("MMU: flushing all segments (%d)\n", ARRAY_SIZE(svcpu->sr));
+ for (i = 0; i < ARRAY_SIZE(svcpu->sr); i++)
+ svcpu->sr[i] = SR_INVALID;
+
+ svcpu_put(svcpu);
+}
+
+/*
+ * Tear down the host MMU state of @vcpu: destroy the shadow PTE cache, then
+ * release all SID_CONTEXTS host contexts with preemption disabled.
+ */
+void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+{
+ int i;
+
+ kvmppc_mmu_hpte_destroy(vcpu);
+ preempt_disable();
+ for (i = 0; i < SID_CONTEXTS; i++)
+ __destroy_context(to_book3s(vcpu)->context_id[i]);
+ preempt_enable();
+}
+
+/* From mm/mmu_context_hash32.c */
+#define CTX_TO_VSID(c, id) ((((c) * (897 * 16)) + (id * 0x111)) & 0xffffff)
+
+/*
+ * Set up the host MMU state for @vcpu.
+ *
+ * SID_CONTEXTS host contexts are allocated; each contributes 16 VSIDs
+ * (computed with CTX_TO_VSID) to the vcpu's VSID pool.  The location and
+ * mask of the host hash table are read from SDR1 into the file-level
+ * htab/htabmask variables, and the shadow PTE cache is initialised.
+ *
+ * Returns 0 on success.  If a context cannot be allocated, the contexts
+ * obtained so far are released and -1 is returned.
+ * NOTE(review): the failure path returns -1 rather than the error code from
+ * __init_new_context(), and skips context ids equal to 0 when cleaning up.
+ */
+int kvmppc_mmu_init(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+ int err;
+ ulong sdr1;
+ int i;
+ int j;
+
+ for (i = 0; i < SID_CONTEXTS; i++) {
+ err = __init_new_context();
+ if (err < 0)
+ goto init_fail;
+ vcpu3s->context_id[i] = err;
+
+ /* Remember context id for this combination */
+ for (j = 0; j < 16; j++)
+ vcpu3s->vsid_pool[(i * 16) + j] = CTX_TO_VSID(err, j);
+ }
+
+ vcpu3s->vsid_next = 0;
+
+ /* Remember where the HTAB is */
+ asm ( "mfsdr1 %0" : "=r"(sdr1) );
+ htabmask = ((sdr1 & 0x1FF) << 16) | 0xFFC0;
+ htab = (ulong)__va(sdr1 & 0xffff0000);
+
+ kvmppc_mmu_hpte_init(vcpu);
+
+ return 0;
+
+init_fail:
+ /* Only contexts 0..i-1 were allocated before the failure */
+ for (j = 0; j < i; j++) {
+ if (!vcpu3s->context_id[j])
+ continue;
+
+ __destroy_context(to_book3s(vcpu)->context_id[j]);
+ }
+
+ return -1;
+}
diff --git a/arch/powerpc/kvm/book3s_32_sr.S b/arch/powerpc/kvm/book3s_32_sr.S
new file mode 100644
index 00000000..7e06a6fc
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_32_sr.S
@@ -0,0 +1,143 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+/******************************************************************************
+ * *
+ * Entry code *
+ * *
+ *****************************************************************************/
+
+.macro LOAD_GUEST_SEGMENTS
+
+ /* Required state:
+ *
+ * MSR = ~IR|DR
+ * R1 = host R1
+ * R2 = host R2
+ * R3 = shadow vcpu
+ * all other volatile GPRS = free except R4, R6
+ * SVCPU[CR] = guest CR
+ * SVCPU[XER] = guest XER
+ * SVCPU[CTR] = guest CTR
+ * SVCPU[LR] = guest LR
+ */
+
+ /* XCHG_SR(n): load the 32-bit word at SVCPU_SR + n*4 (relative to r3)
+ * into r9 and write it to segment register n. Clobbers r9. */
+#define XCHG_SR(n) lwz r9, (SVCPU_SR+(n*4))(r3); \
+ mtsr n, r9
+
+ /* Load all 16 segment registers from the shadow vcpu */
+ XCHG_SR(0)
+ XCHG_SR(1)
+ XCHG_SR(2)
+ XCHG_SR(3)
+ XCHG_SR(4)
+ XCHG_SR(5)
+ XCHG_SR(6)
+ XCHG_SR(7)
+ XCHG_SR(8)
+ XCHG_SR(9)
+ XCHG_SR(10)
+ XCHG_SR(11)
+ XCHG_SR(12)
+ XCHG_SR(13)
+ XCHG_SR(14)
+ XCHG_SR(15)
+
+ /* Clear BATs. */
+
+ /* KVM_KILL_BAT(n, reg): write reg to the upper and lower halves of
+ * both IBATn and DBATn. */
+#define KVM_KILL_BAT(n, reg) \
+ mtspr SPRN_IBAT##n##U,reg; \
+ mtspr SPRN_IBAT##n##L,reg; \
+ mtspr SPRN_DBAT##n##U,reg; \
+ mtspr SPRN_DBAT##n##L,reg; \
+
+ /* r9 = 0 is the value written to BAT pairs 0-3 */
+ li r9, 0
+ KVM_KILL_BAT(0, r9)
+ KVM_KILL_BAT(1, r9)
+ KVM_KILL_BAT(2, r9)
+ KVM_KILL_BAT(3, r9)
+
+.endm
+
+/******************************************************************************
+ * *
+ * Exit code *
+ * *
+ *****************************************************************************/
+
+.macro LOAD_HOST_SEGMENTS
+
+ /* Register usage at this point:
+ *
+ * R1 = host R1
+ * R2 = host R2
+ * R12 = exit handler id
+ * R13 = shadow vcpu - SHADOW_VCPU_OFF
+ * SVCPU.* = guest *
+ * SVCPU[CR] = guest CR
+ * SVCPU[XER] = guest XER
+ * SVCPU[CTR] = guest CTR
+ * SVCPU[LR] = guest LR
+ *
+ */
+
+ /* Restore BATs */
+
+ /* KVM_LOAD_BAT(n, reg, RA, RB): reload IBATn and DBATn, upper and
+ * lower halves, from the four consecutive words at reg + n*16.
+ * RA and RB are scratch registers. */
+#define KVM_LOAD_BAT(n, reg, RA, RB) \
+ lwz RA,(n*16)+0(reg); \
+ lwz RB,(n*16)+4(reg); \
+ mtspr SPRN_IBAT##n##U,RA; \
+ mtspr SPRN_IBAT##n##L,RB; \
+ lwz RA,(n*16)+8(reg); \
+ lwz RB,(n*16)+12(reg); \
+ mtspr SPRN_DBAT##n##U,RA; \
+ mtspr SPRN_DBAT##n##L,RB; \
+
+ /* r9 = physical address of BATS (MSR IR/DR are off here, per the
+ * entry requirements of the guest-side macro) */
+ lis r9, BATS@ha
+ addi r9, r9, BATS@l
+ tophys(r9, r9)
+ KVM_LOAD_BAT(0, r9, r10, r11)
+ KVM_LOAD_BAT(1, r9, r10, r11)
+ KVM_LOAD_BAT(2, r9, r10, r11)
+ KVM_LOAD_BAT(3, r9, r10, r11)
+
+ /* Restore Segment Registers */
+
+ /* 0xc - 0xf */
+
+ /* Four iterations (CTR = 4): r3 holds the segment register value,
+ * starting at 0x20000000 | (0x111 * 0xc); r4 holds the effective
+ * address selecting the segment, starting at 0xc0000000. */
+ li r0, 4
+ mtctr r0
+ LOAD_REG_IMMEDIATE(r3, 0x20000000 | (0x111 * 0xc))
+ lis r4, 0xc000
+3: mtsrin r3, r4
+ addi r3, r3, 0x111 /* increment VSID */
+ addis r4, r4, 0x1000 /* address of next segment */
+ bdnz 3b
+
+ /* 0x0 - 0xb */
+
+ /* 'current->mm' needs to be in r4 */
+ tophys(r4, r2)
+ lwz r4, MM(r4)
+ tophys(r4, r4)
+ /* This only clobbers r0, r3, r4 and r5 */
+ bl switch_mmu_context
+
+.endm
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
new file mode 100644
index 00000000..b871721c
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -0,0 +1,536 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/highmem.h>
+
+#include <asm/tlbflush.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+
+/* #define DEBUG_MMU */
+
+/*
+ * dprintk(): forwards to printk() at KERN_INFO level when DEBUG_MMU is
+ * defined (uncomment the line above); otherwise expands to an empty
+ * statement.
+ */
+#ifdef DEBUG_MMU
+#define dprintk(X...) printk(KERN_INFO X)
+#else
+#define dprintk(X...) do { } while(0)
+#endif
+
+/* mmu->reset_msr hook: set the guest MSR to MSR_SF only. */
+static void kvmppc_mmu_book3s_64_reset_msr(struct kvm_vcpu *vcpu)
+{
+ kvmppc_set_msr(vcpu, MSR_SF);
+}
+
+/*
+ * Return the first valid guest SLB entry covering eaddr, or NULL.
+ *
+ * An entry with ->tb set is compared against GET_ESID_1T(eaddr), any
+ * other entry against GET_ESID(eaddr). When nothing matches, the SLB
+ * entries with a non-zero VSID are dumped via dprintk().
+ */
+static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
+				struct kvm_vcpu *vcpu,
+				gva_t eaddr)
+{
+	u64 esid = GET_ESID(eaddr);
+	u64 esid_1t = GET_ESID_1T(eaddr);
+	int idx;
+
+	for (idx = 0; idx < vcpu->arch.slb_nr; idx++) {
+		struct kvmppc_slb *cand = &vcpu->arch.slb[idx];
+
+		if (!cand->valid)
+			continue;
+
+		/* Pick the ESID form matching this entry's segment size */
+		if (cand->esid == (cand->tb ? esid_1t : esid))
+			return cand;
+	}
+
+	dprintk("KVM: No SLB entry found for 0x%lx [%llx | %llx]\n",
+		eaddr, esid, esid_1t);
+	for (idx = 0; idx < vcpu->arch.slb_nr; idx++) {
+		if (vcpu->arch.slb[idx].vsid)
+			dprintk(" %d: %c%c%c %llx %llx\n", idx,
+				vcpu->arch.slb[idx].valid ? 'v' : ' ',
+				vcpu->arch.slb[idx].large ? 'l' : ' ',
+				vcpu->arch.slb[idx].tb ? 't' : ' ',
+				vcpu->arch.slb[idx].esid,
+				vcpu->arch.slb[idx].vsid);
+	}
+
+	return NULL;
+}
+
+/*
+ * Build the virtual page number for eaddr from the matching guest SLB
+ * entry: the page index within the segment (28 bits when the entry has
+ * ->tb set, 16 bits otherwise) with the VSID placed above it.
+ *
+ * Returns 0 when no SLB entry covers eaddr. The data argument is unused.
+ */
+static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
+					 bool data)
+{
+	struct kvmppc_slb *slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu, eaddr);
+	u64 page = (u64)eaddr >> 12;
+
+	if (!slbe)
+		return 0;
+
+	if (!slbe->tb)
+		return (page & 0xffff) | (((u64)slbe->vsid) << 16);
+
+	return (page & 0xfffffff) | (((u64)slbe->vsid) << 28);
+}
+
+/* Page size of the segment as a shift: 24 when ->large is set, else 12. */
+static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe)
+{
+	if (slbe->large)
+		return 24;
+
+	return 12;
+}
+
+/*
+ * Page index of eaddr: the low 28 bits of the address shifted right by
+ * the segment's page-size shift.
+ */
+static u32 kvmppc_mmu_book3s_64_get_page(struct kvmppc_slb *slbe, gva_t eaddr)
+{
+	return (eaddr & 0xfffffff) >> kvmppc_mmu_book3s_64_get_pagesize(slbe);
+}
+
+/*
+ * Compute the host virtual address of the guest PTEG that eaddr hashes to.
+ *
+ * @vcpu_book3s: vcpu whose guest SDR1 describes the hash table
+ * @slbe:        guest SLB entry covering eaddr
+ * @eaddr:       guest effective address
+ * @second:      non-zero to use the inverted (secondary) hash
+ *
+ * Returns the address of the PTEG, or an error hva (test it with
+ * kvm_is_error_hva()) when the guest frame cannot be resolved.
+ */
+static hva_t kvmppc_mmu_book3s_64_get_pteg(
+				struct kvmppc_vcpu_book3s *vcpu_book3s,
+				struct kvmppc_slb *slbe, gva_t eaddr,
+				bool second)
+{
+	u64 hash, pteg, htabsize;
+	u32 page;
+	hva_t r;
+
+	page = kvmppc_mmu_book3s_64_get_page(slbe, eaddr);
+	/*
+	 * The shift count is (sdr1 & 0x1f) + 11, i.e. up to 42, so the
+	 * mask has to be built in 64 bits; a plain int "1 <<" overflows
+	 * once the size field reaches 20.
+	 */
+	htabsize = ((1ULL << ((vcpu_book3s->sdr1 & 0x1f) + 11)) - 1);
+
+	hash = slbe->vsid ^ page;
+	if (second)
+		hash = ~hash;
+	hash &= ((1ULL << 39ULL) - 1ULL);
+	hash &= htabsize;
+	hash <<= 7ULL;	/* shift the group index into a byte offset */
+
+	pteg = vcpu_book3s->sdr1 & 0xfffffffffffc0000ULL;
+	pteg |= hash;
+
+	dprintk("MMU: page=0x%x sdr1=0x%llx pteg=0x%llx vsid=0x%llx\n",
+		page, vcpu_book3s->sdr1, pteg, slbe->vsid);
+
+	/* When running a PAPR guest, SDR1 contains a HVA address instead
+	   of a GPA */
+	if (vcpu_book3s->vcpu.arch.papr_enabled)
+		r = pteg;
+	else
+		r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT);
+
+	if (kvm_is_error_hva(r))
+		return r;
+	/* Add the offset of the PTEG within its page */
+	return r | (pteg & ~PAGE_MASK);
+}
+
+/*
+ * Build the AVPN value that kvmppc_mmu_book3s_64_xlate() compares, via
+ * HPTE_V_AVPN_VAL(), against the first doubleword of each guest HPTE:
+ * the page index combined with the VSID, then shifted according to the
+ * page size.
+ */
+static u64 kvmppc_mmu_book3s_64_get_avpn(struct kvmppc_slb *slbe, gva_t eaddr)
+{
+	int pshift = kvmppc_mmu_book3s_64_get_pagesize(slbe);
+	u64 avpn = kvmppc_mmu_book3s_64_get_page(slbe, eaddr);
+
+	avpn |= slbe->vsid << (28 - pshift);
+
+	if (pshift >= 24)
+		return avpn << 8;
+
+	return avpn >> (((80 - pshift) - 56) - 8);
+}
+
+/*
+ * Translate a guest effective address by walking the guest's hashed page
+ * table.
+ *
+ * @vcpu:  vcpu performing the access
+ * @eaddr: guest effective address
+ * @gpte:  filled in with eaddr, vpage, raddr and the may_* permissions
+ * @data:  only passed through to kvmppc_mmu_book3s_64_ea_to_vp()
+ *
+ * Returns 0 on success, -EINVAL when no SLB entry covers eaddr, -EPERM
+ * when a matching PTE exists but grants no read access, and -ENOENT when
+ * neither the primary nor the secondary PTEG holds a matching PTE (or the
+ * PTEG could not be read).
+ *
+ * On success the R bit (and the C bit for writable pages) is set in the
+ * guest PTE and the PTEG is written back if that changed anything.
+ */
+static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
+				struct kvmppc_pte *gpte, bool data)
+{
+	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+	struct kvmppc_slb *slbe;
+	hva_t ptegp;
+	u64 pteg[16];
+	u64 avpn = 0;
+	int i;
+	u8 key = 0;
+	bool found = false;
+	bool perm_err = false;
+	int second = 0;
+	ulong mp_ea = vcpu->arch.magic_page_ea;
+
+	/* Magic page override */
+	if (unlikely(mp_ea) &&
+	    unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) &&
+	    !(vcpu->arch.shared->msr & MSR_PR)) {
+		gpte->eaddr = eaddr;
+		gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data);
+		/*
+		 * The offset within the page comes from the effective
+		 * address; gpte->raddr has not been written yet here.
+		 */
+		gpte->raddr = vcpu->arch.magic_page_pa | (eaddr & 0xfff);
+		gpte->raddr &= KVM_PAM;
+		gpte->may_execute = true;
+		gpte->may_read = true;
+		gpte->may_write = true;
+
+		return 0;
+	}
+
+	slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu, eaddr);
+	if (!slbe)
+		goto no_seg_found;
+
+do_second:
+	ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second);
+	if (kvm_is_error_hva(ptegp))
+		goto no_page_found;
+
+	avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr);
+
+	if(copy_from_user(pteg, (void __user *)ptegp, sizeof(pteg))) {
+		printk(KERN_ERR "KVM can't copy data from 0x%lx!\n", ptegp);
+		goto no_page_found;
+	}
+
+	/* The segment key bit that applies depends on the privilege state */
+	if ((vcpu->arch.shared->msr & MSR_PR) && slbe->Kp)
+		key = 4;
+	else if (!(vcpu->arch.shared->msr & MSR_PR) && slbe->Ks)
+		key = 4;
+
+	/* Each PTE is two doublewords: pteg[i] and pteg[i+1] */
+	for (i=0; i<16; i+=2) {
+		u64 v = pteg[i];
+		u64 r = pteg[i+1];
+
+		/* Valid check */
+		if (!(v & HPTE_V_VALID))
+			continue;
+		/* Hash check */
+		if ((v & HPTE_V_SECONDARY) != second)
+			continue;
+
+		/* AVPN compare */
+		if (HPTE_V_AVPN_VAL(avpn) == HPTE_V_AVPN_VAL(v)) {
+			u8 pp = (r & HPTE_R_PP) | key;
+			int eaddr_mask = 0xFFF;
+
+			gpte->eaddr = eaddr;
+			gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu,
+								    eaddr,
+								    data);
+			if (slbe->large)
+				eaddr_mask = 0xFFFFFF;
+			gpte->raddr = (r & HPTE_R_RPN) | (eaddr & eaddr_mask);
+			gpte->may_execute = ((r & HPTE_R_N) ? false : true);
+			gpte->may_read = false;
+			gpte->may_write = false;
+
+			switch (pp) {
+			case 0:
+			case 1:
+			case 2:
+			case 6:
+				gpte->may_write = true;
+				/* fall through */
+			case 3:
+			case 5:
+			case 7:
+				gpte->may_read = true;
+				break;
+			}
+
+			if (!gpte->may_read) {
+				perm_err = true;
+				continue;
+			}
+
+			dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx "
+				"-> 0x%lx\n",
+				eaddr, avpn, gpte->vpage, gpte->raddr);
+			found = true;
+			break;
+		}
+	}
+
+	/* Update PTE R and C bits, so the guest's swapper knows we used the
+	 * page */
+	if (found) {
+		/* Full 64 bits, so the changed-check below is exact */
+		u64 oldr = pteg[i+1];
+
+		if (gpte->may_read) {
+			/* Set the accessed flag */
+			pteg[i+1] |= HPTE_R_R;
+		}
+		if (gpte->may_write) {
+			/* Set the dirty flag */
+			pteg[i+1] |= HPTE_R_C;
+		} else {
+			dprintk("KVM: Mapping read-only page!\n");
+		}
+
+		/* Write back into the PTEG */
+		if (pteg[i+1] != oldr)
+			copy_to_user((void __user *)ptegp, pteg, sizeof(pteg));
+
+		return 0;
+	} else {
+		dprintk("KVM MMU: No PTE found (ea=0x%lx sdr1=0x%llx "
+			"ptegp=0x%lx)\n",
+			eaddr, to_book3s(vcpu)->sdr1, ptegp);
+		for (i = 0; i < 16; i += 2)
+			dprintk(" %02d: 0x%llx - 0x%llx (0x%llx)\n",
+				i, pteg[i], pteg[i+1], avpn);
+
+		/* Retry once with the secondary hash */
+		if (!second) {
+			second = HPTE_V_SECONDARY;
+			goto do_second;
+		}
+	}
+
+
+no_page_found:
+
+
+	if (perm_err)
+		return -EPERM;
+
+	return -ENOENT;
+
+no_seg_found:
+
+	dprintk("KVM MMU: Trigger segment fault\n");
+	return -EINVAL;
+}
+
+/*
+ * Emulate slbmte: decode rs/rb into the guest SLB entry selected by the
+ * low 12 bits of rb, then map the new segment.
+ *
+ * Requests whose index is not below vcpu->arch.slb_nr are ignored.
+ */
+static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
+{
+	u64 esid, esid_1t;
+	int slb_nr;
+	struct kvmppc_slb *slbe;
+
+	dprintk("KVM MMU: slbmte(0x%llx, 0x%llx)\n", rs, rb);
+
+	esid = GET_ESID(rb);
+	esid_1t = GET_ESID_1T(rb);
+	slb_nr = rb & 0xfff;
+
+	/*
+	 * slb_nr is used as an entry count by every loop over the SLB in
+	 * this file, so valid indices are 0 .. slb_nr - 1.
+	 */
+	if (slb_nr >= vcpu->arch.slb_nr)
+		return;
+
+	slbe = &vcpu->arch.slb[slb_nr];
+
+	slbe->large = (rs & SLB_VSID_L) ? 1 : 0;
+	slbe->tb = (rs & SLB_VSID_B_1T) ? 1 : 0;
+	slbe->esid = slbe->tb ? esid_1t : esid;
+	slbe->vsid = rs >> 12;
+	slbe->valid = (rb & SLB_ESID_V) ? 1 : 0;
+	slbe->Ks = (rs & SLB_VSID_KS) ? 1 : 0;
+	slbe->Kp = (rs & SLB_VSID_KP) ? 1 : 0;
+	slbe->nx = (rs & SLB_VSID_N) ? 1 : 0;
+	slbe->class = (rs & SLB_VSID_C) ? 1 : 0;
+
+	/* Keep the raw register images for slbmfee/slbmfev */
+	slbe->orige = rb & (ESID_MASK | SLB_ESID_V);
+	slbe->origv = rs;
+
+	/* Map the new segment */
+	kvmppc_mmu_map_segment(vcpu, esid << SID_SHIFT);
+}
+
+/*
+ * Emulate slbmfee: return the stored ESID image (->orige) of guest SLB
+ * entry slb_nr, or 0 when slb_nr is not below vcpu->arch.slb_nr.
+ */
+static u64 kvmppc_mmu_book3s_64_slbmfee(struct kvm_vcpu *vcpu, u64 slb_nr)
+{
+	struct kvmppc_slb *slbe;
+
+	/* Valid indices are 0 .. slb_nr - 1 */
+	if (slb_nr >= vcpu->arch.slb_nr)
+		return 0;
+
+	slbe = &vcpu->arch.slb[slb_nr];
+
+	return slbe->orige;
+}
+
+/*
+ * Emulate slbmfev: return the stored VSID image (->origv) of guest SLB
+ * entry slb_nr, or 0 when slb_nr is not below vcpu->arch.slb_nr.
+ */
+static u64 kvmppc_mmu_book3s_64_slbmfev(struct kvm_vcpu *vcpu, u64 slb_nr)
+{
+	struct kvmppc_slb *slbe;
+
+	/* Valid indices are 0 .. slb_nr - 1 */
+	if (slb_nr >= vcpu->arch.slb_nr)
+		return 0;
+
+	slbe = &vcpu->arch.slb[slb_nr];
+
+	return slbe->origv;
+}
+
+/*
+ * Emulate slbie: mark the guest SLB entry covering ea as invalid and
+ * remap that segment. Does nothing when no entry covers ea.
+ */
+static void kvmppc_mmu_book3s_64_slbie(struct kvm_vcpu *vcpu, u64 ea)
+{
+	struct kvmppc_slb *hit;
+
+	dprintk("KVM MMU: slbie(0x%llx)\n", ea);
+
+	hit = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea);
+	if (hit) {
+		dprintk("KVM MMU: slbie(0x%llx, 0x%llx)\n", ea, hit->esid);
+
+		hit->valid = false;
+
+		kvmppc_mmu_map_segment(vcpu, ea);
+	}
+}
+
+/*
+ * Emulate slbia: invalidate guest SLB entries 1 .. slb_nr - 1 (entry 0
+ * is left untouched). If the guest has MSR_IR set, also flush the shadow
+ * segments and remap the segment containing the current PC.
+ */
+static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu)
+{
+	int idx = 1;
+
+	dprintk("KVM MMU: slbia()\n");
+
+	while (idx < vcpu->arch.slb_nr) {
+		vcpu->arch.slb[idx].valid = false;
+		idx++;
+	}
+
+	if (!(vcpu->arch.shared->msr & MSR_IR))
+		return;
+
+	kvmppc_mmu_flush_segments(vcpu);
+	kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
+}
+
+/*
+ * Emulate mtsrin on a 64-bit MMU by building the equivalent slbmte
+ * operands and handing them to kvmppc_mmu_book3s_64_slbmte().
+ */
+static void kvmppc_mmu_book3s_64_mtsrin(struct kvm_vcpu *vcpu, u32 srnum,
+					ulong value)
+{
+	u64 rb;
+	u64 rs;
+
+	/*
+	 * According to Book3 2.01 mtsrin is implemented as:
+	 *
+	 * The SLB entry specified by (RB)32:35 is loaded from register
+	 * RS, as follows.
+	 *
+	 * SLBE Bit Source SLB Field
+	 *
+	 * 0:31 0x0000_0000 ESID-0:31
+	 * 32:35 (RB)32:35 ESID-32:35
+	 * 36 0b1 V
+	 * 37:61 0x00_0000|| 0b0 VSID-0:24
+	 * 62:88 (RS)37:63 VSID-25:51
+	 * 89:91 (RS)33:35 Ks Kp N
+	 * 92 (RS)36 L ((RS)36 must be 0b0)
+	 * 93 0b0 C
+	 */
+
+	dprintk("KVM MMU: mtsrin(0x%x, 0x%lx)\n", srnum, value);
+
+	/* ESID = srnum, valid bit set, index = ESID */
+	rb = ((srnum & 0xf) << 28) | (1 << 27) | srnum;
+
+	/* VSID = VSID, flags = flags */
+	rs = ((value & 0xfffffff) << 12) | (((value >> 28) & 0x7) << 9);
+
+	kvmppc_mmu_book3s_64_slbmte(vcpu, rs, rb);
+}
+
+/*
+ * Emulate tlbie: flush shadow PTEs whose virtual page matches va >> 12
+ * under a mask that ignores the low 12 bits for large pages.
+ */
+static void kvmppc_mmu_book3s_64_tlbie(struct kvm_vcpu *vcpu, ulong va,
+				       bool large)
+{
+	u64 vp_mask = large ? 0xFFFFFF000ULL : 0xFFFFFFFFFULL;
+
+	dprintk("KVM MMU: tlbie(0x%lx)\n", va);
+
+	kvmppc_mmu_pte_vflush(vcpu, va >> 12, vp_mask);
+}
+
+/*
+ * Compute the guest VSID for an ESID, taking the guest's current
+ * MSR_IR/MSR_DR translation mode into account.
+ *
+ * Returns 0 with *vsid set, or -EINVAL when both IR and DR are on, no
+ * SLB entry covers the segment and it is not the magic page segment.
+ */
+static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
+ u64 *vsid)
+{
+ ulong ea = esid << SID_SHIFT;
+ struct kvmppc_slb *slb;
+ u64 gvsid = esid;
+ ulong mp_ea = vcpu->arch.magic_page_ea;
+
+ /* slb is only assigned (and only read below) when translation is on */
+ if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
+ slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea);
+ if (slb)
+ gvsid = slb->vsid;
+ }
+
+ switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
+ case 0:
+ /* Translation fully off: tag the raw ESID */
+ *vsid = VSID_REAL | esid;
+ break;
+ case MSR_IR:
+ *vsid = VSID_REAL_IR | gvsid;
+ break;
+ case MSR_DR:
+ *vsid = VSID_REAL_DR | gvsid;
+ break;
+ case MSR_DR|MSR_IR:
+ if (!slb)
+ goto no_slb;
+
+ *vsid = gvsid;
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ /* Problem-state mappings get a distinct VSID */
+ if (vcpu->arch.shared->msr & MSR_PR)
+ *vsid |= VSID_PR;
+
+ return 0;
+
+no_slb:
+ /* Catch magic page case */
+ if (unlikely(mp_ea) &&
+ unlikely(esid == (mp_ea >> SID_SHIFT)) &&
+ !(vcpu->arch.shared->msr & MSR_PR)) {
+ *vsid = VSID_REAL | esid;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+/* mmu->is_dcbz32 hook: true when bit 0x80 of the vcpu's hid[5] is set. */
+static bool kvmppc_mmu_book3s_64_is_dcbz32(struct kvm_vcpu *vcpu)
+{
+ return (to_book3s(vcpu)->hid[5] & 0x80);
+}
+
+/*
+ * Install the 64-bit Book3S MMU callbacks into vcpu->arch.mmu and set
+ * BOOK3S_HFLAG_SLB in the vcpu's hflags. mfsrin has no 64-bit
+ * implementation and is left NULL.
+ */
+void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_mmu *ops = &vcpu->arch.mmu;
+
+	/* Segment register access */
+	ops->mfsrin = NULL;
+	ops->mtsrin = kvmppc_mmu_book3s_64_mtsrin;
+
+	/* SLB instructions */
+	ops->slbmte = kvmppc_mmu_book3s_64_slbmte;
+	ops->slbmfee = kvmppc_mmu_book3s_64_slbmfee;
+	ops->slbmfev = kvmppc_mmu_book3s_64_slbmfev;
+	ops->slbie = kvmppc_mmu_book3s_64_slbie;
+	ops->slbia = kvmppc_mmu_book3s_64_slbia;
+
+	/* Translation helpers */
+	ops->xlate = kvmppc_mmu_book3s_64_xlate;
+	ops->tlbie = kvmppc_mmu_book3s_64_tlbie;
+	ops->esid_to_vsid = kvmppc_mmu_book3s_64_esid_to_vsid;
+	ops->ea_to_vp = kvmppc_mmu_book3s_64_ea_to_vp;
+
+	/* Miscellaneous */
+	ops->reset_msr = kvmppc_mmu_book3s_64_reset_msr;
+	ops->is_dcbz32 = kvmppc_mmu_book3s_64_is_dcbz32;
+
+	vcpu->arch.hflags |= BOOK3S_HFLAG_SLB;
+}
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
new file mode 100644
index 00000000..10fc8ec9
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -0,0 +1,330 @@
+/*
+ * Copyright (C) 2009 SUSE Linux Products GmbH. All rights reserved.
+ *
+ * Authors:
+ * Alexander Graf <agraf@suse.de>
+ * Kevin Wolf <mail@kevin-wolf.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu-hash64.h>
+#include <asm/machdep.h>
+#include <asm/mmu_context.h>
+#include <asm/hw_irq.h>
+#include "trace.h"
+
+/* Page size shift passed to hpt_hash() in kvmppc_mmu_map_page() */
+#define PTE_SIZE 12
+
+/*
+ * Invalidate the host HPTE recorded in pte (its slot and host_va) through
+ * ppc_md.hpte_invalidate(), as a 4K page in a 256M segment.
+ */
+void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
+{
+ ppc_md.hpte_invalidate(pte->slot, pte->host_va,
+ MMU_PAGE_4K, MMU_SEGSIZE_256M,
+ false);
+}
+
+/* We keep 512 gvsid->hvsid entries, mapping the guest ones to the array using
+ * a hash, so we don't waste cycles on looping.
+ *
+ * The hash XORs together the eight SID_MAP_BITS-wide chunks of gvsid. */
+static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid)
+{
+	u64 folded = 0;
+	int chunk;
+
+	for (chunk = 0; chunk < 8; chunk++)
+		folded ^= (gvsid >> (SID_MAP_BITS * chunk)) & SID_MAP_MASK;
+
+	return (u16)folded;
+}
+
+
+/*
+ * Look up the sid_map entry for a guest VSID. VSID_PR is added to the
+ * key when the guest has MSR_PR set. Two slots are probed: the hash
+ * itself and its mirror (SID_MAP_MASK - hash).
+ *
+ * Returns the matching valid entry or NULL.
+ */
+static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)
+{
+	struct kvmppc_sid_map *map;
+	u16 primary;
+	int probe;
+
+	if (vcpu->arch.shared->msr & MSR_PR)
+		gvsid |= VSID_PR;
+
+	primary = kvmppc_sid_hash(vcpu, gvsid);
+
+	for (probe = 0; probe < 2; probe++) {
+		map = &to_book3s(vcpu)->sid_map[probe ?
+						SID_MAP_MASK - primary :
+						primary];
+		if (map->valid && (map->guest_vsid == gvsid)) {
+			trace_kvm_book3s_slb_found(gvsid, map->host_vsid);
+			return map;
+		}
+	}
+
+	trace_kvm_book3s_slb_fail(primary, gvsid);
+	return NULL;
+}
+
+/*
+ * Insert a host HPTE for the guest translation described by orig_pte and
+ * record it in the vcpu's HPTE cache.
+ *
+ * Returns 0 on success, -EINVAL when the guest frame or the segment
+ * mapping cannot be obtained, and -1 when ppc_md.hpte_remove() fails
+ * while making room.
+ */
+int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
+{
+ pfn_t hpaddr;
+ ulong hash, hpteg, va;
+ u64 vsid;
+ int ret;
+ int rflags = 0x192;
+ int vflags = 0;
+ int attempt = 0;
+ struct kvmppc_sid_map *map;
+ int r = 0;
+
+ /* Get host physical address for gpa */
+ hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
+ if (is_error_pfn(hpaddr)) {
+ /* NOTE(review): the message says "gfn" but prints eaddr - confirm intent */
+ printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr);
+ r = -EINVAL;
+ goto out;
+ }
+ hpaddr <<= PAGE_SHIFT;
+ /* Keep the 4K-aligned offset of raddr inside the host page */
+ hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK);
+
+ /* and write the mapping ea -> hpa into the pt */
+ /* NOTE(review): the return value of esid_to_vsid() is not checked here;
+ * vsid is only meaningful if that call stored to it - confirm */
+ vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid);
+ map = find_sid_vsid(vcpu, vsid);
+ if (!map) {
+ /* No mapping yet: map the segment and look again */
+ ret = kvmppc_mmu_map_segment(vcpu, orig_pte->eaddr);
+ WARN_ON(ret < 0);
+ map = find_sid_vsid(vcpu, vsid);
+ }
+ if (!map) {
+ printk(KERN_ERR "KVM: Segment map for 0x%llx (0x%lx) failed\n",
+ vsid, orig_pte->eaddr);
+ WARN_ON(true);
+ r = -EINVAL;
+ goto out;
+ }
+
+ /* From here on vsid is the host VSID */
+ vsid = map->host_vsid;
+ va = hpt_va(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M);
+
+ if (!orig_pte->may_write)
+ rflags |= HPTE_R_PP;
+ else
+ mark_page_dirty(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT);
+
+ if (!orig_pte->may_execute)
+ rflags |= HPTE_R_N;
+
+ hash = hpt_hash(va, PTE_SIZE, MMU_SEGSIZE_256M);
+
+map_again:
+ hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
+
+ /* In case we tried normal mapping already, let's nuke old entries */
+ /* attempt counts failed inserts, so eviction starts on the third try */
+ if (attempt > 1)
+ if (ppc_md.hpte_remove(hpteg) < 0) {
+ r = -1;
+ goto out;
+ }
+
+ ret = ppc_md.hpte_insert(hpteg, va, hpaddr, rflags, vflags, MMU_PAGE_4K, MMU_SEGSIZE_256M);
+
+ if (ret < 0) {
+ /* If we couldn't map a primary PTE, try a secondary */
+ /* Each retry flips between the primary and secondary hash */
+ hash = ~hash;
+ vflags ^= HPTE_V_SECONDARY;
+ attempt++;
+ goto map_again;
+ } else {
+ struct hpte_cache *pte = kvmppc_mmu_hpte_cache_next(vcpu);
+
+ trace_kvm_book3s_64_mmu_map(rflags, hpteg, va, hpaddr, orig_pte);
+
+ /* The ppc_md code may give us a secondary entry even though we
+ asked for a primary. Fix up. */
+ if ((ret & _PTEIDX_SECONDARY) && !(vflags & HPTE_V_SECONDARY)) {
+ hash = ~hash;
+ hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
+ }
+
+ /* Low three bits of ret give the index inside the group */
+ pte->slot = hpteg + (ret & 7);
+ pte->host_va = va;
+ pte->pte = *orig_pte;
+ pte->pfn = hpaddr >> PAGE_SHIFT;
+
+ kvmppc_mmu_hpte_cache_map(vcpu, pte);
+ }
+
+out:
+ return r;
+}
+
+/*
+ * Claim a sid_map slot for a guest VSID and assign it the next host
+ * proto-VSID. Successive calls alternate between the hashed slot and its
+ * mirror (SID_MAP_MASK - hash). When the proto-VSID range is used up, the
+ * whole map, the shadow PTEs and the shadow segments are flushed and
+ * allocation restarts at proto_vsid_first.
+ *
+ * Returns the (now valid) map entry.
+ */
+static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
+{
+	static int backwards_map = 0;
+	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+	struct kvmppc_sid_map *entry;
+	u16 slot;
+
+	if (vcpu->arch.shared->msr & MSR_PR)
+		gvsid |= VSID_PR;
+
+	/* We might get collisions that trap in preceding order, so let's
+	   map them differently */
+	slot = kvmppc_sid_hash(vcpu, gvsid);
+	if (backwards_map)
+		slot = SID_MAP_MASK - slot;
+
+	entry = &to_book3s(vcpu)->sid_map[slot];
+
+	/* Make sure we're taking the other map next time */
+	backwards_map = !backwards_map;
+
+	/* Uh-oh ... out of mappings. Let's flush! */
+	if (vcpu_book3s->proto_vsid_next == vcpu_book3s->proto_vsid_max) {
+		vcpu_book3s->proto_vsid_next = vcpu_book3s->proto_vsid_first;
+		memset(vcpu_book3s->sid_map, 0,
+		       sizeof(struct kvmppc_sid_map) * SID_MAP_NUM);
+		kvmppc_mmu_pte_flush(vcpu, 0, 0);
+		kvmppc_mmu_flush_segments(vcpu);
+	}
+
+	entry->host_vsid = vsid_scramble(vcpu_book3s->proto_vsid_next++, 256M);
+	entry->guest_vsid = gvsid;
+	entry->valid = true;
+
+	trace_kvm_book3s_slb_map(slot, gvsid, entry->host_vsid);
+
+	return entry;
+}
+
+/*
+ * Pick the shadow SLB index to use for the segment with the given ESID.
+ *
+ * Preference order: an existing valid entry for the same ESID, then the
+ * highest-numbered invalid entry below slb_max, then a fresh entry at
+ * slb_max. Index 0 is never handed out. When the shadow SLB is full
+ * (64 entries, or mmu_slb_size if that is smaller) it is flushed first.
+ */
+static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid)
+{
+	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+	int limit = 64;
+	int spare = -1;
+	int slot = -1;
+	int idx;
+
+	if (!svcpu->slb_max)
+		svcpu->slb_max = 1;
+
+	/* Are we overwriting? */
+	for (idx = 1; idx < svcpu->slb_max; idx++) {
+		if (!(svcpu->slb[idx].esid & SLB_ESID_V)) {
+			spare = idx;
+			continue;
+		}
+		if ((svcpu->slb[idx].esid & ESID_MASK) == esid) {
+			slot = idx;
+			break;
+		}
+	}
+
+	/* Found a spare entry that was invalidated before */
+	if (slot < 0 && spare > 0)
+		slot = spare;
+
+	if (slot < 0) {
+		/* No spare invalid entry, so create one */
+		if (mmu_slb_size < 64)
+			limit = mmu_slb_size;
+
+		/* Overflowing -> purge */
+		if ((svcpu->slb_max) == limit)
+			kvmppc_mmu_flush_segments(vcpu);
+
+		slot = svcpu->slb_max;
+		svcpu->slb_max++;
+	}
+
+	svcpu_put(svcpu);
+	return slot;
+}
+
+/*
+ * Install a shadow SLB entry for the segment containing eaddr.
+ *
+ * Returns 0 on success. If the guest has no VSID for the segment, the
+ * chosen shadow entry is invalidated and -ENOENT is returned.
+ */
+int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
+{
+	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+	u64 esid = eaddr >> SID_SHIFT;
+	u64 slb_esid = (eaddr & ESID_MASK) | SLB_ESID_V;
+	u64 slb_vsid = SLB_VSID_USER;
+	u64 gvsid;
+	struct kvmppc_sid_map *map;
+	int slot;
+	int rc = 0;
+
+	slot = kvmppc_mmu_next_segment(vcpu, eaddr & ESID_MASK);
+
+	if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) {
+		/* Invalidate an entry */
+		svcpu->slb[slot].esid = 0;
+		rc = -ENOENT;
+	} else {
+		map = find_sid_vsid(vcpu, gvsid);
+		if (!map)
+			map = create_sid_map(vcpu, gvsid);
+
+		map->guest_esid = esid;
+
+		/* Host VSID goes above bit 12; the Kp bit is cleared */
+		slb_vsid |= (map->host_vsid << 12);
+		slb_vsid &= ~SLB_VSID_KP;
+		slb_esid |= slot;
+
+		svcpu->slb[slot].esid = slb_esid;
+		svcpu->slb[slot].vsid = slb_vsid;
+
+		trace_kvm_book3s_slbmte(slb_vsid, slb_esid);
+	}
+
+	svcpu_put(svcpu);
+	return rc;
+}
+
+/*
+ * Reset the shadow SLB: clear the ESID word of entry 0 and set slb_max
+ * back to 1.
+ */
+void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_book3s_shadow_vcpu *shadow = svcpu_get(vcpu);
+
+	shadow->slb[0].esid = 0;
+	shadow->slb_max = 1;
+
+	svcpu_put(shadow);
+}
+
+/*
+ * Calls kvmppc_mmu_hpte_destroy() and then passes the context id stored
+ * in context_id[0] to __destroy_context().
+ */
+void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_book3s *vcpu3s;
+
+	kvmppc_mmu_hpte_destroy(vcpu);
+
+	vcpu3s = to_book3s(vcpu);
+	__destroy_context(vcpu3s->context_id[0]);
+}
+
+/*
+ * Allocate one host context for this vcpu, derive the proto-VSID range
+ * [first, max] from its id and initialise the HPTE cache.
+ *
+ * Returns 0 on success, -1 when no context could be allocated.
+ */
+int kvmppc_mmu_init(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+	int ctx = __init_new_context();
+
+	if (ctx < 0)
+		return -1;
+
+	vcpu3s->context_id[0] = ctx;
+
+	/* The context id selects a block of 2^USER_ESID_BITS proto-VSIDs */
+	vcpu3s->proto_vsid_first = vcpu3s->context_id[0] << USER_ESID_BITS;
+	vcpu3s->proto_vsid_max = ((vcpu3s->context_id[0] + 1)
+				  << USER_ESID_BITS) - 1;
+	vcpu3s->proto_vsid_next = vcpu3s->proto_vsid_first;
+
+	kvmppc_mmu_hpte_init(vcpu);
+
+	return 0;
+}
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
new file mode 100644
index 00000000..c3beaeef
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -0,0 +1,1027 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/highmem.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/hugetlb.h>
+#include <linux/vmalloc.h>
+
+#include <asm/tlbflush.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu-hash64.h>
+#include <asm/hvcall.h>
+#include <asm/synch.h>
+#include <asm/ppc-opcode.h>
+#include <asm/cputable.h>
+
+/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
+#define MAX_LPID_970 63
+/* Size of the LPID bitmap: every id up to and including LPID_RSVD */
+#define NR_LPIDS (LPID_RSVD + 1)
+/* One bit per LPID; a set bit means the id is taken or reserved */
+unsigned long lpid_inuse[BITS_TO_LONGS(NR_LPIDS)];
+
+/*
+ * Allocate the guest's hashed page table, its reverse-map array and a
+ * logical partition id, and record them in kvm->arch.
+ *
+ * The HPT comes from the preallocated pool (kvm_alloc_hpt()) when one is
+ * available, otherwise from the page allocator.
+ *
+ * Returns 0 on success or -ENOMEM when any of the three allocations
+ * fails; on failure the HPT and reverse map obtained so far are released.
+ */
+long kvmppc_alloc_hpt(struct kvm *kvm)
+{
+	unsigned long hpt;
+	unsigned long lpid;
+	struct revmap_entry *rev;
+	struct kvmppc_linear_info *li;
+
+	/* Allocate guest's hashed page table */
+	li = kvm_alloc_hpt();
+	if (li) {
+		/* using preallocated memory */
+		hpt = (ulong)li->base_virt;
+		kvm->arch.hpt_li = li;
+	} else {
+		/* using dynamic memory */
+		hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
+				       __GFP_NOWARN, HPT_ORDER - PAGE_SHIFT);
+	}
+
+	if (!hpt) {
+		pr_err("kvm_alloc_hpt: Couldn't alloc HPT\n");
+		return -ENOMEM;
+	}
+	kvm->arch.hpt_virt = hpt;
+
+	/* Allocate reverse map array */
+	rev = vmalloc(sizeof(struct revmap_entry) * HPT_NPTE);
+	if (!rev) {
+		pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n");
+		goto out_freehpt;
+	}
+	kvm->arch.revmap = rev;
+
+	/* Allocate the guest's logical partition ID */
+	do {
+		lpid = find_first_zero_bit(lpid_inuse, NR_LPIDS);
+		if (lpid >= NR_LPIDS) {
+			pr_err("kvm_alloc_hpt: No LPIDs free\n");
+			goto out_freeboth;
+		}
+	} while (test_and_set_bit(lpid, lpid_inuse));
+
+	/* SDR1: physical address of the table plus its size encoding */
+	kvm->arch.sdr1 = __pa(hpt) | (HPT_ORDER - 18);
+	kvm->arch.lpid = lpid;
+
+	pr_info("KVM guest htab at %lx, LPID %lx\n", hpt, lpid);
+	return 0;
+
+ out_freeboth:
+	vfree(rev);
+ out_freehpt:
+	/*
+	 * Release the table the same way kvmppc_free_hpt() does: memory
+	 * from the preallocated pool must go back through
+	 * kvm_release_hpt(), not the page allocator.
+	 */
+	if (li)
+		kvm_release_hpt(li);
+	else
+		free_pages(hpt, HPT_ORDER - PAGE_SHIFT);
+	return -ENOMEM;
+}
+
+/*
+ * Release what kvmppc_alloc_hpt() set up: the LPID bit, the reverse map
+ * and the hashed page table (back to the preallocated pool if hpt_li is
+ * set, otherwise to the page allocator).
+ */
+void kvmppc_free_hpt(struct kvm *kvm)
+{
+	clear_bit(kvm->arch.lpid, lpid_inuse);
+	vfree(kvm->arch.revmap);
+
+	if (!kvm->arch.hpt_li) {
+		free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT);
+		return;
+	}
+
+	kvm_release_hpt(kvm->arch.hpt_li);
+}
+
+/*
+ * Bits in first HPTE dword for pagesize 4k, 64k or 16M:
+ * HPTE_V_LARGE for anything bigger than 4k, nothing otherwise.
+ */
+static inline unsigned long hpte0_pgsize_encoding(unsigned long pgsize)
+{
+	if (pgsize > 0x1000)
+		return HPTE_V_LARGE;
+
+	return 0;
+}
+
+/*
+ * Bits in second HPTE dword for pagesize 4k, 64k or 16M:
+ * 0x1000 for 64k pages, nothing for the other sizes.
+ */
+static inline unsigned long hpte1_pgsize_encoding(unsigned long pgsize)
+{
+	if (pgsize == 0x10000)
+		return 0x1000;
+
+	return 0;
+}
+
+/*
+ * Enter bolted HPTEs covering the start of memslot into the guest hash
+ * table, using pages of size 1 << porder in the VRMA_VSID 1TB segment.
+ *
+ * The number of pages is capped at 1TB worth and at one HPTE per HPTEG.
+ * Stops at the first H_ENTER failure after logging it.
+ */
+void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
+		     unsigned long porder)
+{
+	unsigned long psize = 1ul << porder;
+	unsigned long npages = memslot->npages >> (porder - PAGE_SHIFT);
+	unsigned long max_vrma_pages = 1ul << (40 - porder);
+	unsigned long hp0, hp1;
+	unsigned long idx;
+
+	/* VRMA can't be > 1TB */
+	if (npages > max_vrma_pages)
+		npages = max_vrma_pages;
+	/* Can't use more than 1 HPTE per HPTEG */
+	if (npages > HPT_NPTEG)
+		npages = HPT_NPTEG;
+
+	/* Parts of the two HPTE doublewords shared by every entry */
+	hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
+		HPTE_V_BOLTED | hpte0_pgsize_encoding(psize);
+	hp1 = hpte1_pgsize_encoding(psize) |
+		HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX;
+
+	for (idx = 0; idx < npages; ++idx) {
+		unsigned long addr = idx << porder;
+		unsigned long slot;
+		long ret;
+
+		/* can't use hpt_hash since va > 64 bits */
+		slot = (idx ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & HPT_HASH_MASK;
+		/*
+		 * We assume that the hash table is empty and no
+		 * vcpus are using it at this stage. Since we create
+		 * at most one HPTE per HPTEG, we just assume entry 7
+		 * is available and use it.
+		 */
+		slot = (slot << 3) + 7;
+
+		ret = kvmppc_virtmode_h_enter(vcpu, H_EXACT, slot,
+					      hp0 | ((addr >> 16) & ~0x7fUL),
+					      hp1 | addr);
+		if (ret != H_SUCCESS) {
+			pr_err("KVM: map_vrma at %lx failed, ret=%ld\n",
+			       addr, ret);
+			break;
+		}
+	}
+}
+
+/*
+ * Reset the LPID bitmap and mark the host's LPID and the reserved LPID
+ * as in use.
+ *
+ * Returns 0, or -EINVAL when the CPU lacks CPU_FTR_HVMODE.
+ */
+int kvmppc_mmu_hv_init(void)
+{
+	/* Defaults are the PPC970 values */
+	unsigned long host_lpid = 0;
+	unsigned long rsvd_lpid = MAX_LPID_970;
+
+	if (!cpu_has_feature(CPU_FTR_HVMODE))
+		return -EINVAL;
+
+	memset(lpid_inuse, 0, sizeof(lpid_inuse));
+
+	if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+		/* POWER7 */
+		host_lpid = mfspr(SPRN_LPID);
+		rsvd_lpid = LPID_RSVD;
+	}
+
+	set_bit(host_lpid, lpid_inuse);
+	/* rsvd_lpid is reserved for use in partition switching */
+	set_bit(rsvd_lpid, lpid_inuse);
+
+	return 0;
+}
+
+/* Intentionally empty: this function does no per-vcpu MMU teardown. */
+void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+{
+}
+
+/* Set the guest MSR to MSR_SF | MSR_ME. */
+static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
+{
+ kvmppc_set_msr(vcpu, MSR_SF | MSR_ME);
+}
+
+/*
+ * This is called to get a reference to a guest page if there isn't
+ * one already in the kvm->arch.slot_phys[][] arrays.
+ *
+ * Returns 0 when the entry for gfn is already populated or has just been
+ * filled in, and -EINVAL when the memslot has no slot_phys array, the
+ * page cannot be obtained, or it does not satisfy the requested psize.
+ */
+static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
+ struct kvm_memory_slot *memslot,
+ unsigned long psize)
+{
+ unsigned long start;
+ long np, err;
+ struct page *page, *hpage, *pages[1];
+ unsigned long s, pgsize;
+ unsigned long *physp;
+ unsigned int is_io, got, pgorder;
+ struct vm_area_struct *vma;
+ unsigned long pfn, i, npages;
+
+ physp = kvm->arch.slot_phys[memslot->id];
+ if (!physp)
+ return -EINVAL;
+ /* Nothing to do if this gfn already has an entry */
+ if (physp[gfn - memslot->base_gfn])
+ return 0;
+
+ is_io = 0;
+ got = 0;
+ page = NULL;
+ pgsize = psize;
+ err = -EINVAL;
+ start = gfn_to_hva_memslot(memslot, gfn);
+
+ /* Instantiate and get the page we want access to */
+ np = get_user_pages_fast(start, 1, 1, pages);
+ if (np != 1) {
+ /* Look up the vma for the page */
+ /* Only a VM_PFNMAP vma covering the whole psize range is accepted */
+ down_read(&current->mm->mmap_sem);
+ vma = find_vma(current->mm, start);
+ if (!vma || vma->vm_start > start ||
+ start + psize > vma->vm_end ||
+ !(vma->vm_flags & VM_PFNMAP))
+ goto up_err;
+ is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
+ pfn = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
+ /* check alignment of pfn vs. requested page size */
+ if (psize > PAGE_SIZE && (pfn & ((psize >> PAGE_SHIFT) - 1)))
+ goto up_err;
+ up_read(&current->mm->mmap_sem);
+
+ } else {
+ page = pages[0];
+ /* Remember that we hold a page reference that may need dropping */
+ got = KVMPPC_GOT_PAGE;
+
+ /* See if this is a large page */
+ s = PAGE_SIZE;
+ if (PageHuge(page)) {
+ hpage = compound_head(page);
+ s <<= compound_order(hpage);
+ /* Get the whole large page if slot alignment is ok */
+ if (s > psize && slot_is_aligned(memslot, s) &&
+ !(memslot->userspace_addr & (s - 1))) {
+ start &= ~(s - 1);
+ pgsize = s;
+ /* Move our reference from the subpage to the head page */
+ get_page(hpage);
+ put_page(page);
+ page = hpage;
+ }
+ }
+ /* Backing page is smaller than requested: fail (err is -EINVAL) */
+ if (s < psize)
+ goto out;
+ pfn = page_to_pfn(page);
+ }
+
+ npages = pgsize >> PAGE_SHIFT;
+ pgorder = __ilog2(npages);
+ /* Point physp at the first entry of the pgsize-aligned group */
+ physp += (gfn - memslot->base_gfn) & ~(npages - 1);
+ spin_lock(&kvm->arch.slot_phys_lock);
+ for (i = 0; i < npages; ++i) {
+ if (!physp[i]) {
+ physp[i] = ((pfn + i) << PAGE_SHIFT) +
+ got + is_io + pgorder;
+ /* Only the first entry written records the reference */
+ got = 0;
+ }
+ }
+ spin_unlock(&kvm->arch.slot_phys_lock);
+ err = 0;
+
+ out:
+ /* Drop the reference if no entry took ownership of it */
+ if (got)
+ put_page(page);
+ return err;
+
+ up_err:
+ up_read(&current->mm->mmap_sem);
+ return err;
+}
+
+/*
+ * Virtual-mode handler for the guest's H_ENTER hypercall.
+ *
+ * When mmu notifiers are not in use, the guest page named by @ptel is
+ * first recorded via kvmppc_get_guest_page() (if it lies in a valid
+ * memslot), and the HVLOCK/ABSENT/VALID bits are stripped from @pteh.
+ * The HPTE is then inserted with kvmppc_h_enter().
+ *
+ * Returns H_PARAMETER for a bad page size, a misaligned memslot or a
+ * page that cannot be obtained; otherwise the result of
+ * kvmppc_h_enter(), with H_TOO_HARD turned into H_RESOURCE.
+ */
+long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
+			long pte_index, unsigned long pteh, unsigned long ptel)
+{
+	struct kvm *kvm = vcpu->kvm;
+	long rc;
+
+	if (!kvm->arch.using_mmu_notifiers) {
+		struct kvm_memory_slot *slot;
+		unsigned long page_bytes, guest_pa, guest_fn;
+
+		page_bytes = hpte_page_size(pteh, ptel);
+		if (page_bytes == 0)
+			return H_PARAMETER;
+
+		pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
+
+		/* Locate the memslot, if there is one, backing this address */
+		guest_pa = (ptel & HPTE_R_RPN) & ~(page_bytes - 1);
+		guest_fn = guest_pa >> PAGE_SHIFT;
+		slot = gfn_to_memslot(kvm, guest_fn);
+		if (slot && !(slot->flags & KVM_MEMSLOT_INVALID)) {
+			if (!slot_is_aligned(slot, page_bytes) ||
+			    kvmppc_get_guest_page(kvm, guest_fn, slot,
+						  page_bytes) < 0)
+				return H_PARAMETER;
+		}
+	}
+
+	/*
+	 * The Linux PTE lookup must not race with page table teardown;
+	 * the sched-RCU read side also disables preemption.
+	 */
+	rcu_read_lock_sched();
+	vcpu->arch.pgdir = current->mm->pgd;
+	rc = kvmppc_h_enter(vcpu, flags, pte_index, pteh, ptel);
+	rcu_read_unlock_sched();
+
+	if (rc == H_TOO_HARD) {
+		/* not expected from this call path */
+		pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n");
+		rc = H_RESOURCE;
+	}
+	return rc;
+}
+
+/*
+ * Search the vcpu's SLB array for a valid entry (SLB_ESID_V set in
+ * orige) whose ESID matches @eaddr.  The comparison mask is
+ * ESID_MASK_1T when the entry's origv has SLB_VSID_B_1T set, and
+ * ESID_MASK otherwise.  Returns the matching entry, or NULL.
+ */
+static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu,
+							 gva_t eaddr)
+{
+	int n;
+
+	for (n = 0; n < vcpu->arch.slb_nr; n++) {
+		struct kvmppc_slb *slbe = &vcpu->arch.slb[n];
+		u64 esid_mask;
+
+		if (!(slbe->orige & SLB_ESID_V))
+			continue;
+
+		esid_mask = (slbe->origv & SLB_VSID_B_1T) ?
+				ESID_MASK_1T : ESID_MASK;
+
+		if (!((slbe->orige ^ eaddr) & esid_mask))
+			return slbe;
+	}
+	return NULL;
+}
+
+/*
+ * Combine the page base taken from HPTE second doubleword @r (its
+ * HPTE_R_RPN field, truncated to the page size derived from @v and @r)
+ * with the within-page offset of effective address @ea.
+ */
+static unsigned long kvmppc_mmu_get_real_addr(unsigned long v, unsigned long r,
+					      unsigned long ea)
+{
+	unsigned long offset_mask = hpte_page_size(v, r) - 1;
+	unsigned long page_base = r & HPTE_R_RPN & ~offset_mask;
+
+	return page_base | (ea & offset_mask);
+}
+
+/*
+ * Translate guest effective address @eaddr by looking it up in the
+ * guest hashed page table, and fill in @gpte.
+ *
+ * @vcpu:  vcpu whose SLB, MSR and AMR are used for the translation
+ * @eaddr: guest effective address
+ * @gpte:  output; eaddr, vpage, raddr and may_read/may_write/may_execute
+ *         are filled in on success
+ * @data:  true for a data access (MSR_DR decides whether translation is
+ *         on), false for an instruction fetch (MSR_IR decides)
+ *
+ * Returns 0 on success, -EINVAL if translation is on and no SLB entry
+ * covers @eaddr, or -ENOENT if no matching HPTE is found.
+ */
+static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
+			struct kvmppc_pte *gpte, bool data)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvmppc_slb *slbe;
+	unsigned long slb_v;
+	unsigned long pp, key;
+	unsigned long v, gr;
+	unsigned long *hptep;
+	int index;
+	int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR);
+
+	/* Get SLB entry */
+	if (virtmode) {
+		slbe = kvmppc_mmu_book3s_hv_find_slbe(vcpu, eaddr);
+		if (!slbe)
+			return -EINVAL;
+		slb_v = slbe->origv;
+	} else {
+		/* real mode access */
+		slb_v = kvm->arch.vrma_slb_v;
+	}
+
+	/*
+	 * kvmppc_hv_find_lock_hpte() returns with the HPTE lock bit
+	 * (HPTE_V_HVLOCK) held.  That bit is a spinlock other code spins
+	 * on, so we must not be preempted while holding it; keep
+	 * preemption off from lookup to unlock, as the other HPTE lock
+	 * holders in this file do.
+	 */
+	preempt_disable();
+
+	/* Find the HPTE in the hash table */
+	index = kvmppc_hv_find_lock_hpte(kvm, eaddr, slb_v,
+					 HPTE_V_VALID | HPTE_V_ABSENT);
+	if (index < 0) {
+		preempt_enable();
+		return -ENOENT;
+	}
+	hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4));
+	v = hptep[0] & ~HPTE_V_HVLOCK;
+	gr = kvm->arch.revmap[index].guest_rpte;
+
+	/* Unlock the HPTE */
+	asm volatile("lwsync" : : : "memory");
+	hptep[0] = v;
+	preempt_enable();
+
+	gpte->eaddr = eaddr;
+	gpte->vpage = ((v & HPTE_V_AVPN) << 4) | ((eaddr >> 12) & 0xfff);
+
+	/* Get PP bits and key for permission check */
+	pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
+	key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
+	key &= slb_v;
+
+	/* Calculate permissions */
+	gpte->may_read = hpte_read_permission(pp, key);
+	gpte->may_write = hpte_write_permission(pp, key);
+	gpte->may_execute = gpte->may_read && !(gr & (HPTE_R_N | HPTE_R_G));
+
+	/* Storage key permission check for POWER7 */
+	if (data && virtmode && cpu_has_feature(CPU_FTR_ARCH_206)) {
+		int amrfield = hpte_get_skey_perm(gr, vcpu->arch.amr);
+		if (amrfield & 1)
+			gpte->may_read = 0;
+		if (amrfield & 2)
+			gpte->may_write = 0;
+	}
+
+	/* Get the guest physical address */
+	gpte->raddr = kvmppc_mmu_get_real_addr(v, gr, eaddr);
+	return 0;
+}
+
+/*
+ * Cheap load-versus-store classification of an instruction word.
+ *
+ * For primary opcode 31 the deciding bit is 0x100; for every other
+ * primary opcode it is 0x10000000.  Returns 1 when that bit is set
+ * (store) and 0 otherwise (load).  This holds for server processors;
+ * embedded processors have external PID instructions that break the
+ * rule.  The result is meaningless for an instruction that is neither
+ * a load nor a store.
+ */
+static int instruction_is_store(unsigned int instr)
+{
+	/* major opcode 31 */
+	if ((instr >> 26) == 31)
+		return (instr & 0x100) != 0;
+
+	return (instr & 0x10000000) != 0;
+}
+
+/*
+ * Hand a faulting guest access at guest physical address @gpa to the
+ * MMIO emulation code.
+ *
+ * @is_store is non-zero when the original fault was reported as a
+ * store.  Returns RESUME_GUEST when the instruction cannot be fetched
+ * or its direction disagrees with @is_store (the guest then simply
+ * retries); otherwise returns whatever kvmppc_emulate_mmio() returns.
+ */
+static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
+				  unsigned long gpa, int is_store)
+{
+	unsigned long pc = kvmppc_get_pc(vcpu);
+	u32 inst;
+	int rc;
+
+	/*
+	 * Fetch the instruction ourselves when it has not been read yet:
+	 * emulate_instruction does not check the result of kvmppc_ld.
+	 * On failure, go back to the guest and let it execute again.
+	 */
+	if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) {
+		rc = kvmppc_ld(vcpu, &pc, sizeof(u32), &inst, false);
+		if (rc != EMULATE_DONE || inst == KVM_INST_FETCH_FAILED)
+			return RESUME_GUEST;
+		vcpu->arch.last_inst = inst;
+	}
+
+	/*
+	 * WARNING: there is no guarantee that the instruction just read
+	 * from memory is the one that originally faulted.  If it is
+	 * neither a load nor a store it cannot access memory, so access
+	 * permissions are not a concern.  Otherwise we only verify that
+	 * its direction (load or store) agrees with the original fault,
+	 * because that is what the permissions were checked against.
+	 * On a mismatch, return and let the guest retry.
+	 */
+	if (instruction_is_store(vcpu->arch.last_inst) != (is_store != 0))
+		return RESUME_GUEST;
+
+	/*
+	 * Emulation translates through the hash table once and performs
+	 * the access later.  If the translation is invalidated in
+	 * between, using the stale physical address could be a security
+	 * hole for the guest (though not for the host).
+	 *
+	 * MMIO stores are less of a concern since they are not globally
+	 * visible.  MMIO loads could be affected to some extent; that is
+	 * ignored for now.
+	 */
+	vcpu->arch.paddr_accessed = gpa;
+	return kvmppc_emulate_mmio(run, vcpu);
+}
+
+int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned long ea, unsigned long dsisr)
+{ /*
+ * Virtual-mode handler for a guest page fault on effective address
+ * @ea; @dsisr supplies DSISR_ISSTORE to tell stores from loads.
+ *
+ * Returns RESUME_GUEST when the HPTE has been updated or when the
+ * guest should simply retry, -EFAULT on failure, or the result of
+ * kvmppc_hv_emulate_mmio() when no valid memslot backs the address.
+ */
+ struct kvm *kvm = vcpu->kvm;
+ unsigned long *hptep, hpte[3], r;
+ unsigned long mmu_seq, psize, pte_size;
+ unsigned long gfn, hva, pfn;
+ struct kvm_memory_slot *memslot;
+ unsigned long *rmap;
+ struct revmap_entry *rev;
+ struct page *page, *pages[1];
+ long index, ret, npages;
+ unsigned long is_io;
+ unsigned int writing, write_ok;
+ struct vm_area_struct *vma;
+ unsigned long rcbits;
+
+ /*
+ * Real-mode code has already searched the HPT and found the
+ * entry we're interested in. Lock the entry and check that
+ * it hasn't changed. If it has, just return and re-execute the
+ * instruction.
+ */
+ if (ea != vcpu->arch.pgfault_addr)
+ return RESUME_GUEST;
+ index = vcpu->arch.pgfault_index;
+ hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); /* each HPTE occupies 16 bytes */
+ rev = &kvm->arch.revmap[index];
+ preempt_disable(); /* preemption stays off while the HPTE lock bit is held */
+ while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
+ cpu_relax();
+ hpte[0] = hptep[0] & ~HPTE_V_HVLOCK; /* snapshot: hpte[0..1] = HPTE, hpte[2] = guest view of second dword */
+ hpte[1] = hptep[1];
+ hpte[2] = r = rev->guest_rpte;
+ asm volatile("lwsync" : : : "memory");
+ hptep[0] = hpte[0]; /* storing the value without HVLOCK releases the lock */
+ preempt_enable();
+
+ if (hpte[0] != vcpu->arch.pgfault_hpte[0] ||
+ hpte[1] != vcpu->arch.pgfault_hpte[1])
+ return RESUME_GUEST;
+
+ /* Translate the logical address and get the page */
+ psize = hpte_page_size(hpte[0], r);
+ gfn = hpte_rpn(r, psize);
+ memslot = gfn_to_memslot(kvm, gfn);
+
+ /* No memslot means it's an emulated MMIO region */
+ if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
+ unsigned long gpa = (gfn << PAGE_SHIFT) | (ea & (psize - 1));
+ return kvmppc_hv_emulate_mmio(run, vcpu, gpa,
+ dsisr & DSISR_ISSTORE);
+ }
+
+ if (!kvm->arch.using_mmu_notifiers)
+ return -EFAULT; /* should never get here */
+
+ /* used to check for invalidations in progress */
+ mmu_seq = kvm->mmu_notifier_seq;
+ smp_rmb();
+
+ is_io = 0;
+ pfn = 0;
+ page = NULL;
+ pte_size = PAGE_SIZE;
+ writing = (dsisr & DSISR_ISSTORE) != 0;
+ /* If writing != 0, then the HPTE must allow writing, if we get here */
+ write_ok = writing;
+ hva = gfn_to_hva_memslot(memslot, gfn);
+ npages = get_user_pages_fast(hva, 1, writing, pages);
+ if (npages < 1) {
+ /* Check if it's an I/O mapping */
+ down_read(&current->mm->mmap_sem);
+ vma = find_vma(current->mm, hva);
+ if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end &&
+ (vma->vm_flags & VM_PFNMAP)) {
+ pfn = vma->vm_pgoff +
+ ((hva - vma->vm_start) >> PAGE_SHIFT);
+ pte_size = psize;
+ is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
+ write_ok = vma->vm_flags & VM_WRITE;
+ }
+ up_read(&current->mm->mmap_sem);
+ if (!pfn) /* pfn still 0: no usable VM_PFNMAP mapping was found */
+ return -EFAULT;
+ } else {
+ page = pages[0];
+ if (PageHuge(page)) {
+ page = compound_head(page);
+ pte_size <<= compound_order(page);
+ }
+ /* if the guest wants write access, see if that is OK */
+ if (!writing && hpte_is_writable(r)) {
+ pte_t *ptep, pte;
+
+ /*
+ * We need to protect against page table destruction
+ * while looking up and updating the pte.
+ */
+ rcu_read_lock_sched();
+ ptep = find_linux_pte_or_hugepte(current->mm->pgd,
+ hva, NULL);
+ if (ptep && pte_present(*ptep)) {
+ pte = kvmppc_read_update_linux_pte(ptep, 1);
+ if (pte_write(pte))
+ write_ok = 1;
+ }
+ rcu_read_unlock_sched();
+ }
+ pfn = page_to_pfn(page);
+ }
+
+ ret = -EFAULT;
+ if (psize > pte_size) /* host backing is smaller than the guest page size */
+ goto out_put;
+
+ /* Check WIMG vs. the actual page we're accessing */
+ if (!hpte_cache_flags_ok(r, is_io)) {
+ if (is_io) /* page is NULL here: is_io is only set on the path where gup failed */
+ return -EFAULT;
+ /*
+ * Allow guest to map emulated device memory as
+ * uncacheable, but actually make it cacheable.
+ */
+ r = (r & ~(HPTE_R_W|HPTE_R_I|HPTE_R_G)) | HPTE_R_M;
+ }
+
+ /* Set the HPTE to point to pfn */
+ r = (r & ~(HPTE_R_PP0 - pte_size)) | (pfn << PAGE_SHIFT);
+ if (hpte_is_writable(r) && !write_ok)
+ r = hpte_make_readonly(r);
+ ret = RESUME_GUEST;
+ preempt_disable();
+ while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
+ cpu_relax();
+ if ((hptep[0] & ~HPTE_V_HVLOCK) != hpte[0] || hptep[1] != hpte[1] ||
+ rev->guest_rpte != hpte[2])
+ /* HPTE has been changed under us; let the guest retry */
+ goto out_unlock;
+ hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
+
+ rmap = &memslot->rmap[gfn - memslot->base_gfn];
+ lock_rmap(rmap); /* lock order here: HPTE lock bit first, then rmap lock */
+
+ /* Check if we might have been invalidated; let the guest retry if so */
+ ret = RESUME_GUEST;
+ if (mmu_notifier_retry(vcpu, mmu_seq)) {
+ unlock_rmap(rmap);
+ goto out_unlock;
+ }
+
+ /* Only set R/C in real HPTE if set in both *rmap and guest_rpte */
+ rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
+ r &= rcbits | ~(HPTE_R_R | HPTE_R_C);
+
+ if (hptep[0] & HPTE_V_VALID) {
+ /* HPTE was previously valid, so we need to invalidate it */
+ unlock_rmap(rmap);
+ hptep[0] |= HPTE_V_ABSENT;
+ kvmppc_invalidate_hpte(kvm, hptep, index);
+ /* don't lose previous R and C bits */
+ r |= hptep[1] & (HPTE_R_R | HPTE_R_C);
+ } else {
+ kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0); /* NOTE(review): rmap is not unlocked on this path here; presumably the callee releases it — confirm */
+ }
+
+ hptep[1] = r; /* write second dword first, then publish via the first dword */
+ eieio();
+ hptep[0] = hpte[0]; /* hpte[0] has HVLOCK clear, so this also unlocks the HPTE */
+ asm volatile("ptesync" : : : "memory");
+ preempt_enable();
+ if (page && hpte_is_writable(r))
+ SetPageDirty(page);
+
+ out_put:
+ if (page) {
+ /*
+ * We drop pages[0] here, not page because page might
+ * have been set to the head page of a compound, but
+ * we have to drop the reference on the correct tail
+ * page to match the get inside gup()
+ */
+ put_page(pages[0]);
+ }
+ return ret;
+
+ out_unlock:
+ hptep[0] &= ~HPTE_V_HVLOCK;
+ preempt_enable();
+ goto out_put;
+}
+
+/*
+ * Call @handler for the rmap entry of every memslot whose userspace
+ * address range contains @hva, passing the corresponding guest frame
+ * number.  Returns the bitwise OR of all handler results (0 if no
+ * memslot matched).
+ */
+static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
+			  int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+					 unsigned long gfn))
+{
+	struct kvm_memslots *slots = kvm_memslots(kvm);
+	struct kvm_memory_slot *memslot;
+	int combined = 0;
+
+	kvm_for_each_memslot(memslot, slots) {
+		unsigned long base = memslot->userspace_addr;
+		unsigned long limit = base + (memslot->npages << PAGE_SHIFT);
+		gfn_t page_idx;
+
+		if (hva < base || hva >= limit)
+			continue;
+
+		page_idx = (hva - base) >> PAGE_SHIFT;
+		combined |= handler(kvm, &memslot->rmap[page_idx],
+				    memslot->base_gfn + page_idx);
+	}
+
+	return combined;
+}
+
+static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
+ unsigned long gfn)
+{ /*
+ * Empty the reverse-map chain headed at *rmapp: each pass unlinks the
+ * chain's head HPTE and, if that HPTE is valid and maps @gfn, marks it
+ * absent, invalidates it and saves its R/C bits in both *rmapp and the
+ * entry's guest_rpte. Loops until KVMPPC_RMAP_PRESENT is clear.
+ * Always returns 0.
+ */
+ struct revmap_entry *rev = kvm->arch.revmap;
+ unsigned long h, i, j;
+ unsigned long *hptep;
+ unsigned long ptel, psize, rcbits;
+
+ for (;;) {
+ lock_rmap(rmapp);
+ if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
+ unlock_rmap(rmapp);
+ break;
+ }
+
+ /*
+ * To avoid an ABBA deadlock with the HPTE lock bit,
+ * we can't spin on the HPTE lock while holding the
+ * rmap chain lock.
+ */
+ i = *rmapp & KVMPPC_RMAP_INDEX; /* index of the HPTE at the head of the chain */
+ hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
+ if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
+ /* unlock rmap before spinning on the HPTE lock */
+ unlock_rmap(rmapp);
+ while (hptep[0] & HPTE_V_HVLOCK)
+ cpu_relax();
+ continue; /* start over: the chain may have changed meanwhile */
+ }
+ j = rev[i].forw;
+ if (j == i) { /* entry links to itself: it was the only one */
+ /* chain is now empty */
+ *rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
+ } else {
+ /* remove i from chain */
+ h = rev[i].back;
+ rev[h].forw = j;
+ rev[j].back = h;
+ rev[i].forw = rev[i].back = i;
+ *rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j; /* j becomes the new head */
+ }
+
+ /* Now check and modify the HPTE */
+ ptel = rev[i].guest_rpte;
+ psize = hpte_page_size(hptep[0], ptel);
+ if ((hptep[0] & HPTE_V_VALID) &&
+ hpte_rpn(ptel, psize) == gfn) {
+ hptep[0] |= HPTE_V_ABSENT;
+ kvmppc_invalidate_hpte(kvm, hptep, i);
+ /* Harvest R and C */
+ rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C);
+ *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
+ rev[i].guest_rpte = ptel | rcbits;
+ }
+ unlock_rmap(rmapp);
+ hptep[0] &= ~HPTE_V_HVLOCK; /* release the HPTE lock bit */
+ }
+ return 0;
+}
+
+/*
+ * Unmap the guest mappings of host virtual address @hva.  Does nothing
+ * unless this VM uses mmu notifiers.  Always returns 0.
+ */
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+	if (!kvm->arch.using_mmu_notifiers)
+		return 0;
+
+	kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+	return 0;
+}
+
+static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
+ unsigned long gfn)
+{ /*
+ * Test and clear the "referenced" state of the page behind *rmapp:
+ * clears KVMPPC_RMAP_REFERENCED in *rmapp and, for each valid HPTE on
+ * the chain with HPTE_R_R set, calls kvmppc_clear_ref_hpte() and
+ * records HPTE_R_R in that entry's guest_rpte.
+ * Returns 1 if any reference was found, else 0. @gfn is not used.
+ */
+ struct revmap_entry *rev = kvm->arch.revmap;
+ unsigned long head, i, j;
+ unsigned long *hptep;
+ int ret = 0;
+
+ retry:
+ lock_rmap(rmapp);
+ if (*rmapp & KVMPPC_RMAP_REFERENCED) {
+ *rmapp &= ~KVMPPC_RMAP_REFERENCED;
+ ret = 1;
+ }
+ if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
+ unlock_rmap(rmapp);
+ return ret;
+ }
+
+ i = head = *rmapp & KVMPPC_RMAP_INDEX;
+ do {
+ hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
+ j = rev[i].forw; /* fetch successor first: "continue" jumps to the loop condition, which advances i to j */
+
+ /* If this HPTE isn't referenced, ignore it */
+ if (!(hptep[1] & HPTE_R_R))
+ continue;
+
+ if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
+ /* unlock rmap before spinning on the HPTE lock */
+ unlock_rmap(rmapp);
+ while (hptep[0] & HPTE_V_HVLOCK)
+ cpu_relax();
+ goto retry; /* ret keeps any value already accumulated */
+ }
+
+ /* Now check and modify the HPTE */
+ if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) { /* R bit re-tested now that the HPTE is locked */
+ kvmppc_clear_ref_hpte(kvm, hptep, i);
+ rev[i].guest_rpte |= HPTE_R_R;
+ ret = 1;
+ }
+ hptep[0] &= ~HPTE_V_HVLOCK;
+ } while ((i = j) != head);
+
+ unlock_rmap(rmapp);
+ return ret;
+}
+
+/*
+ * Age the guest mappings of host virtual address @hva.  Returns 0 when
+ * mmu notifiers are not in use, otherwise the combined result of
+ * kvm_age_rmapp() over the matching memslots.
+ */
+int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	if (kvm->arch.using_mmu_notifiers)
+		return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
+
+	return 0;
+}
+
+/*
+ * Report, without modifying anything, whether the page behind *rmapp
+ * has been referenced: either KVMPPC_RMAP_REFERENCED is set in *rmapp
+ * or some HPTE on the chain has HPTE_R_R set.  Returns 1 if so, else 0.
+ */
+static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
+			      unsigned long gfn)
+{
+	struct revmap_entry *rev = kvm->arch.revmap;
+	unsigned long first, cur;
+	unsigned long *hpte;
+	int referenced = 0;
+
+	/* Fast path: flag already visible without taking the lock */
+	if (*rmapp & KVMPPC_RMAP_REFERENCED)
+		return 1;
+
+	lock_rmap(rmapp);
+	if (*rmapp & KVMPPC_RMAP_REFERENCED) {
+		referenced = 1;
+	} else if (*rmapp & KVMPPC_RMAP_PRESENT) {
+		first = cur = *rmapp & KVMPPC_RMAP_INDEX;
+		do {
+			hpte = (unsigned long *)(kvm->arch.hpt_virt +
+						 (cur << 4));
+			if (hpte[1] & HPTE_R_R) {
+				referenced = 1;
+				break;
+			}
+			cur = rev[cur].forw;
+		} while (cur != first);
+	}
+	unlock_rmap(rmapp);
+
+	return referenced;
+}
+
+/*
+ * Test whether the guest mappings of host virtual address @hva have
+ * been referenced.  Returns 0 when mmu notifiers are not in use,
+ * otherwise the combined result of kvm_test_age_rmapp().
+ */
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	if (kvm->arch.using_mmu_notifiers)
+		return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp);
+
+	return 0;
+}
+
+/*
+ * Handle a change of the host PTE for @hva by unmapping the guest
+ * mappings of that address; @pte itself is not used.  Does nothing
+ * unless mmu notifiers are in use.
+ */
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+	if (kvm->arch.using_mmu_notifiers)
+		kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+}
+
+static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
+{ /*
+ * Test and clear the "changed" state of the page behind *rmapp:
+ * clears KVMPPC_RMAP_CHANGED in *rmapp and clears HPTE_R_C in every
+ * valid HPTE on the chain that has it set, recording HPTE_R_C in that
+ * entry's guest_rpte. Returns 1 if the page was found dirty, else 0.
+ */
+ struct revmap_entry *rev = kvm->arch.revmap;
+ unsigned long head, i, j;
+ unsigned long *hptep;
+ int ret = 0;
+
+ retry:
+ lock_rmap(rmapp);
+ if (*rmapp & KVMPPC_RMAP_CHANGED) {
+ *rmapp &= ~KVMPPC_RMAP_CHANGED;
+ ret = 1;
+ }
+ if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
+ unlock_rmap(rmapp);
+ return ret;
+ }
+
+ i = head = *rmapp & KVMPPC_RMAP_INDEX;
+ do {
+ hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
+ j = rev[i].forw; /* fetch successor first: "continue" jumps to the loop condition, which advances i to j */
+
+ if (!(hptep[1] & HPTE_R_C)) /* unlocked peek: skip HPTEs that are not dirty */
+ continue;
+
+ if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
+ /* unlock rmap before spinning on the HPTE lock */
+ unlock_rmap(rmapp);
+ while (hptep[0] & HPTE_V_HVLOCK)
+ cpu_relax();
+ goto retry; /* ret keeps any value already accumulated */
+ }
+
+ /* Now check and modify the HPTE */
+ if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_C)) {
+ /* need to make it temporarily absent to clear C */
+ hptep[0] |= HPTE_V_ABSENT;
+ kvmppc_invalidate_hpte(kvm, hptep, i);
+ hptep[1] &= ~HPTE_R_C;
+ eieio();
+ hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; /* make the entry valid again; HVLOCK is still set */
+ rev[i].guest_rpte |= HPTE_R_C;
+ ret = 1;
+ }
+ hptep[0] &= ~HPTE_V_HVLOCK;
+ } while ((i = j) != head);
+
+ unlock_rmap(rmapp);
+ return ret;
+}
+
+/*
+ * Fill @memslot's dirty bitmap: for every page of the slot whose rmap
+ * entry reports (and thereby clears) a dirty state, set the matching
+ * bit with __set_bit_le().  Runs with preemption disabled.
+ * Always returns 0.
+ */
+long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
+{
+	unsigned long *rmap_base, *dirty;
+	unsigned long pg;
+
+	preempt_disable();
+	rmap_base = memslot->rmap;
+	dirty = memslot->dirty_bitmap;
+	for (pg = 0; pg < memslot->npages; pg++) {
+		if (kvm_test_clear_dirty(kvm, &rmap_base[pg]))
+			__set_bit_le(pg, dirty);
+	}
+	preempt_enable();
+
+	return 0;
+}
+
+/*
+ * Take a reference on the guest page containing guest physical address
+ * @gpa and return a kernel virtual address for @gpa.
+ *
+ * If @nb_ret is non-NULL it receives the number of bytes from @gpa to
+ * the end of the (possibly huge) page.  Returns NULL when no valid
+ * memslot covers @gpa or the page cannot be obtained.  The reference
+ * is dropped with kvmppc_unpin_guest_page().
+ */
+void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
+			    unsigned long *nb_ret)
+{
+	unsigned long gfn = gpa >> PAGE_SHIFT;
+	struct kvm_memory_slot *memslot;
+	struct page *page, *pages[1];
+	unsigned long psize = PAGE_SIZE;
+	unsigned long offset;
+
+	memslot = gfn_to_memslot(kvm, gfn);
+	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
+		return NULL;
+
+	if (kvm->arch.using_mmu_notifiers) {
+		unsigned long hva = gfn_to_hva_memslot(memslot, gfn);
+		int npages = get_user_pages_fast(hva, 1, 1, pages);
+
+		if (npages < 1)
+			return NULL;
+		page = pages[0];
+	} else {
+		unsigned long *physp = kvm->arch.slot_phys[memslot->id];
+		unsigned long pa;
+
+		if (!physp)
+			return NULL;
+		physp += gfn - memslot->base_gfn;
+		pa = *physp;
+		if (!pa) {
+			/* not recorded yet: populate the slot_phys entry */
+			if (kvmppc_get_guest_page(kvm, gfn, memslot,
+						  PAGE_SIZE) < 0)
+				return NULL;
+			pa = *physp;
+		}
+		page = pfn_to_page(pa >> PAGE_SHIFT);
+		get_page(page);
+	}
+
+	/* For a huge page, address relative to the head page */
+	if (PageHuge(page)) {
+		page = compound_head(page);
+		psize <<= compound_order(page);
+	}
+	offset = gpa & (psize - 1);
+	if (nb_ret)
+		*nb_ret = psize - offset;
+	return page_address(page) + offset;
+}
+
+/*
+ * Drop one reference on the page that contains kernel virtual address
+ * @va.  @kvm is not used.
+ */
+void kvmppc_unpin_guest_page(struct kvm *kvm, void *va)
+{
+	put_page(virt_to_page(va));
+}
+
+/*
+ * Set up the vcpu's MMU state: pick the SLB size (32 entries when the
+ * CPU has CPU_FTR_ARCH_206, i.e. POWER7, else 64), install the xlate
+ * and reset_msr callbacks, and set BOOK3S_HFLAG_SLB in hflags.
+ */
+void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_mmu *mmu = &vcpu->arch.mmu;
+
+	/* POWER7 : everything else */
+	vcpu->arch.slb_nr = cpu_has_feature(CPU_FTR_ARCH_206) ? 32 : 64;
+
+	mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate;
+	mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr;
+
+	vcpu->arch.hflags |= BOOK3S_HFLAG_SLB;
+}
diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S
new file mode 100644
index 00000000..f2e6e48e
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_slb.S
@@ -0,0 +1,167 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#define SHADOW_SLB_ESID(num) (SLBSHADOW_SAVEAREA + (num * 0x10)) /* offset of the first doubleword of 16-byte shadow entry num */
+#define SHADOW_SLB_VSID(num) (SLBSHADOW_SAVEAREA + (num * 0x10) + 0x8) /* offset of the second doubleword of the same entry */
+#define UNBOLT_SLB_ENTRY(num) \
+ ld r9, SHADOW_SLB_ESID(num)(r12); /* r12 = shadow buffer base; clobbers r0, r9, CR0 */ \
+ /* Invalid? Skip. */; \
+ rldicl. r0, r9, 37, 63; /* r0 = bit 27 (from the LSB) of r9; presumably the SLB_ESID_V bit - confirm */ \
+ beq slb_entry_skip_ ## num; \
+ xoris r9, r9, SLB_ESID_V@h; /* flip the valid bit, which was set, so it ends up clear */ \
+ std r9, SHADOW_SLB_ESID(num)(r12); \
+ slb_entry_skip_ ## num:
+
+#define REBOLT_SLB_ENTRY(num) \
+ ld r10, SHADOW_SLB_ESID(num)(r11); /* r11 = shadow buffer base; clobbers r9, r10, CR0 */ \
+ cmpdi r10, 0; /* an all-zero first doubleword means the entry is unused */ \
+ beq slb_exit_skip_ ## num; \
+ oris r10, r10, SLB_ESID_V@h; /* set the valid bit again */ \
+ ld r9, SHADOW_SLB_VSID(num)(r11); \
+ slbmte r9, r10; /* reinstall the entry in the SLB */ \
+ std r10, SHADOW_SLB_ESID(num)(r11); /* and mark it valid in the shadow buffer too */ \
+slb_exit_skip_ ## num:
+
+/******************************************************************************
+ * *
+ * Entry code *
+ * *
+ *****************************************************************************/
+
+.macro LOAD_GUEST_SEGMENTS
+
+ /* Required state:
+ *
+ * MSR = ~IR|DR
+ * R13 = PACA
+ * R1 = host R1
+ * R2 = host R2
+ * R3 = shadow vcpu
+ * all other volatile GPRS = free except R4, R6
+ * SVCPU[CR] = guest CR
+ * SVCPU[XER] = guest XER
+ * SVCPU[CTR] = guest CTR
+ * SVCPU[LR] = guest LR
+ *
+ * Registers written by this macro: r0, r9, r10, r11, r12 and CR0.
+ */
+
+ /* Remove LPAR shadow entries */
+
+#if SLB_NUM_BOLTED == 3
+
+ ld r12, PACA_SLBSHADOWPTR(r13)
+
+ /* Save off the first entry so we can slbie it later */
+ ld r10, SHADOW_SLB_ESID(0)(r12)
+ ld r11, SHADOW_SLB_VSID(0)(r12)
+
+ /* Remove bolted entries */
+ UNBOLT_SLB_ENTRY(0)
+ UNBOLT_SLB_ENTRY(1)
+ UNBOLT_SLB_ENTRY(2)
+
+#else
+#error unknown number of bolted entries
+#endif
+
+ /* Flush SLB */
+
+ slbia
+
+ /* r10 = esid & ESID_MASK (keep only the top 36 bits) */
+ rldicr r10, r10, 0, 35
+ /* r12 = CLASS_BIT(VSID), moved into place; ORed into r10 below */
+ rldic r12, r11, 56 - 36, 36
+ or r10, r10, r12
+ slbie r10
+
+ isync
+
+ /* Fill SLB with our shadow */
+
+ /* r12 = end pointer = r3 + SVCPU_SLB + 16 * (byte at SVCPU_SLB_MAX) */
+ lbz r12, SVCPU_SLB_MAX(r3)
+ mulli r12, r12, 16
+ addi r12, r12, SVCPU_SLB
+ add r12, r12, r3
+
+ /* for (r11 = kvm_slb; r11 < kvm_slb + kvm_slb_size; r11+=slb_entry) */
+ /* NOTE(review): the test is at the bottom, so the body runs once even when SVCPU_SLB_MAX is 0 */
+ li r11, SVCPU_SLB
+ add r11, r11, r3
+
+slb_loop_enter:
+
+ ld r10, 0(r11)
+
+ /* skip entries whose bit 27 (from the LSB) is clear; same test as UNBOLT_SLB_ENTRY */
+ rldicl. r0, r10, 37, 63
+ beq slb_loop_enter_skip
+
+ ld r9, 8(r11)
+ slbmte r9, r10
+
+slb_loop_enter_skip:
+ addi r11, r11, 16
+ cmpd cr0, r11, r12
+ blt slb_loop_enter
+
+slb_do_enter:
+
+.endm
+
+/******************************************************************************
+ * *
+ * Exit code *
+ * *
+ *****************************************************************************/
+
+.macro LOAD_HOST_SEGMENTS
+
+ /* Register usage at this point:
+ *
+ * R1 = host R1
+ * R2 = host R2
+ * R12 = exit handler id
+ * R13 = shadow vcpu - SHADOW_VCPU_OFF [=PACA on PPC64]
+ * SVCPU.* = guest *
+ * SVCPU[CR] = guest CR
+ * SVCPU[XER] = guest XER
+ * SVCPU[CTR] = guest CTR
+ * SVCPU[LR] = guest LR
+ *
+ * Registers written by this macro: r9, r10, r11 and CR0.
+ */
+
+ /* Restore bolted entries from the shadow and fix it along the way */
+
+ /* We don't store anything in entry 0, so we don't need to take care of it */
+ slbia
+ isync
+
+#if SLB_NUM_BOLTED == 3
+
+ ld r11, PACA_SLBSHADOWPTR(r13)
+
+ /* each REBOLT re-inserts one shadow entry with slbmte and marks it valid again */
+ REBOLT_SLB_ENTRY(0)
+ REBOLT_SLB_ENTRY(1)
+ REBOLT_SLB_ENTRY(2)
+
+#else
+#error unknown number of bolted entries
+#endif
+
+slb_do_exit:
+
+.endm
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
new file mode 100644
index 00000000..ea0f8c53
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -0,0 +1,73 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/highmem.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/hugetlb.h>
+#include <linux/list.h>
+
+#include <asm/tlbflush.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu-hash64.h>
+#include <asm/hvcall.h>
+#include <asm/synch.h>
+#include <asm/ppc-opcode.h>
+#include <asm/kvm_host.h>
+#include <asm/udbg.h>
+
+#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
+
+/*
+ * Handle the H_PUT_TCE hypercall: store @tce in the TCE table whose
+ * liobn equals @liobn, at the index derived from @ioba.
+ *
+ * Returns H_SUCCESS after the store, H_PARAMETER if @ioba lies outside
+ * the table's window, or H_TOO_HARD when no table with that liobn is
+ * registered (the call is then left to userspace).
+ */
+long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+		      unsigned long ioba, unsigned long tce)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvmppc_spapr_tce_table *stt;
+
+	list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
+		unsigned long entry;
+		u64 *tces;
+
+		if (stt->liobn != liobn)
+			continue;
+
+		if (ioba >= stt->window_size)
+			return H_PARAMETER;
+
+		entry = ioba >> SPAPR_TCE_SHIFT;
+		tces = (u64 *)page_address(stt->pages[entry / TCES_PER_PAGE]);
+
+		/* FIXME: Need to validate the TCE itself */
+		tces[entry % TCES_PER_PAGE] = tce;
+		return H_SUCCESS;
+	}
+
+	/* Didn't find the liobn, punt it to userspace */
+	return H_TOO_HARD;
+}
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
new file mode 100644
index 00000000..135663a3
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -0,0 +1,592 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <asm/kvm_ppc.h>
+#include <asm/disassemble.h>
+#include <asm/kvm_book3s.h>
+#include <asm/reg.h>
+#include <asm/switch_to.h>
+
+#define OP_19_XOP_RFID 18
+#define OP_19_XOP_RFI 50
+
+#define OP_31_XOP_MFMSR 83
+#define OP_31_XOP_MTMSR 146
+#define OP_31_XOP_MTMSRD 178
+#define OP_31_XOP_MTSR 210
+#define OP_31_XOP_MTSRIN 242
+#define OP_31_XOP_TLBIEL 274
+#define OP_31_XOP_TLBIE 306
+#define OP_31_XOP_SLBMTE 402
+#define OP_31_XOP_SLBIE 434
+#define OP_31_XOP_SLBIA 498
+#define OP_31_XOP_MFSR 595
+#define OP_31_XOP_MFSRIN 659
+#define OP_31_XOP_DCBA 758
+#define OP_31_XOP_SLBMFEV 851
+#define OP_31_XOP_EIOIO 854
+#define OP_31_XOP_SLBMFEE 915
+
+/* DCBZ is actually 1014, but we patch it to 1010 so we get a trap */
+#define OP_31_XOP_DCBZ 1010
+
+#define OP_LFS 48
+#define OP_LFD 50
+#define OP_STFS 52
+#define OP_STFD 54
+
+#define SPRN_GQR0 912
+#define SPRN_GQR1 913
+#define SPRN_GQR2 914
+#define SPRN_GQR3 915
+#define SPRN_GQR4 916
+#define SPRN_GQR5 917
+#define SPRN_GQR6 918
+#define SPRN_GQR7 919
+
+/* Book3S_32 defines mfsrin(v) - but that messes up our abstract
+ * function pointers, so let's just disable the define. */
+#undef mfsrin
+
+/*
+ * Privilege level required to access an SPR, in increasing order so that
+ * levels can be compared with '>' (see spr_allowed()).
+ */
+enum priv_level {
+ PRIV_PROBLEM = 0, /* accessible while the guest MSR has MSR_PR set */
+ PRIV_SUPER = 1, /* highest level a PAPR guest may use */
+ PRIV_HYPER = 2, /* refused for PAPR guests */
+};
+
+/*
+ * Decide whether the guest may access an SPR that requires @level.
+ *
+ * Returns false when a PAPR-enabled vcpu asks for more than PRIV_SUPER, or
+ * when the guest MSR has MSR_PR set and @level is above PRIV_PROBLEM;
+ * returns true otherwise.
+ */
+static bool spr_allowed(struct kvm_vcpu *vcpu, enum priv_level level)
+{
+	/* PAPR VMs only access supervisor SPRs */
+	if (level > PRIV_SUPER && vcpu->arch.papr_enabled)
+		return false;
+
+	/* Limit user space to its own small SPR set */
+	return !(level > PRIV_PROBLEM && (vcpu->arch.shared->msr & MSR_PR));
+}
+
+/*
+ * Emulate one Book3S guest instruction @inst on behalf of @vcpu.
+ *
+ * Dispatches on the primary opcode (19 or 31) and the extended opcode and
+ * applies the operation to the vcpu's software state (shared MSR/SRR0/SRR1,
+ * GPRs) or through the vcpu->arch.mmu callbacks.
+ *
+ * *advance is cleared when the handler sets the guest PC itself (rfi/rfid)
+ * or queues a data storage interrupt for a faulting dcbz; presumably the
+ * caller uses it to decide whether to step past the instruction -- confirm
+ * at the call site.
+ *
+ * Returns EMULATE_DONE on success.  When "emulated" ends up as EMULATE_FAIL
+ * the instruction is offered to kvmppc_emulate_paired_single() and its
+ * result is returned.  Note that the SLBMTE/SLBIE/SLBIA cases return
+ * EMULATE_FAIL directly when the MMU callback is missing and therefore
+ * bypass that fallback.
+ */
+int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int inst, int *advance)
+{
+ int emulated = EMULATE_DONE;
+
+ switch (get_op(inst)) {
+ case 19:
+ switch (get_xop(inst)) {
+ case OP_19_XOP_RFID:
+ case OP_19_XOP_RFI:
+ /* Return from interrupt: PC <- SRR0, MSR <- SRR1 */
+ kvmppc_set_pc(vcpu, vcpu->arch.shared->srr0);
+ kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1);
+ *advance = 0;
+ break;
+
+ default:
+ emulated = EMULATE_FAIL;
+ break;
+ }
+ break;
+ case 31:
+ switch (get_xop(inst)) {
+ case OP_31_XOP_MFMSR:
+ kvmppc_set_gpr(vcpu, get_rt(inst),
+ vcpu->arch.shared->msr);
+ break;
+ case OP_31_XOP_MTMSRD:
+ {
+ ulong rs = kvmppc_get_gpr(vcpu, get_rs(inst));
+ /*
+ * With instruction bit 0x10000 set only MSR_RI and MSR_EE are
+ * replaced, directly in the shared MSR and without going
+ * through kvmppc_set_msr().
+ */
+ if (inst & 0x10000) {
+ vcpu->arch.shared->msr &= ~(MSR_RI | MSR_EE);
+ vcpu->arch.shared->msr |= rs & (MSR_RI | MSR_EE);
+ } else
+ kvmppc_set_msr(vcpu, rs);
+ break;
+ }
+ case OP_31_XOP_MTMSR:
+ kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, get_rs(inst)));
+ break;
+ case OP_31_XOP_MFSR:
+ {
+ int srnum;
+
+ /* Segment register number is encoded in the instruction */
+ srnum = kvmppc_get_field(inst, 12 + 32, 15 + 32);
+ if (vcpu->arch.mmu.mfsrin) {
+ u32 sr;
+ sr = vcpu->arch.mmu.mfsrin(vcpu, srnum);
+ kvmppc_set_gpr(vcpu, get_rt(inst), sr);
+ }
+ break;
+ }
+ case OP_31_XOP_MFSRIN:
+ {
+ int srnum;
+
+ /* Segment register number is the top nibble of the low 32 bits of rB */
+ srnum = (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf;
+ if (vcpu->arch.mmu.mfsrin) {
+ u32 sr;
+ sr = vcpu->arch.mmu.mfsrin(vcpu, srnum);
+ kvmppc_set_gpr(vcpu, get_rt(inst), sr);
+ }
+ break;
+ }
+ case OP_31_XOP_MTSR:
+ /* NOTE(review): mtsrin is called unconditionally, unlike mfsrin above */
+ vcpu->arch.mmu.mtsrin(vcpu,
+ (inst >> 16) & 0xf,
+ kvmppc_get_gpr(vcpu, get_rs(inst)));
+ break;
+ case OP_31_XOP_MTSRIN:
+ vcpu->arch.mmu.mtsrin(vcpu,
+ (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf,
+ kvmppc_get_gpr(vcpu, get_rs(inst)));
+ break;
+ case OP_31_XOP_TLBIE:
+ case OP_31_XOP_TLBIEL:
+ {
+ /* Instruction bit 0x00200000 is passed on as the "large" flag */
+ bool large = (inst & 0x00200000) ? true : false;
+ ulong addr = kvmppc_get_gpr(vcpu, get_rb(inst));
+ vcpu->arch.mmu.tlbie(vcpu, addr, large);
+ break;
+ }
+ case OP_31_XOP_EIOIO:
+ /* Nothing to do */
+ break;
+ case OP_31_XOP_SLBMTE:
+ if (!vcpu->arch.mmu.slbmte)
+ return EMULATE_FAIL;
+
+ vcpu->arch.mmu.slbmte(vcpu,
+ kvmppc_get_gpr(vcpu, get_rs(inst)),
+ kvmppc_get_gpr(vcpu, get_rb(inst)));
+ break;
+ case OP_31_XOP_SLBIE:
+ if (!vcpu->arch.mmu.slbie)
+ return EMULATE_FAIL;
+
+ vcpu->arch.mmu.slbie(vcpu,
+ kvmppc_get_gpr(vcpu, get_rb(inst)));
+ break;
+ case OP_31_XOP_SLBIA:
+ if (!vcpu->arch.mmu.slbia)
+ return EMULATE_FAIL;
+
+ vcpu->arch.mmu.slbia(vcpu);
+ break;
+ case OP_31_XOP_SLBMFEE:
+ if (!vcpu->arch.mmu.slbmfee) {
+ emulated = EMULATE_FAIL;
+ } else {
+ ulong t, rb;
+
+ rb = kvmppc_get_gpr(vcpu, get_rb(inst));
+ t = vcpu->arch.mmu.slbmfee(vcpu, rb);
+ kvmppc_set_gpr(vcpu, get_rt(inst), t);
+ }
+ break;
+ case OP_31_XOP_SLBMFEV:
+ if (!vcpu->arch.mmu.slbmfev) {
+ emulated = EMULATE_FAIL;
+ } else {
+ ulong t, rb;
+
+ rb = kvmppc_get_gpr(vcpu, get_rb(inst));
+ t = vcpu->arch.mmu.slbmfev(vcpu, rb);
+ kvmppc_set_gpr(vcpu, get_rt(inst), t);
+ }
+ break;
+ case OP_31_XOP_DCBA:
+ /* Gets treated as NOP */
+ break;
+ case OP_31_XOP_DCBZ:
+ {
+ ulong rb = kvmppc_get_gpr(vcpu, get_rb(inst));
+ ulong ra = 0;
+ ulong addr, vaddr;
+ u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+ u32 dsisr;
+ int r;
+
+ /* rA == 0 means a literal zero base, not GPR0 */
+ if (get_ra(inst))
+ ra = kvmppc_get_gpr(vcpu, get_ra(inst));
+
+ /* 32-byte aligned effective address, truncated when MSR_SF is clear */
+ addr = (ra + rb) & ~31ULL;
+ if (!(vcpu->arch.shared->msr & MSR_SF))
+ addr &= 0xffffffff;
+ /* kvmppc_st() takes &addr, so keep a copy for fault reporting */
+ vaddr = addr;
+
+ r = kvmppc_st(vcpu, &addr, 32, zeros, true);
+ if ((r == -ENOENT) || (r == -EPERM)) {
+ struct kvmppc_book3s_shadow_vcpu *svcpu;
+
+ /* Store failed: report a data storage interrupt to the guest */
+ svcpu = svcpu_get(vcpu);
+ *advance = 0;
+ vcpu->arch.shared->dar = vaddr;
+ svcpu->fault_dar = vaddr;
+
+ dsisr = DSISR_ISSTORE;
+ if (r == -ENOENT)
+ dsisr |= DSISR_NOHPTE;
+ else if (r == -EPERM)
+ dsisr |= DSISR_PROTFAULT;
+
+ vcpu->arch.shared->dsisr = dsisr;
+ svcpu->fault_dsisr = dsisr;
+ svcpu_put(svcpu);
+
+ kvmppc_book3s_queue_irqprio(vcpu,
+ BOOK3S_INTERRUPT_DATA_STORAGE);
+ }
+
+ break;
+ }
+ default:
+ emulated = EMULATE_FAIL;
+ }
+ break;
+ default:
+ emulated = EMULATE_FAIL;
+ }
+
+ /* Last resort: let the paired-single emulation have a go */
+ if (emulated == EMULATE_FAIL)
+ emulated = kvmppc_emulate_paired_single(run, vcpu);
+
+ return emulated;
+}
+
+/*
+ * Decode a 32-bit BAT register write into @bat.
+ *
+ * @upper selects which half is being written.  The upper half supplies
+ * bepi, bepi_mask, vs and vp and is kept in the low 32 bits of bat->raw;
+ * the lower half supplies brpn, wimg and pp and is kept in the high 32 bits
+ * of bat->raw.  @vcpu is not used.
+ */
+void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper,
+		    u32 val)
+{
+	u32 bl;
+
+	if (!upper) {
+		/* Lower BAT */
+		bat->raw = (bat->raw & 0x00000000ffffffffULL) | ((u64)val << 32);
+		bat->pp = val & 3;
+		bat->wimg = (val >> 3) & 0xf;
+		bat->brpn = val & 0xfffe0000;
+		return;
+	}
+
+	/* Upper BAT */
+	bl = (val >> 2) & 0x7ff;
+	bat->raw = (bat->raw & 0xffffffff00000000ULL) | val;
+	bat->vp = !!(val & 1);
+	bat->vs = !!(val & 2);
+	bat->bepi = val & 0xfffe0000;
+	bat->bepi_mask = (~bl << 17);
+}
+
+/*
+ * Map a BAT SPR number to the matching entry of the vcpu's ibat[] or dbat[]
+ * array.  Each BAT occupies two consecutive SPR numbers (upper, lower), so
+ * the index is half the offset from the first SPR of the range; the
+ * BAT4..BAT7 ranges start at index 4.  Any other @sprn hits BUG().
+ */
+static struct kvmppc_bat *kvmppc_find_bat(struct kvm_vcpu *vcpu, int sprn)
+{
+	struct kvmppc_vcpu_book3s *book3s = to_book3s(vcpu);
+	struct kvmppc_bat *bats;
+	int idx;
+
+	switch (sprn) {
+	case SPRN_IBAT0U ... SPRN_IBAT3L:
+		bats = book3s->ibat;
+		idx = (sprn - SPRN_IBAT0U) / 2;
+		break;
+	case SPRN_IBAT4U ... SPRN_IBAT7L:
+		bats = book3s->ibat;
+		idx = 4 + ((sprn - SPRN_IBAT4U) / 2);
+		break;
+	case SPRN_DBAT0U ... SPRN_DBAT3L:
+		bats = book3s->dbat;
+		idx = (sprn - SPRN_DBAT0U) / 2;
+		break;
+	case SPRN_DBAT4U ... SPRN_DBAT7L:
+		bats = book3s->dbat;
+		idx = 4 + ((sprn - SPRN_DBAT4U) / 2);
+		break;
+	default:
+		BUG();
+	}
+
+	return &bats[idx];
+}
+
+/*
+ * Emulate a guest mtspr: write the value of GPR @rs to SPR @sprn.
+ *
+ * Recognised SPRs are stored in the vcpu's book3s state or in the shared
+ * area; a further group of SPRs is accepted and the write is discarded.
+ * SDR1 is only writable when spr_allowed() grants PRIV_HYPER.
+ *
+ * Returns EMULATE_DONE on success.  Unknown or disallowed SPRs log a message
+ * and return EMULATE_FAIL, unless DEBUG_SPR is defined, in which case the
+ * write is logged and EMULATE_DONE is returned.
+ */
+int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+{
+ int emulated = EMULATE_DONE;
+ ulong spr_val = kvmppc_get_gpr(vcpu, rs);
+
+ switch (sprn) {
+ case SPRN_SDR1:
+ if (!spr_allowed(vcpu, PRIV_HYPER))
+ goto unprivileged;
+ to_book3s(vcpu)->sdr1 = spr_val;
+ break;
+ case SPRN_DSISR:
+ vcpu->arch.shared->dsisr = spr_val;
+ break;
+ case SPRN_DAR:
+ vcpu->arch.shared->dar = spr_val;
+ break;
+ case SPRN_HIOR:
+ to_book3s(vcpu)->hior = spr_val;
+ break;
+ case SPRN_IBAT0U ... SPRN_IBAT3L:
+ case SPRN_IBAT4U ... SPRN_IBAT7L:
+ case SPRN_DBAT0U ... SPRN_DBAT3L:
+ case SPRN_DBAT4U ... SPRN_DBAT7L:
+ {
+ struct kvmppc_bat *bat = kvmppc_find_bat(vcpu, sprn);
+
+ /* Even SPR numbers are treated as the upper BAT half */
+ kvmppc_set_bat(vcpu, bat, !(sprn % 2), (u32)spr_val);
+ /* BAT writes happen so rarely that we're ok to flush
+ * everything here */
+ kvmppc_mmu_pte_flush(vcpu, 0, 0);
+ kvmppc_mmu_flush_segments(vcpu);
+ break;
+ }
+ case SPRN_HID0:
+ to_book3s(vcpu)->hid[0] = spr_val;
+ break;
+ case SPRN_HID1:
+ to_book3s(vcpu)->hid[1] = spr_val;
+ break;
+ case SPRN_HID2:
+ to_book3s(vcpu)->hid[2] = spr_val;
+ break;
+ case SPRN_HID2_GEKKO:
+ to_book3s(vcpu)->hid[2] = spr_val;
+ /* HID2.PSE controls paired single on gekko */
+ switch (vcpu->arch.pvr) {
+ case 0x00080200: /* lonestar 2.0 */
+ case 0x00088202: /* lonestar 2.2 */
+ case 0x70000100: /* gekko 1.0 */
+ case 0x00080100: /* gekko 2.0 */
+ case 0x00083203: /* gekko 2.3a */
+ case 0x00083213: /* gekko 2.3b */
+ case 0x00083204: /* gekko 2.4 */
+ case 0x00083214: /* gekko 2.4e (8SE) - retail HW2 */
+ case 0x00087200: /* broadway */
+ if (vcpu->arch.hflags & BOOK3S_HFLAG_NATIVE_PS) {
+ /* Native paired singles */
+ } else if (spr_val & (1 << 29)) { /* HID2.PSE */
+ vcpu->arch.hflags |= BOOK3S_HFLAG_PAIRED_SINGLE;
+ kvmppc_giveup_ext(vcpu, MSR_FP);
+ } else {
+ vcpu->arch.hflags &= ~BOOK3S_HFLAG_PAIRED_SINGLE;
+ }
+ break;
+ }
+ break;
+ case SPRN_HID4:
+ case SPRN_HID4_GEKKO:
+ to_book3s(vcpu)->hid[4] = spr_val;
+ break;
+ case SPRN_HID5:
+ to_book3s(vcpu)->hid[5] = spr_val;
+ /* guest HID5 set can change is_dcbz32 */
+ /* NOTE(review): the flag is only ever set here, never cleared */
+ if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
+ (mfmsr() & MSR_HV))
+ vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
+ break;
+ case SPRN_GQR0:
+ case SPRN_GQR1:
+ case SPRN_GQR2:
+ case SPRN_GQR3:
+ case SPRN_GQR4:
+ case SPRN_GQR5:
+ case SPRN_GQR6:
+ case SPRN_GQR7:
+ to_book3s(vcpu)->gqr[sprn - SPRN_GQR0] = spr_val;
+ break;
+ /* Writes to the following SPRs are accepted and discarded */
+ case SPRN_ICTC:
+ case SPRN_THRM1:
+ case SPRN_THRM2:
+ case SPRN_THRM3:
+ case SPRN_CTRLF:
+ case SPRN_CTRLT:
+ case SPRN_L2CR:
+ case SPRN_MMCR0_GEKKO:
+ case SPRN_MMCR1_GEKKO:
+ case SPRN_PMC1_GEKKO:
+ case SPRN_PMC2_GEKKO:
+ case SPRN_PMC3_GEKKO:
+ case SPRN_PMC4_GEKKO:
+ case SPRN_WPAR_GEKKO:
+ break;
+ /* Reached by goto when spr_allowed() refuses the access */
+unprivileged:
+ default:
+ printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn);
+#ifndef DEBUG_SPR
+ emulated = EMULATE_FAIL;
+#endif
+ break;
+ }
+
+ return emulated;
+}
+
+/*
+ * Emulate a guest mfspr: read SPR @sprn into GPR @rt.
+ *
+ * Recognised SPRs are read from the vcpu's book3s state or the shared area;
+ * a further group of SPRs always reads as zero.  SDR1 is only readable when
+ * spr_allowed() grants PRIV_HYPER.
+ *
+ * Returns EMULATE_DONE on success.  Unknown or disallowed SPRs log a message
+ * and return EMULATE_FAIL, unless DEBUG_SPR is defined, in which case the
+ * read is logged and EMULATE_DONE is returned without writing @rt.
+ */
+int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+{
+ int emulated = EMULATE_DONE;
+
+ switch (sprn) {
+ case SPRN_IBAT0U ... SPRN_IBAT3L:
+ case SPRN_IBAT4U ... SPRN_IBAT7L:
+ case SPRN_DBAT0U ... SPRN_DBAT3L:
+ case SPRN_DBAT4U ... SPRN_DBAT7L:
+ {
+ struct kvmppc_bat *bat = kvmppc_find_bat(vcpu, sprn);
+
+ /*
+ * Odd SPR numbers read the high 32 bits of bat->raw, even ones
+ * the low bits; this mirrors how kvmppc_set_bat() stores them.
+ */
+ if (sprn % 2)
+ kvmppc_set_gpr(vcpu, rt, bat->raw >> 32);
+ else
+ kvmppc_set_gpr(vcpu, rt, bat->raw);
+
+ break;
+ }
+ case SPRN_SDR1:
+ if (!spr_allowed(vcpu, PRIV_HYPER))
+ goto unprivileged;
+ kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1);
+ break;
+ case SPRN_DSISR:
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dsisr);
+ break;
+ case SPRN_DAR:
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar);
+ break;
+ case SPRN_HIOR:
+ kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hior);
+ break;
+ case SPRN_HID0:
+ kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[0]);
+ break;
+ case SPRN_HID1:
+ kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[1]);
+ break;
+ case SPRN_HID2:
+ case SPRN_HID2_GEKKO:
+ kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[2]);
+ break;
+ case SPRN_HID4:
+ case SPRN_HID4_GEKKO:
+ kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[4]);
+ break;
+ case SPRN_HID5:
+ kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]);
+ break;
+ case SPRN_CFAR:
+ case SPRN_PURR:
+ /* Always read as zero */
+ kvmppc_set_gpr(vcpu, rt, 0);
+ break;
+ case SPRN_GQR0:
+ case SPRN_GQR1:
+ case SPRN_GQR2:
+ case SPRN_GQR3:
+ case SPRN_GQR4:
+ case SPRN_GQR5:
+ case SPRN_GQR6:
+ case SPRN_GQR7:
+ kvmppc_set_gpr(vcpu, rt,
+ to_book3s(vcpu)->gqr[sprn - SPRN_GQR0]);
+ break;
+ /* The following SPRs are not modelled and always read as zero */
+ case SPRN_THRM1:
+ case SPRN_THRM2:
+ case SPRN_THRM3:
+ case SPRN_CTRLF:
+ case SPRN_CTRLT:
+ case SPRN_L2CR:
+ case SPRN_MMCR0_GEKKO:
+ case SPRN_MMCR1_GEKKO:
+ case SPRN_PMC1_GEKKO:
+ case SPRN_PMC2_GEKKO:
+ case SPRN_PMC3_GEKKO:
+ case SPRN_PMC4_GEKKO:
+ case SPRN_WPAR_GEKKO:
+ kvmppc_set_gpr(vcpu, rt, 0);
+ break;
+ default:
+ /* Reached by goto when spr_allowed() refuses the access */
+unprivileged:
+ printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn);
+#ifndef DEBUG_SPR
+ emulated = EMULATE_FAIL;
+#endif
+ break;
+ }
+
+ return emulated;
+}
+
+/*
+ * Build the DSISR value for an alignment interrupt caused by @inst.
+ *
+ * Only the floating-point D-form loads/stores (lfs, lfd, stfs, stfd) and
+ * primary opcode 31 (X-form) are decoded; any other opcode is logged and
+ * yields just the register fields.  @vcpu is not used.
+ */
+u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst)
+{
+	/*
+	 * This is what the spec says about DSISR bits (not mentioned = 0):
+	 *
+	 * 12:13		[DS]	Set to bits 30:31
+	 * 15:16		[X]	Set to bits 29:30
+	 * 17			[X]	Set to bit 25
+	 *			[D/DS]	Set to bit 5
+	 * 18:21		[X]	Set to bits 21:24
+	 *			[D/DS]	Set to bits 1:4
+	 * 22:26			Set to bits 6:10 (RT/RS/FRT/FRS)
+	 * 27:31			Set to bits 11:15 (RA)
+	 */
+
+	/* bits 22:31 are filled in the same way for every form */
+	u32 dsisr = (inst >> 16) & 0x03ff;
+
+	switch (get_op(inst)) {
+	/* D-form */
+	case OP_LFS:
+	case OP_LFD:
+	case OP_STFD:
+	case OP_STFS:
+		dsisr |= ((inst >> 12) & 0x4000) |	/* bit 17 */
+			 ((inst >> 17) & 0x3c00);	/* bits 18:21 */
+		break;
+	/* X-form */
+	case 31:
+		dsisr |= ((inst << 14) & 0x18000) |	/* bits 15:16 */
+			 ((inst << 8) & 0x04000) |	/* bit 17 */
+			 ((inst << 3) & 0x03c00);	/* bits 18:21 */
+		break;
+	default:
+		printk(KERN_INFO "KVM: Unaligned instruction 0x%x\n", inst);
+		break;
+	}
+
+	return dsisr;
+}
+
+/*
+ * Compute the effective address (DAR) for an alignment interrupt caused by
+ * @inst.
+ *
+ * D-form floating-point loads/stores use (rA|0) plus the sign-extended
+ * 16-bit displacement; opcode 31 uses (rA|0) plus rB.  Any other opcode is
+ * logged and yields 0.
+ */
+ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst)
+{
+	ulong base_reg;
+	ulong ea = 0;
+
+	switch (get_op(inst)) {
+	case OP_LFS:
+	case OP_LFD:
+	case OP_STFD:
+	case OP_STFS:
+		base_reg = get_ra(inst);
+		/* rA == 0 selects a zero base rather than GPR0 */
+		ea = base_reg ? kvmppc_get_gpr(vcpu, base_reg) : 0;
+		ea += (s32)((s16)inst);
+		break;
+	case 31:
+		base_reg = get_ra(inst);
+		ea = base_reg ? kvmppc_get_gpr(vcpu, base_reg) : 0;
+		ea += kvmppc_get_gpr(vcpu, get_rb(inst));
+		break;
+	default:
+		printk(KERN_INFO "KVM: Unaligned instruction 0x%x\n", inst);
+		break;
+	}
+
+	return ea;
+}
diff --git a/arch/powerpc/kvm/book3s_exports.c b/arch/powerpc/kvm/book3s_exports.c
new file mode 100644
index 00000000..a150817d
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_exports.c
@@ -0,0 +1,35 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <linux/export.h>
+#include <asm/kvm_book3s.h>
+
+/*
+ * GPL-only exports of symbols that are defined outside this file.
+ * With CONFIG_KVM_BOOK3S_64_HV only the HV entry trampoline is exported;
+ * otherwise the entry trampoline and the FPU load helper are exported,
+ * plus the AltiVec and VSX helpers when those options are configured.
+ */
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+EXPORT_SYMBOL_GPL(kvmppc_hv_entry_trampoline);
+#else
+EXPORT_SYMBOL_GPL(kvmppc_entry_trampoline);
+EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu);
+#ifdef CONFIG_ALTIVEC
+EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec);
+#endif
+#ifdef CONFIG_VSX
+EXPORT_SYMBOL_GPL(kvmppc_load_up_vsx);
+#endif
+#endif
+
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
new file mode 100644
index 00000000..108d1f58
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -0,0 +1,1437 @@
+/*
+ * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
+ *
+ * Authors:
+ * Paul Mackerras <paulus@au1.ibm.com>
+ * Alexander Graf <agraf@suse.de>
+ * Kevin Wolf <mail@kevin-wolf.de>
+ *
+ * Description: KVM functions specific to running on Book 3S
+ * processors in hypervisor mode (specifically POWER7 and later).
+ *
+ * This file is derived from arch/powerpc/kvm/book3s.c,
+ * by Alexander Graf <agraf@suse.de>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/preempt.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/fs.h>
+#include <linux/anon_inodes.h>
+#include <linux/cpumask.h>
+#include <linux/spinlock.h>
+#include <linux/page-flags.h>
+
+#include <asm/reg.h>
+#include <asm/cputable.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu_context.h>
+#include <asm/lppaca.h>
+#include <asm/processor.h>
+#include <asm/cputhreads.h>
+#include <asm/page.h>
+#include <asm/hvcall.h>
+#include <asm/switch_to.h>
+#include <linux/gfp.h>
+#include <linux/vmalloc.h>
+#include <linux/highmem.h>
+#include <linux/hugetlb.h>
+
+/* #define EXIT_DEBUG */
+/* #define EXIT_DEBUG_SIMPLE */
+/* #define EXIT_DEBUG_INT */
+
+static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
+static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu);
+
+/*
+ * Record @vcpu and its virtual core in the kvm_hstate area of the local
+ * paca.  @cpu is not used.
+ */
+void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+	local_paca->kvm_hstate.kvm_vcore = vcpu->arch.vcore;
+	local_paca->kvm_hstate.kvm_vcpu = vcpu;
+}
+
+/* Counterpart of kvmppc_core_vcpu_load(); there is nothing to undo here. */
+void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+{
+}
+
+/*
+ * Set the guest MSR kept in vcpu->arch.shregs and take the vcpu out of the
+ * ceded state (which also cancels a running decrementer timer).
+ */
+void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
+{
+ vcpu->arch.shregs.msr = msr;
+ kvmppc_end_cede(vcpu);
+}
+
+/* Record @pvr as the processor version register value of @vcpu. */
+void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
+{
+ vcpu->arch.pvr = pvr;
+}
+
+/*
+ * Dump the register state of @vcpu with pr_err(): PC/MSR/trap, the 32 GPRs
+ * (two per line), CTR/LR, SRR0/1, SPRG0-3, CR/XER/DSISR/DAR, the fault
+ * DAR/DSISR, every SLB entry up to slb_max, and the VM's LPCR/SDR1 together
+ * with the last instruction.
+ */
+void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
+{
+	int gpr;
+	int slot;
+
+	pr_err("vcpu %p (%d):\n", vcpu, vcpu->vcpu_id);
+	pr_err("pc = %.16lx msr = %.16llx trap = %x\n",
+	       vcpu->arch.pc, vcpu->arch.shregs.msr, vcpu->arch.trap);
+
+	/* rN and rN+16 share a line */
+	for (gpr = 0; gpr < 16; ++gpr) {
+		pr_err("r%2d = %.16lx r%d = %.16lx\n",
+		       gpr, kvmppc_get_gpr(vcpu, gpr),
+		       gpr + 16, kvmppc_get_gpr(vcpu, gpr + 16));
+	}
+
+	pr_err("ctr = %.16lx lr = %.16lx\n",
+	       vcpu->arch.ctr, vcpu->arch.lr);
+	pr_err("srr0 = %.16llx srr1 = %.16llx\n",
+	       vcpu->arch.shregs.srr0, vcpu->arch.shregs.srr1);
+	pr_err("sprg0 = %.16llx sprg1 = %.16llx\n",
+	       vcpu->arch.shregs.sprg0, vcpu->arch.shregs.sprg1);
+	pr_err("sprg2 = %.16llx sprg3 = %.16llx\n",
+	       vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3);
+	pr_err("cr = %.8x xer = %.16lx dsisr = %.8x\n",
+	       vcpu->arch.cr, vcpu->arch.xer, vcpu->arch.shregs.dsisr);
+	pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar);
+	pr_err("fault dar = %.16lx dsisr = %.8x\n",
+	       vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
+
+	pr_err("SLB (%d entries):\n", vcpu->arch.slb_max);
+	for (slot = 0; slot < vcpu->arch.slb_max; ++slot) {
+		pr_err(" ESID = %.16llx VSID = %.16llx\n",
+		       vcpu->arch.slb[slot].orige, vcpu->arch.slb[slot].origv);
+	}
+
+	pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n",
+	       vcpu->kvm->arch.lpcr, vcpu->kvm->arch.sdr1,
+	       vcpu->arch.last_inst);
+}
+
+/*
+ * Look up the vcpu of @kvm whose vcpu_id equals @id.
+ *
+ * The search runs with kvm->lock held.  Returns the vcpu, or NULL when no
+ * vcpu has that id.
+ */
+struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
+{
+	struct kvm_vcpu *candidate, *found = NULL;
+	int idx;
+
+	mutex_lock(&kvm->lock);
+	kvm_for_each_vcpu(idx, candidate, kvm) {
+		if (candidate->vcpu_id != id)
+			continue;
+		found = candidate;
+		break;
+	}
+	mutex_unlock(&kvm->lock);
+
+	return found;
+}
+
+/*
+ * Initialise a newly registered VPA: set shared_proc and yield_count to 1.
+ * @vcpu is currently unused.
+ */
+static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
+{
+ vpa->shared_proc = 1;
+ vpa->yield_count = 1;
+}
+
+/*
+ * Handle the H_REGISTER_VPA hypercall issued by @vcpu on behalf of the vcpu
+ * whose id is @vcpuid (the "target" vcpu).
+ *
+ * A 3-bit sub-function is extracted from @flags:
+ *   1, 2, 3 - register a VPA, a dispatch trace log (DTL) or an SLB shadow
+ *             buffer located at guest logical address @vpa
+ *   5, 6, 7 - unregister the VPA, DTL or SLB shadow buffer
+ *   0, 4    - invalid
+ *
+ * Registration pins the guest page and reads the area length from offset 4
+ * (16 bits for a VPA, 32 bits otherwise).  The length must fit in the pinned
+ * region and meet the per-area minimum.  Any previous registration of the
+ * same kind on the target vcpu is unpinned before the new one is recorded.
+ *
+ * Returns H_SUCCESS, H_PARAMETER for bad arguments, or H_RESOURCE when a DTL
+ * or SLB shadow is registered without a VPA, or when a VPA is unregistered
+ * while a DTL or SLB shadow is still registered.
+ */
+static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
+				       unsigned long flags,
+				       unsigned long vcpuid, unsigned long vpa)
+{
+	struct kvm *kvm = vcpu->kvm;
+	unsigned long len, nb;
+	void *va;
+	struct kvm_vcpu *tvcpu;
+	int err = H_PARAMETER;
+
+	tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
+	if (!tvcpu)
+		return H_PARAMETER;
+
+	/* Isolate the 3-bit sub-function field */
+	flags >>= 63 - 18;
+	flags &= 7;
+	if (flags == 0 || flags == 4)
+		return H_PARAMETER;
+	if (flags < 4) {
+		if (vpa & 0x7f)
+			return H_PARAMETER;
+		/* DTL and SLB shadow need a VPA to be registered first */
+		if (flags >= 2 && !tvcpu->arch.vpa)
+			return H_RESOURCE;
+		/* registering new area; convert logical addr to real */
+		va = kvmppc_pin_guest_page(kvm, vpa, &nb);
+		if (va == NULL)
+			return H_PARAMETER;
+		if (flags <= 1)
+			len = *(unsigned short *)(va + 4);
+		else
+			len = *(unsigned int *)(va + 4);
+		if (len > nb)
+			goto out_unpin;
+		/*
+		 * When replacing an existing registration, release the
+		 * *target* vcpu's old page, not the caller's.
+		 */
+		switch (flags) {
+		case 1:		/* register VPA */
+			if (len < 640)
+				goto out_unpin;
+			if (tvcpu->arch.vpa)
+				kvmppc_unpin_guest_page(kvm, tvcpu->arch.vpa);
+			tvcpu->arch.vpa = va;
+			init_vpa(tvcpu, va);
+			break;
+		case 2:		/* register DTL */
+			if (len < 48)
+				goto out_unpin;
+			/* Round down to a whole number of 48-byte entries */
+			len -= len % 48;
+			if (tvcpu->arch.dtl)
+				kvmppc_unpin_guest_page(kvm, tvcpu->arch.dtl);
+			tvcpu->arch.dtl = va;
+			tvcpu->arch.dtl_end = va + len;
+			break;
+		case 3:		/* register SLB shadow buffer */
+			if (len < 16)
+				goto out_unpin;
+			if (tvcpu->arch.slb_shadow)
+				kvmppc_unpin_guest_page(kvm,
+							tvcpu->arch.slb_shadow);
+			tvcpu->arch.slb_shadow = va;
+			break;
+		}
+	} else {
+		switch (flags) {
+		case 5:		/* unregister VPA */
+			if (tvcpu->arch.slb_shadow || tvcpu->arch.dtl)
+				return H_RESOURCE;
+			if (!tvcpu->arch.vpa)
+				break;
+			kvmppc_unpin_guest_page(kvm, tvcpu->arch.vpa);
+			tvcpu->arch.vpa = NULL;
+			break;
+		case 6:		/* unregister DTL */
+			if (!tvcpu->arch.dtl)
+				break;
+			kvmppc_unpin_guest_page(kvm, tvcpu->arch.dtl);
+			tvcpu->arch.dtl = NULL;
+			break;
+		case 7:		/* unregister SLB shadow buffer */
+			if (!tvcpu->arch.slb_shadow)
+				break;
+			kvmppc_unpin_guest_page(kvm, tvcpu->arch.slb_shadow);
+			tvcpu->arch.slb_shadow = NULL;
+			break;
+		}
+	}
+	return H_SUCCESS;
+
+ out_unpin:
+	kvmppc_unpin_guest_page(kvm, va);
+	return err;
+}
+
+/*
+ * Handle in the kernel the PAPR hypercall whose number is in guest r3.
+ *
+ * H_ENTER, H_PROD and H_REGISTER_VPA are implemented here; H_CEDE and
+ * H_CONFER complete immediately with H_SUCCESS.  The result is written to
+ * guest r3 and hcall_needed is cleared.
+ *
+ * Returns RESUME_GUEST when the hcall was handled, or RESUME_HOST for any
+ * other hcall number (r3 and hcall_needed are then left untouched).
+ */
+int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
+{
+	unsigned long req = kvmppc_get_gpr(vcpu, 3);
+	unsigned long target, ret = H_SUCCESS;
+	struct kvm_vcpu *tvcpu;
+
+	switch (req) {
+	case H_ENTER:
+		ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
+					      kvmppc_get_gpr(vcpu, 5),
+					      kvmppc_get_gpr(vcpu, 6),
+					      kvmppc_get_gpr(vcpu, 7));
+		break;
+	case H_CEDE:
+		break;
+	case H_PROD:
+		target = kvmppc_get_gpr(vcpu, 4);
+		tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
+		if (!tvcpu) {
+			ret = H_PARAMETER;
+			break;
+		}
+		tvcpu->arch.prodded = 1;
+		/* Make the prod visible before looking at the ceded flag */
+		smp_mb();
+		/*
+		 * It is the prodded (target) vcpu that may be asleep after
+		 * ceding, so that is the one to test and wake -- not the
+		 * caller, which is running this hcall.
+		 */
+		if (tvcpu->arch.ceded) {
+			if (waitqueue_active(&tvcpu->wq)) {
+				wake_up_interruptible(&tvcpu->wq);
+				tvcpu->stat.halt_wakeup++;
+			}
+		}
+		break;
+	case H_CONFER:
+		break;
+	case H_REGISTER_VPA:
+		ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
+					kvmppc_get_gpr(vcpu, 5),
+					kvmppc_get_gpr(vcpu, 6));
+		break;
+	default:
+		return RESUME_HOST;
+	}
+	kvmppc_set_gpr(vcpu, 3, ret);
+	vcpu->arch.hcall_needed = 0;
+	return RESUME_GUEST;
+}
+
+/*
+ * Process the guest exit recorded in vcpu->arch.trap.
+ *
+ * Fills in run->exit_reason (KVM_EXIT_UNKNOWN unless an hcall is passed to
+ * userspace) and returns RESUME_GUEST when the guest can simply be
+ * re-entered or RESUME_HOST when control has to go back to the host side.
+ * An unrecognised trap dumps the registers and hits BUG().
+ * @tsk is not used.
+ */
+static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ struct task_struct *tsk)
+{
+ int r = RESUME_HOST;
+
+ vcpu->stat.sum_exits++;
+
+ run->exit_reason = KVM_EXIT_UNKNOWN;
+ run->ready_for_interrupt_injection = 1;
+ switch (vcpu->arch.trap) {
+ /* We're good on these - the host merely wanted to get our attention */
+ case BOOK3S_INTERRUPT_HV_DECREMENTER:
+ vcpu->stat.dec_exits++;
+ r = RESUME_GUEST;
+ break;
+ case BOOK3S_INTERRUPT_EXTERNAL:
+ vcpu->stat.ext_intr_exits++;
+ r = RESUME_GUEST;
+ break;
+ case BOOK3S_INTERRUPT_PERFMON:
+ r = RESUME_GUEST;
+ break;
+ case BOOK3S_INTERRUPT_PROGRAM:
+ {
+ ulong flags;
+ /*
+ * Normally program interrupts are delivered directly
+ * to the guest by the hardware, but we can get here
+ * as a result of a hypervisor emulation interrupt
+ * (e40) getting turned into a 700 by BML RTAS.
+ */
+ /* Pass the reason bits held in the guest MSR on to the guest */
+ flags = vcpu->arch.shregs.msr & 0x1f0000ull;
+ kvmppc_core_queue_program(vcpu, flags);
+ r = RESUME_GUEST;
+ break;
+ }
+ case BOOK3S_INTERRUPT_SYSCALL:
+ {
+ /* hcall - punt to userspace */
+ int i;
+
+ if (vcpu->arch.shregs.msr & MSR_PR) {
+ /* sc 1 from userspace - reflect to guest syscall */
+ kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_SYSCALL);
+ r = RESUME_GUEST;
+ break;
+ }
+ /* hcall number is in r3, its arguments in r4..r12 */
+ run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
+ for (i = 0; i < 9; ++i)
+ run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
+ run->exit_reason = KVM_EXIT_PAPR_HCALL;
+ vcpu->arch.hcall_needed = 1;
+ r = RESUME_HOST;
+ break;
+ }
+ /*
+ * We get these next two if the guest accesses a page which it thinks
+ * it has mapped but which is not actually present, either because
+ * it is for an emulated I/O device or because the corresponding
+ * host page has been paged out. Any other HDSI/HISI interrupts
+ * have been handled already.
+ */
+ case BOOK3S_INTERRUPT_H_DATA_STORAGE:
+ r = kvmppc_book3s_hv_page_fault(run, vcpu,
+ vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
+ break;
+ case BOOK3S_INTERRUPT_H_INST_STORAGE:
+ r = kvmppc_book3s_hv_page_fault(run, vcpu,
+ kvmppc_get_pc(vcpu), 0);
+ break;
+ /*
+ * This occurs if the guest executes an illegal instruction.
+ * We just generate a program interrupt to the guest, since
+ * we don't emulate any guest instructions at this stage.
+ */
+ case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
+ kvmppc_core_queue_program(vcpu, 0x80000);
+ r = RESUME_GUEST;
+ break;
+ default:
+ kvmppc_dump_regs(vcpu);
+ printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
+ vcpu->arch.trap, kvmppc_get_pc(vcpu),
+ vcpu->arch.shregs.msr);
+ r = RESUME_HOST;
+ BUG();
+ break;
+ }
+
+ return r;
+}
+
+/*
+ * Fill @sregs for userspace: the vcpu's PVR and the first slb_max SLB
+ * entries; every other field is zero.  Always returns 0.
+ */
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs)
+{
+	int i;
+
+	/*
+	 * Clear the structure first: doing it after storing the PVR would
+	 * wipe the value and report pvr == 0 to userspace.
+	 */
+	memset(sregs, 0, sizeof(struct kvm_sregs));
+	sregs->pvr = vcpu->arch.pvr;
+
+	for (i = 0; i < vcpu->arch.slb_max; i++) {
+		sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige;
+		sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
+	}
+
+	return 0;
+}
+
+/*
+ * Load the PVR and SLB contents supplied by userspace into @vcpu.
+ *
+ * Only entries with SLB_ESID_V set among the first slb_nr are taken; they
+ * are packed at the start of vcpu->arch.slb[] and slb_max is set to the
+ * number copied.  Always returns 0.
+ */
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs)
+{
+	int src;
+	int dst = 0;
+
+	kvmppc_set_pvr(vcpu, sregs->pvr);
+
+	for (src = 0; src < vcpu->arch.slb_nr; src++) {
+		/* Skip entries that are not marked valid */
+		if (!(sregs->u.s.ppc64.slb[src].slbe & SLB_ESID_V))
+			continue;
+
+		vcpu->arch.slb[dst].orige = sregs->u.s.ppc64.slb[src].slbe;
+		vcpu->arch.slb[dst].origv = sregs->u.s.ppc64.slb[src].slbv;
+		dst++;
+	}
+	vcpu->arch.slb_max = dst;
+
+	return 0;
+}
+
+/*
+ * KVM_GET_ONE_REG backend.  Only KVM_REG_PPC_HIOR is known, and it always
+ * reads as 0.  Returns the put_user() result for that register and -EINVAL
+ * for any other id.
+ */
+int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+	if (reg->id == KVM_REG_PPC_HIOR)
+		return put_user(0, (u64 __user *)reg->addr);
+
+	return -EINVAL;
+}
+
+/*
+ * KVM_SET_ONE_REG backend.  Only KVM_REG_PPC_HIOR is known, and only the
+ * value 0 is accepted for it.  Returns 0 on success, the get_user() error if
+ * the value cannot be read, and -EINVAL for a non-zero HIOR or any other id.
+ */
+int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+	u64 hior;
+	int rc;
+
+	if (reg->id != KVM_REG_PPC_HIOR)
+		return -EINVAL;
+
+	rc = get_user(hior, (u64 __user *)reg->addr);
+	if (rc)
+		return rc;
+
+	/* Only allow this to be set to zero */
+	return hior ? -EINVAL : 0;
+}
+
+/*
+ * Report whether this CPU can be used: 0 when it has CPU_FTR_HVMODE,
+ * -EIO otherwise.
+ */
+int kvmppc_core_check_processor_compat(void)
+{
+	return cpu_has_feature(CPU_FTR_HVMODE) ? 0 : -EIO;
+}
+
+/*
+ * Allocate and initialise the vcpu with number @id for @kvm.
+ *
+ * The vcpu is attached to virtual core @id / threads_per_core, which is
+ * allocated on first use under kvm->lock.  The vcpu starts in the
+ * KVMPPC_VCPU_STOPPED state and with the host PVR.
+ *
+ * Returns the new vcpu, or an ERR_PTR(): -EINVAL when the core number is
+ * out of range, -ENOMEM when the vcpu or its vcore cannot be allocated, or
+ * the error reported by kvm_vcpu_init().
+ */
+struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+	struct kvm_vcpu *vcpu;
+	int err = -EINVAL;
+	int core;
+	struct kvmppc_vcore *vcore;
+
+	core = id / threads_per_core;
+	if (core >= KVM_MAX_VCORES)
+		goto out;
+
+	err = -ENOMEM;
+	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+	if (!vcpu)
+		goto out;
+
+	err = kvm_vcpu_init(vcpu, kvm, id);
+	if (err)
+		goto free_vcpu;
+
+	vcpu->arch.shared = &vcpu->arch.shregs;
+	vcpu->arch.last_cpu = -1;
+	vcpu->arch.mmcr[0] = MMCR0_FC;
+	vcpu->arch.ctrl = CTRL_RUNLATCH;
+	/* default to host PVR, since we can't spoof it */
+	vcpu->arch.pvr = mfspr(SPRN_PVR);
+	kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
+
+	kvmppc_mmu_book3s_hv_init(vcpu);
+
+	/*
+	 * We consider the vcpu stopped until we see the first run ioctl for it.
+	 */
+	vcpu->arch.state = KVMPPC_VCPU_STOPPED;
+
+	init_waitqueue_head(&vcpu->arch.cpu_run);
+
+	/* Find the vcore for this vcpu, creating it on first use */
+	mutex_lock(&kvm->lock);
+	vcore = kvm->arch.vcores[core];
+	if (!vcore) {
+		vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL);
+		if (vcore) {
+			INIT_LIST_HEAD(&vcore->runnable_threads);
+			spin_lock_init(&vcore->lock);
+			init_waitqueue_head(&vcore->wq);
+		}
+		kvm->arch.vcores[core] = vcore;
+	}
+	mutex_unlock(&kvm->lock);
+
+	/*
+	 * err is 0 here after the successful kvm_vcpu_init(); without
+	 * resetting it the failure path would return ERR_PTR(0), i.e. NULL,
+	 * which callers do not treat as an error.  kvm_vcpu_init() must also
+	 * be undone before the vcpu is freed.
+	 */
+	if (!vcore) {
+		err = -ENOMEM;
+		goto uninit_vcpu;
+	}
+
+	spin_lock(&vcore->lock);
+	++vcore->num_threads;
+	spin_unlock(&vcore->lock);
+	vcpu->arch.vcore = vcore;
+
+	vcpu->arch.cpu_type = KVM_CPU_3S_64;
+	kvmppc_sanity_check(vcpu);
+
+	return vcpu;
+
+uninit_vcpu:
+	kvm_vcpu_uninit(vcpu);
+free_vcpu:
+	kmem_cache_free(kvm_vcpu_cache, vcpu);
+out:
+	return ERR_PTR(err);
+}
+
+/*
+ * Destroy @vcpu: unpin any guest pages still registered as its DTL, SLB
+ * shadow buffer or VPA, undo kvm_vcpu_init() and return the structure to
+ * kvm_vcpu_cache.
+ */
+void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+
+	if (vcpu->arch.dtl)
+		kvmppc_unpin_guest_page(kvm, vcpu->arch.dtl);
+
+	if (vcpu->arch.slb_shadow)
+		kvmppc_unpin_guest_page(kvm, vcpu->arch.slb_shadow);
+
+	if (vcpu->arch.vpa)
+		kvmppc_unpin_guest_page(kvm, vcpu->arch.vpa);
+
+	kvm_vcpu_uninit(vcpu);
+	kmem_cache_free(kvm_vcpu_cache, vcpu);
+}
+
+/*
+ * Arm the vcpu's decrementer hrtimer for the time left until dec_expires.
+ *
+ * If the timebase is already past dec_expires, the decrementer interrupt is
+ * queued straight away and no timer is started.  Otherwise the remaining
+ * timebase ticks are converted to nanoseconds, a relative hrtimer is started
+ * and timer_running is set.
+ */
+static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
+{
+	unsigned long now = get_tb();
+	unsigned long dec_nsec;
+	u64 ticks_left;
+
+	if (now > vcpu->arch.dec_expires) {
+		/* decrementer has already gone negative */
+		kvmppc_core_queue_dec(vcpu);
+		kvmppc_core_prepare_to_enter(vcpu);
+		return;
+	}
+
+	ticks_left = vcpu->arch.dec_expires - now;
+	dec_nsec = ticks_left * NSEC_PER_SEC / tb_ticks_per_sec;
+
+	hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
+		      HRTIMER_MODE_REL);
+	vcpu->arch.timer_running = 1;
+}
+
+/*
+ * Take @vcpu out of the ceded state and cancel its decrementer timer if one
+ * is running.
+ */
+static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.ceded = 0;
+
+	if (!vcpu->arch.timer_running)
+		return;
+
+	hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+	vcpu->arch.timer_running = 0;
+}
+
+extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
+extern void xics_wake_cpu(int cpu);
+
+/*
+ * Take @vcpu off @vc's list of runnable threads.
+ *
+ * Does nothing unless the vcpu is in the KVMPPC_VCPU_RUNNABLE state.
+ * Otherwise the vcpu becomes KVMPPC_VCPU_BUSY_IN_HOST, the vcore's
+ * n_runnable/n_busy counters are adjusted, and every vcpu behind it on the
+ * list has its ptid reduced by one.
+ */
+static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
+				   struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu *later = vcpu;
+
+	if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
+		return;
+
+	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
+	--vc->n_runnable;
+	++vc->n_busy;
+
+	/* decrement the physical thread id of each following vcpu */
+	list_for_each_entry_continue(later, &vc->runnable_threads,
+				     arch.run_list) {
+		--later->arch.ptid;
+	}
+
+	list_del(&vcpu->arch.run_list);
+}
+
+/*
+ * Prepare the hardware thread that will run @vcpu and, for secondary
+ * threads, wake it up.
+ *
+ * The target CPU is the vcore's pcpu plus the vcpu's ptid.  Its paca
+ * kvm_hstate is pointed at the vcpu and vcore before the thread is woken.
+ */
+static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
+{
+ int cpu;
+ struct paca_struct *tpaca;
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+ /* A vcpu about to run no longer needs its decrementer hrtimer */
+ if (vcpu->arch.timer_running) {
+ hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+ vcpu->arch.timer_running = 0;
+ }
+ cpu = vc->pcpu + vcpu->arch.ptid;
+ tpaca = &paca[cpu];
+ tpaca->kvm_hstate.kvm_vcpu = vcpu;
+ tpaca->kvm_hstate.kvm_vcore = vc;
+ tpaca->kvm_hstate.napping = 0;
+ vcpu->cpu = vc->pcpu;
+ /* Order the stores above before anything that follows */
+ smp_wmb();
+#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
+ /* ptid 0 runs on the calling thread; only the others need waking */
+ if (vcpu->arch.ptid) {
+ tpaca->cpu_start = 0x80;
+ wmb();
+ xics_wake_cpu(cpu);
+ ++vc->n_woken;
+ }
+#endif
+}
+
+/*
+ * Spin at low thread priority until vc->nap_count has caught up with
+ * vc->n_woken.  Gives up after 1000000 iterations with an error message.
+ */
+static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc)
+{
+	int spins;
+
+	HMT_low();
+	for (spins = 1; vc->nap_count < vc->n_woken; ++spins) {
+		if (spins >= 1000000) {
+			pr_err("kvmppc_wait_for_nap timeout %d %d\n",
+			       vc->nap_count, vc->n_woken);
+			break;
+		}
+		cpu_relax();
+	}
+	HMT_medium();
+}
+
+/*
+ * Returns 1 when the current CPU is thread 0 of its core and every other
+ * thread of that core is off-line, 0 otherwise.
+ */
+static int on_primary_thread(void)
+{
+	int cpu = smp_processor_id();
+	int sibling;
+
+	if (cpu_thread_in_core(cpu) != 0)
+		return 0;
+
+	/* cpu is thread 0, so its siblings are cpu + 1 .. cpu + threads - 1 */
+	for (sibling = 1; sibling < threads_per_core; ++sibling) {
+		if (cpu_online(cpu + sibling))
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Run a set of guest threads on a physical core.
+ * Called with vc->lock held.
+ *
+ * The lock is dropped around guest entry and around the per-vcpu exit
+ * handling, and is held again on return.
+ *
+ * Returns 0 without entering the guest if any runnable vcpu's task has a
+ * signal pending or if every runnable vcpu has ceded; returns 1 otherwise
+ * (including the case where we are not on the primary thread and each
+ * runnable vcpu is failed with -EBUSY).
+ */
+static int kvmppc_run_core(struct kvmppc_vcore *vc)
+{
+ struct kvm_vcpu *vcpu, *vcpu0, *vnext;
+ long ret;
+ u64 now;
+ int ptid;
+
+ /* don't start if any threads have a signal pending */
+ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+ if (signal_pending(vcpu->arch.run_task))
+ return 0;
+
+ /*
+ * Make sure we are running on thread 0, and that
+ * secondary threads are offline.
+ * XXX we should also block attempts to bring any
+ * secondary threads online.
+ */
+ if (threads_per_core > 1 && !on_primary_thread()) {
+ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+ vcpu->arch.ret = -EBUSY;
+ goto out;
+ }
+
+ /*
+ * Assign physical thread IDs, first to non-ceded vcpus
+ * and then to ceded ones.
+ */
+ ptid = 0;
+ vcpu0 = NULL;
+ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
+ if (!vcpu->arch.ceded) {
+ /* the first non-ceded vcpu gets ptid 0 and is the one we enter with */
+ if (!ptid)
+ vcpu0 = vcpu;
+ vcpu->arch.ptid = ptid++;
+ }
+ }
+ if (!vcpu0)
+ return 0; /* nothing to run */
+ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+ if (vcpu->arch.ceded)
+ vcpu->arch.ptid = ptid++;
+
+ /* reset the per-run vcore bookkeeping before starting any thread */
+ vc->n_woken = 0;
+ vc->nap_count = 0;
+ vc->entry_exit_count = 0;
+ vc->vcore_state = VCORE_RUNNING;
+ vc->in_guest = 0;
+ vc->pcpu = smp_processor_id();
+ vc->napping_threads = 0;
+ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+ kvmppc_start_thread(vcpu);
+
+ /* preemption stays disabled from here until after kvm_guest_exit() */
+ preempt_disable();
+ spin_unlock(&vc->lock);
+
+ kvm_guest_enter();
+ __kvmppc_vcore_entry(NULL, vcpu0);
+
+ spin_lock(&vc->lock);
+ /* disable sending of IPIs on virtual external irqs */
+ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+ vcpu->cpu = -1;
+ /* wait for secondary threads to finish writing their state to memory */
+ if (vc->nap_count < vc->n_woken)
+ kvmppc_wait_for_nap(vc);
+ /* prevent other vcpu threads from doing kvmppc_start_thread() now */
+ vc->vcore_state = VCORE_EXITING;
+ spin_unlock(&vc->lock);
+
+ /* make sure updates to secondary vcpu structs are visible now */
+ smp_mb();
+ kvm_guest_exit();
+
+ preempt_enable();
+ /*
+ * NOTE(review): vcpu here is the cursor left over from the list walk
+ * above, which ran to completion, so it does not refer to a real vcpu.
+ * Confirm that kvm_resched() does not dereference its argument.
+ */
+ kvm_resched(vcpu);
+
+ /* per-vcpu exit handling, done without vc->lock held */
+ now = get_tb();
+ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
+ /* cancel pending dec exception if dec is positive */
+ if (now < vcpu->arch.dec_expires &&
+ kvmppc_core_pending_dec(vcpu))
+ kvmppc_core_dequeue_dec(vcpu);
+
+ /* a vcpu with no recorded trap simply goes back into the guest */
+ ret = RESUME_GUEST;
+ if (vcpu->arch.trap)
+ ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
+ vcpu->arch.run_task);
+
+ vcpu->arch.ret = ret;
+ vcpu->arch.trap = 0;
+
+ /*
+ * A ceded vcpu that stays in the guest gets its timer armed;
+ * one that has to return to its task has the cede ended.
+ */
+ if (vcpu->arch.ceded) {
+ if (ret != RESUME_GUEST)
+ kvmppc_end_cede(vcpu);
+ else
+ kvmppc_set_timer(vcpu);
+ }
+ }
+
+ spin_lock(&vc->lock);
+ out:
+ vc->vcore_state = VCORE_INACTIVE;
+ /* take every vcpu that is not resuming the guest off the list and wake its task */
+ list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
+ arch.run_list) {
+ if (vcpu->arch.ret != RESUME_GUEST) {
+ kvmppc_remove_runnable(vc, vcpu);
+ wake_up(&vcpu->arch.cpu_run);
+ }
+ }
+
+ return 1;
+}
+
+/*
+ * Wait for some other vcpu thread to execute us, and
+ * wake us up when we need to handle something in the host.
+ *
+ * @vcpu: the vcpu whose cpu_run wait queue we sleep on
+ * @wait_state: task state to sleep in (callers in this file pass
+ * TASK_INTERRUPTIBLE or TASK_UNINTERRUPTIBLE)
+ */
+static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state)
+{
+ DEFINE_WAIT(wait);
+
+ prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
+ /*
+ * The state is checked only after we are queued on cpu_run, and we
+ * sleep only if the vcpu is still RUNNABLE at that point.
+ */
+ if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
+ schedule();
+ finish_wait(&vcpu->arch.cpu_run, &wait);
+}
+
+/*
+ * All the vcpus in this vcore are idle, so wait for a decrementer
+ * or external interrupt to one of the vcpus. vc->lock is held.
+ *
+ * The lock is dropped while sleeping and re-taken before returning.
+ * The vcore is marked VCORE_SLEEPING for the duration and set back to
+ * VCORE_INACTIVE on the way out.
+ */
+static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
+{
+ DEFINE_WAIT(wait);
+ struct kvm_vcpu *v;
+ int all_idle = 1;
+
+ prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+ vc->vcore_state = VCORE_SLEEPING;
+ spin_unlock(&vc->lock);
+ /*
+ * Re-check, now that we are on the wait queue, that every runnable
+ * vcpu is still ceded with no pending exceptions.
+ * NOTE(review): this list walk runs after vc->lock was dropped;
+ * confirm nothing can modify runnable_threads concurrently here.
+ */
+ list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
+ if (!v->arch.ceded || v->arch.pending_exceptions) {
+ all_idle = 0;
+ break;
+ }
+ }
+ if (all_idle)
+ schedule();
+ finish_wait(&vc->wq, &wait);
+ spin_lock(&vc->lock);
+ vc->vcore_state = VCORE_INACTIVE;
+}
+
+/*
+ * Make this vcpu runnable on its virtual core and stay here until it is
+ * no longer RUNNABLE or the current task has a signal pending.
+ *
+ * Whichever vcpu task finds the vcore inactive with no vcpu busy in the
+ * host runs the whole core (kvmppc_run_core) or, if every runnable vcpu
+ * has ceded, blocks the core (kvmppc_vcore_blocked).  The other vcpu
+ * tasks sleep in kvmppc_wait_for_exec() until woken.
+ *
+ * Returns vcpu->arch.ret: RESUME_GUEST, a value set by the exit handling
+ * in kvmppc_run_core(), or -EINTR when a signal is pending.
+ */
+static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+{
+ int n_ceded;
+ int prev_state;
+ struct kvmppc_vcore *vc;
+ struct kvm_vcpu *v, *vn;
+
+ kvm_run->exit_reason = 0;
+ vcpu->arch.ret = RESUME_GUEST;
+ vcpu->arch.trap = 0;
+
+ /*
+ * Synchronize with other threads in this virtual core
+ */
+ vc = vcpu->arch.vcore;
+ spin_lock(&vc->lock);
+ vcpu->arch.ceded = 0;
+ vcpu->arch.run_task = current;
+ vcpu->arch.kvm_run = kvm_run;
+ prev_state = vcpu->arch.state;
+ vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
+ list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
+ ++vc->n_runnable;
+
+ /*
+ * This happens the first time this is called for a vcpu.
+ * If the vcore is already running, we may be able to start
+ * this thread straight away and have it join in.
+ */
+ if (prev_state == KVMPPC_VCPU_STOPPED) {
+ if (vc->vcore_state == VCORE_RUNNING &&
+ VCORE_EXIT_COUNT(vc) == 0) {
+ /* we were just appended, so we take the next free thread id */
+ vcpu->arch.ptid = vc->n_runnable - 1;
+ kvmppc_start_thread(vcpu);
+ }
+
+ } else if (prev_state == KVMPPC_VCPU_BUSY_IN_HOST)
+ --vc->n_busy;
+
+ while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
+ !signal_pending(current)) {
+ /*
+ * Some vcpu is still busy in the host, or the core is already
+ * running/exiting/sleeping: sleep until woken, then re-evaluate.
+ */
+ if (vc->n_busy || vc->vcore_state != VCORE_INACTIVE) {
+ spin_unlock(&vc->lock);
+ kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE);
+ spin_lock(&vc->lock);
+ continue;
+ }
+ /* block the core if every runnable vcpu has ceded, else run it */
+ n_ceded = 0;
+ list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
+ n_ceded += v->arch.ceded;
+ if (n_ceded == vc->n_runnable)
+ kvmppc_vcore_blocked(vc);
+ else
+ kvmppc_run_core(vc);
+
+ /*
+ * Prepare every still-runnable vcpu for the next entry, and kick
+ * out (with -EINTR) any whose task now has a signal pending.
+ */
+ list_for_each_entry_safe(v, vn, &vc->runnable_threads,
+ arch.run_list) {
+ kvmppc_core_prepare_to_enter(v);
+ if (signal_pending(v->arch.run_task)) {
+ kvmppc_remove_runnable(vc, v);
+ v->stat.signal_exits++;
+ v->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
+ v->arch.ret = -EINTR;
+ wake_up(&v->arch.cpu_run);
+ }
+ }
+ }
+
+ if (signal_pending(current)) {
+ /*
+ * If the core is running or exiting, wait (uninterruptibly)
+ * before taking ourselves off the runnable list.
+ */
+ if (vc->vcore_state == VCORE_RUNNING ||
+ vc->vcore_state == VCORE_EXITING) {
+ spin_unlock(&vc->lock);
+ kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE);
+ spin_lock(&vc->lock);
+ }
+ /* still runnable: nobody removed us, so do it ourselves */
+ if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
+ kvmppc_remove_runnable(vc, vcpu);
+ vcpu->stat.signal_exits++;
+ kvm_run->exit_reason = KVM_EXIT_INTR;
+ vcpu->arch.ret = -EINTR;
+ }
+ }
+
+ spin_unlock(&vc->lock);
+ return vcpu->arch.ret;
+}
+
+/*
+ * Top-level "run this vcpu" entry point.
+ *
+ * Rejects vcpus not marked sane (-EINVAL, KVM_EXIT_INTERNAL_ERROR),
+ * returns -EINTR straight away if a signal is already pending, performs
+ * the one-time VRMA/RMA setup, then loops in kvmppc_run_vcpu() for as
+ * long as the result is RESUME_GUEST.  PAPR hypercalls made with MSR_PR
+ * clear are handled here between iterations.
+ *
+ * Returns the first result that is not RESUME_GUEST, or an error from
+ * the RMA setup.
+ */
+int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+ int r;
+
+ if (!vcpu->arch.sane) {
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ return -EINVAL;
+ }
+
+ kvmppc_core_prepare_to_enter(vcpu);
+
+ /* No need to go into the guest when all we'll do is come back out */
+ if (signal_pending(current)) {
+ run->exit_reason = KVM_EXIT_INTR;
+ return -EINTR;
+ }
+
+ /* On the first time here, set up VRMA or RMA */
+ if (!vcpu->kvm->arch.rma_setup_done) {
+ r = kvmppc_hv_setup_rma(vcpu);
+ if (r)
+ return r;
+ }
+
+ /* flush the current task's FP, Altivec and VSX state to its thread struct */
+ flush_fp_to_thread(current);
+ flush_altivec_to_thread(current);
+ flush_vsx_to_thread(current);
+ vcpu->arch.wqp = &vcpu->arch.vcore->wq;
+ vcpu->arch.pgdir = current->mm->pgd;
+
+ do {
+ r = kvmppc_run_vcpu(run, vcpu);
+
+ /* only hypercalls issued with MSR_PR clear are serviced here */
+ if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
+ !(vcpu->arch.shregs.msr & MSR_PR)) {
+ r = kvmppc_pseries_do_hcall(vcpu);
+ kvmppc_core_prepare_to_enter(vcpu);
+ }
+ } while (r == RESUME_GUEST);
+ return r;
+}
+
+/*
+ * Number of pages needed to hold the TCE table for a DMA window of
+ * window_size bytes: one u64 entry per (1 << SPAPR_TCE_SHIFT) bytes of
+ * window, rounded up to whole pages.
+ */
+static long kvmppc_stt_npages(unsigned long window_size)
+{
+	unsigned long table_bytes;
+
+	table_bytes = (window_size >> SPAPR_TCE_SHIFT) * sizeof(u64);
+	return ALIGN(table_bytes, PAGE_SIZE) / PAGE_SIZE;
+}
+
+/*
+ * Tear down a TCE table: unlink it from its VM's list and free its
+ * pages and the table itself under kvm->lock, then drop the reference
+ * on the VM.
+ */
+static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt)
+{
+	struct kvm *kvm = stt->kvm;
+	long npages;
+	int pg;
+
+	mutex_lock(&kvm->lock);
+	list_del(&stt->list);
+	npages = kvmppc_stt_npages(stt->window_size);
+	for (pg = 0; pg < npages; pg++)
+		__free_page(stt->pages[pg]);
+	kfree(stt);
+	mutex_unlock(&kvm->lock);
+
+	/* kvm was saved above because stt has been freed by now */
+	kvm_put_kvm(kvm);
+}
+
+/*
+ * Fault handler for mappings of a TCE table fd: hand back the table
+ * page at the faulting page offset with an extra reference, or SIGBUS
+ * if the offset lies beyond the table.
+ */
+static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
+
+	if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size))
+		return VM_FAULT_SIGBUS;
+
+	vmf->page = stt->pages[vmf->pgoff];
+	get_page(vmf->page);
+	return 0;
+}
+
+/* VMA operations for a mapped TCE table; only page faults are handled. */
+static const struct vm_operations_struct kvm_spapr_tce_vm_ops = {
+ .fault = kvm_spapr_tce_fault,
+};
+
+/*
+ * mmap handler for a TCE table fd: installs the fault-driven VMA
+ * operations. Always returns 0.
+ */
+static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ vma->vm_ops = &kvm_spapr_tce_vm_ops;
+ return 0;
+}
+
+/*
+ * release handler for a TCE table fd: destroys the table stored in the
+ * file's private data.  Always returns 0.
+ */
+static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
+{
+	release_spapr_tce_table(filp->private_data);
+	return 0;
+}
+
+/*
+ * File operations for the anonymous-inode fd returned by
+ * kvm_vm_ioctl_create_spapr_tce(): it can be mapped and released.
+ */
+static struct file_operations kvm_spapr_tce_fops = {
+ .mmap = kvm_spapr_tce_mmap,
+ .release = kvm_spapr_tce_release,
+};
+
+/*
+ * Create a TCE table for the logical I/O bus number args->liobn covering
+ * a DMA window of args->window_size bytes, and return a file descriptor
+ * through which userspace can map the table.
+ *
+ * Returns the new fd on success, -EBUSY if the LIOBN already has a table,
+ * -ENOMEM if the table or one of its pages cannot be allocated, or the
+ * error from anon_inode_getfd().
+ *
+ * The duplicate check, the fd creation and the insertion into the VM's
+ * table list are all done under kvm->lock.  This stops two callers racing
+ * to create the same LIOBN, and guarantees the table is on the list and
+ * the VM reference is held before release_spapr_tce_table() (which takes
+ * kvm->lock first) can undo them.  If no fd could be created, nothing is
+ * published and everything allocated here is freed again.
+ */
+long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
+				   struct kvm_create_spapr_tce *args)
+{
+	struct kvmppc_spapr_tce_table *stt;
+	struct kvmppc_spapr_tce_table *other;
+	long npages;
+	long i;
+	int ret = -ENOMEM;
+
+	npages = kvmppc_stt_npages(args->window_size);
+
+	stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
+		      GFP_KERNEL);
+	if (!stt)
+		return ret;
+
+	stt->liobn = args->liobn;
+	stt->window_size = args->window_size;
+	stt->kvm = kvm;
+
+	for (i = 0; i < npages; i++) {
+		stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
+		if (!stt->pages[i])
+			goto fail;
+	}
+
+	mutex_lock(&kvm->lock);
+
+	/* Check this LIOBN hasn't been previously allocated */
+	ret = 0;
+	list_for_each_entry(other, &kvm->arch.spapr_tce_tables, list) {
+		if (other->liobn == args->liobn) {
+			ret = -EBUSY;
+			break;
+		}
+	}
+
+	if (!ret)
+		ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
+				       stt, O_RDWR);
+
+	/* Only publish the table once userspace holds an fd for it */
+	if (ret >= 0) {
+		list_add(&stt->list, &kvm->arch.spapr_tce_tables);
+		kvm_get_kvm(kvm);
+	}
+
+	mutex_unlock(&kvm->lock);
+
+	if (ret >= 0)
+		return ret;
+
+fail:
+	/* kzalloc() zeroed pages[], so unallocated slots are NULL */
+	for (i = 0; i < npages; i++)
+		if (stt->pages[i])
+			__free_page(stt->pages[i]);
+	kfree(stt);
+	return ret;
+}
+
+/*
+ * Map an RMA size in bytes to the corresponding RMLS (real mode limit
+ * selector) field value, or -1 if the size has no encoding.
+ * Assumes POWER7 or PPC970.
+ */
+static inline int lpcr_rmls(unsigned long rma_size)
+{
+	int rmls = -1;		/* stays -1 unless a supported size matches */
+
+	switch (rma_size) {
+	case 32ul << 20:	/* 32 MB */
+		if (cpu_has_feature(CPU_FTR_ARCH_206))
+			rmls = 8;	/* only supported on POWER7 */
+		break;
+	case 64ul << 20:	/* 64 MB */
+		rmls = 3;
+		break;
+	case 128ul << 20:	/* 128 MB */
+		rmls = 7;
+		break;
+	case 256ul << 20:	/* 256 MB */
+		rmls = 4;
+		break;
+	case 1ul << 30:		/* 1 GB */
+		rmls = 2;
+		break;
+	case 16ul << 30:	/* 16 GB */
+		rmls = 1;
+		break;
+	case 256ul << 30:	/* 256 GB */
+		rmls = 0;
+		break;
+	default:
+		break;
+	}
+	return rmls;
+}
+
+/*
+ * Fault handler for mappings of an RMA fd: hand back the page at the
+ * faulting offset within the RMA with an extra reference, or SIGBUS if
+ * the offset is past the end of the area.
+ */
+static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct kvmppc_linear_info *ri = vma->vm_file->private_data;
+
+	if (vmf->pgoff >= ri->npages)
+		return VM_FAULT_SIGBUS;
+
+	vmf->page = pfn_to_page(ri->base_pfn + vmf->pgoff);
+	get_page(vmf->page);
+	return 0;
+}
+
+/* VMA operations for a mapped RMA; only page faults are handled. */
+static const struct vm_operations_struct kvm_rma_vm_ops = {
+ .fault = kvm_rma_fault,
+};
+
+/*
+ * mmap handler for an RMA fd: marks the VMA VM_RESERVED and installs
+ * the fault-driven VMA operations. Always returns 0.
+ */
+static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ vma->vm_flags |= VM_RESERVED;
+ vma->vm_ops = &kvm_rma_vm_ops;
+ return 0;
+}
+
+/*
+ * release handler for an RMA fd: drops the reference on the RMA stored
+ * in the file's private data.  Always returns 0.
+ */
+static int kvm_rma_release(struct inode *inode, struct file *filp)
+{
+	kvm_release_rma(filp->private_data);
+	return 0;
+}
+
+/*
+ * File operations for the anonymous-inode fd returned by
+ * kvm_vm_ioctl_allocate_rma(). kvmppc_hv_setup_rma() also compares a
+ * VMA's f_op against this table to recognise a preallocated RMA mapping.
+ */
+static struct file_operations kvm_rma_fops = {
+ .mmap = kvm_rma_mmap,
+ .release = kvm_rma_release,
+};
+
+/*
+ * Hand one of the preallocated RMAs to userspace as a mappable file
+ * descriptor and report its size in bytes through ret->rma_size.
+ *
+ * Returns the new fd on success, -ENOMEM if no RMA is available, or the
+ * error from anon_inode_getfd().  On failure the RMA reference is dropped
+ * and *ret is left untouched.
+ */
+long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
+{
+	struct kvmppc_linear_info *ri;
+	unsigned long rma_size;
+	long fd;
+
+	ri = kvm_alloc_rma();
+	if (!ri)
+		return -ENOMEM;
+
+	/*
+	 * Read the size while we still own our reference: once the fd
+	 * exists (or once we release ri on failure) it is no longer ours.
+	 */
+	rma_size = ri->npages << PAGE_SHIFT;
+
+	fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR);
+	if (fd < 0) {
+		kvm_release_rma(ri);
+		return fd;
+	}
+
+	ret->rma_size = rma_size;
+	return fd;
+}
+
+/*
+ * Get (and clear) the dirty memory log for a memory slot.
+ *
+ * Runs under kvm->slots_lock.  Returns 0 on success, -EINVAL for an
+ * out-of-range slot number, -ENOENT if the slot has no dirty bitmap,
+ * the error from kvmppc_hv_get_dirty_log(), or -EFAULT if the bitmap
+ * cannot be copied out to userspace.
+ */
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
+{
+	struct kvm_memory_slot *slot;
+	unsigned long nbytes;
+	int r;
+
+	mutex_lock(&kvm->slots_lock);
+
+	if (log->slot >= KVM_MEMORY_SLOTS) {
+		r = -EINVAL;
+		goto unlock;
+	}
+
+	slot = id_to_memslot(kvm->memslots, log->slot);
+	if (!slot->dirty_bitmap) {
+		r = -ENOENT;
+		goto unlock;
+	}
+
+	/* Start from an all-clean bitmap, then let the HV code fill it in */
+	nbytes = kvm_dirty_bitmap_bytes(slot);
+	memset(slot->dirty_bitmap, 0, nbytes);
+
+	r = kvmppc_hv_get_dirty_log(kvm, slot);
+	if (r)
+		goto unlock;
+
+	if (copy_to_user(log->dirty_bitmap, slot->dirty_bitmap, nbytes))
+		r = -EFAULT;
+
+unlock:
+	mutex_unlock(&kvm->slots_lock);
+	return r;
+}
+
+/*
+ * Return the SLB VSID-word page-size bits for a page of psize bytes:
+ * nothing for sizes up to 4k (0x1000), SLB_VSID_L | SLB_VSID_LP_01 for
+ * 64k (0x10000), and SLB_VSID_L alone for any other larger size.
+ */
+static unsigned long slb_pgsize_encoding(unsigned long psize)
+{
+	if (psize <= 0x1000)
+		return 0;
+	if (psize == 0x10000)
+		return SLB_VSID_L | SLB_VSID_LP_01;
+	return SLB_VSID_L;
+}
+
+/*
+ * Prepare for a memory slot being set up.  When MMU notifiers are not in
+ * use and the slot has no slot_phys array yet, allocate a zeroed array
+ * with one entry per page of the region and record its length.
+ *
+ * Returns 0 on success or -ENOMEM if the array cannot be allocated.
+ */
+int kvmppc_core_prepare_memory_region(struct kvm *kvm,
+				struct kvm_userspace_memory_region *mem)
+{
+	unsigned long nr_pages;
+	unsigned long *phys_array;
+
+	/* Nothing to do with MMU notifiers, or if the array already exists */
+	if (kvm->arch.using_mmu_notifiers || kvm->arch.slot_phys[mem->slot])
+		return 0;
+
+	/* Allocate a slot_phys array */
+	nr_pages = mem->memory_size >> PAGE_SHIFT;
+	phys_array = vzalloc(nr_pages * sizeof(unsigned long));
+	if (!phys_array)
+		return -ENOMEM;
+
+	kvm->arch.slot_phys[mem->slot] = phys_array;
+	kvm->arch.slot_npages[mem->slot] = nr_pages;
+	return 0;
+}
+
+/*
+ * Release the pages recorded in a slot's slot_phys array and free the
+ * array.  Each entry flagged KVMPPC_GOT_PAGE has its page marked dirty
+ * and its reference dropped.  Does nothing if the slot has no array.
+ */
+static void unpin_slot(struct kvm *kvm, int slot_id)
+{
+	unsigned long *phys_array = kvm->arch.slot_phys[slot_id];
+	unsigned long count = kvm->arch.slot_npages[slot_id];
+	unsigned long idx;
+	struct page *pg;
+
+	if (!phys_array)
+		return;
+
+	spin_lock(&kvm->arch.slot_phys_lock);
+	for (idx = 0; idx < count; idx++) {
+		if (phys_array[idx] & KVMPPC_GOT_PAGE) {
+			pg = pfn_to_page(phys_array[idx] >> PAGE_SHIFT);
+			SetPageDirty(pg);
+			put_page(pg);
+		}
+	}
+	kvm->arch.slot_phys[slot_id] = NULL;
+	spin_unlock(&kvm->arch.slot_phys_lock);
+
+	/* The array was unhooked under the lock; free it outside */
+	vfree(phys_array);
+}
+
+/* Memory-region commit hook; this implementation has nothing to do. */
+void kvmppc_core_commit_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem)
+{
+}
+
+/*
+ * One-time setup of the guest's real mode area, done when a vcpu of the
+ * VM is first run.
+ *
+ * Looks at the host mapping that backs guest physical address 0.  If it
+ * is a mapping of one of our preallocated RMAs (an fd from
+ * kvm_vm_ioctl_allocate_rma() mapped at the start of the memslot), the
+ * guest is set up to use that RMO region; otherwise a virtual RMA (VRMA)
+ * is set up, which the PPC970 path refuses with -EPERM.
+ *
+ * Serialised by kvm->lock; a vcpu that finds rma_setup_done already set
+ * returns 0 immediately.
+ *
+ * Returns 0 on success, -EINVAL if there is no usable memory at guest
+ * physical 0, the page size is not supported for a VRMA or the RMA size
+ * has no RMLS encoding, or -EPERM as described above.
+ */
+static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
+{
+	int err = 0;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvmppc_linear_info *ri = NULL;
+	unsigned long hva;
+	struct kvm_memory_slot *memslot;
+	struct vm_area_struct *vma;
+	unsigned long lpcr, senc;
+	unsigned long psize, porder;
+	unsigned long rma_size;
+	int rmls_sel;
+	unsigned long rmls;
+	unsigned long *physp;
+	unsigned long i, npages;
+
+	mutex_lock(&kvm->lock);
+	if (kvm->arch.rma_setup_done)
+		goto out;	/* another vcpu beat us to it */
+
+	/* Look up the memslot for guest physical address 0 */
+	memslot = gfn_to_memslot(kvm, 0);
+
+	/* We must have some memory at 0 by now */
+	err = -EINVAL;
+	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
+		goto out;
+
+	/* Look up the VMA for the start of this memory slot */
+	hva = memslot->userspace_addr;
+	down_read(&current->mm->mmap_sem);
+	vma = find_vma(current->mm, hva);
+	if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO))
+		goto up_out;
+
+	psize = vma_kernel_pagesize(vma);
+	porder = __ilog2(psize);
+
+	/* Is this one of our preallocated RMAs? */
+	if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops &&
+	    hva == vma->vm_start)
+		ri = vma->vm_file->private_data;
+
+	up_read(&current->mm->mmap_sem);
+
+	if (!ri) {
+		/* On POWER7, use VRMA; on PPC970, give up */
+		err = -EPERM;
+		if (cpu_has_feature(CPU_FTR_ARCH_201)) {
+			pr_err("KVM: CPU requires an RMO\n");
+			goto out;
+		}
+
+		/* We can handle 4k, 64k or 16M pages in the VRMA */
+		err = -EINVAL;
+		if (!(psize == 0x1000 || psize == 0x10000 ||
+		      psize == 0x1000000))
+			goto out;
+
+		/* Update VRMASD field in the LPCR */
+		senc = slb_pgsize_encoding(psize);
+		kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
+			(VRMA_VSID << SLB_VSID_SHIFT_1T);
+		lpcr = kvm->arch.lpcr & ~LPCR_VRMASD;
+		lpcr |= senc << (LPCR_VRMASD_SH - 4);
+		kvm->arch.lpcr = lpcr;
+
+		/* Create HPTEs in the hash page table for the VRMA */
+		kvmppc_map_vrma(vcpu, memslot, porder);
+
+	} else {
+		/* Set up to use an RMO region */
+		rma_size = ri->npages;
+		if (rma_size > memslot->npages)
+			rma_size = memslot->npages;
+		rma_size <<= PAGE_SHIFT;
+
+		/*
+		 * lpcr_rmls() reports an unsupported size as -1, so the
+		 * result must be tested while it is still a signed value.
+		 */
+		rmls_sel = lpcr_rmls(rma_size);
+		err = -EINVAL;
+		if (rmls_sel < 0) {
+			pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);
+			goto out;
+		}
+		rmls = rmls_sel;
+		atomic_inc(&ri->use_count);
+		kvm->arch.rma = ri;
+
+		/* Update LPCR and RMOR */
+		lpcr = kvm->arch.lpcr;
+		if (cpu_has_feature(CPU_FTR_ARCH_201)) {
+			/* PPC970; insert RMLS value (split field) in HID4 */
+			lpcr &= ~((1ul << HID4_RMLS0_SH) |
+				  (3ul << HID4_RMLS2_SH));
+			lpcr |= ((rmls >> 2) << HID4_RMLS0_SH) |
+				((rmls & 3) << HID4_RMLS2_SH);
+			/* RMOR is also in HID4 */
+			lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff)
+				<< HID4_RMOR_SH;
+		} else {
+			/* POWER7 */
+			lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L);
+			lpcr |= rmls << LPCR_RMLS_SH;
+			kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT;
+		}
+		kvm->arch.lpcr = lpcr;
+		pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
+			ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
+
+		/*
+		 * Initialize phys addrs of pages in RMO.  The order stored
+		 * with each address is that of the whole RMA, but only as
+		 * many entries as the memslot has pages are written: the
+		 * slot_phys array is sized from the memslot, and it only
+		 * exists when MMU notifiers are not in use.
+		 */
+		npages = ri->npages;
+		porder = __ilog2(npages);
+		physp = kvm->arch.slot_phys[memslot->id];
+		if (physp) {
+			if (npages > memslot->npages)
+				npages = memslot->npages;
+			spin_lock(&kvm->arch.slot_phys_lock);
+			for (i = 0; i < npages; ++i)
+				physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) +
+					porder;
+			spin_unlock(&kvm->arch.slot_phys_lock);
+		}
+	}
+
+	/* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
+	smp_wmb();
+	kvm->arch.rma_setup_done = 1;
+	err = 0;
+ out:
+	mutex_unlock(&kvm->lock);
+	return err;
+
+ up_out:
+	up_read(&current->mm->mmap_sem);
+	goto out;
+}
+
+/*
+ * Per-VM initialisation: allocate the hashed page table, initialise the
+ * TCE table list and the slot_phys lock, record the host SDR1, LPID and
+ * LPCR (HID4 on PPC970), and compute the initial guest LPCR value.
+ *
+ * Returns 0 on success or the error from kvmppc_alloc_hpt().
+ */
+int kvmppc_core_init_vm(struct kvm *kvm)
+{
+	long r;
+	unsigned long lpcr;
+
+	/* Allocate hashed page table */
+	r = kvmppc_alloc_hpt(kvm);
+	if (r)
+		return r;
+
+	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
+
+	kvm->arch.rma = NULL;
+
+	kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
+
+	if (cpu_has_feature(CPU_FTR_ARCH_201)) {
+		/* PPC970; HID4 is effectively the LPCR */
+		unsigned long lpid = kvm->arch.lpid;
+		kvm->arch.host_lpid = 0;
+		kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4);
+		/*
+		 * Both masks are built as unsigned long: HID4 is a 64-bit
+		 * value, and a plain int constant must not be shifted by a
+		 * count that may reach or exceed the width of int.
+		 */
+		lpcr &= ~((3ul << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH));
+		/* the LPID is split: bits above the low 4, and the low 4 */
+		lpcr |= ((lpid >> 4) << HID4_LPID1_SH) |
+			((lpid & 0xf) << HID4_LPID5_SH);
+	} else {
+		/* POWER7; init LPCR for virtual RMA mode */
+		kvm->arch.host_lpid = mfspr(SPRN_LPID);
+		kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
+		/* keep only the host's PECE and LPES bits */
+		lpcr &= LPCR_PECE | LPCR_LPES;
+		lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
+			LPCR_VPM0 | LPCR_VPM1;
+		kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
+			(VRMA_VSID << SLB_VSID_SHIFT_1T);
+	}
+	kvm->arch.lpcr = lpcr;
+
+	kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206);
+	spin_lock_init(&kvm->arch.slot_phys_lock);
+	return 0;
+}
+
+/*
+ * Per-VM teardown: unpin every memory slot when MMU notifiers are not
+ * in use, drop the VM's RMA reference if it has one, and free the hashed
+ * page table.  Warns if any TCE table is still on the VM's list.
+ */
+void kvmppc_core_destroy_vm(struct kvm *kvm)
+{
+	unsigned long slot;
+
+	if (!kvm->arch.using_mmu_notifiers) {
+		for (slot = 0; slot < KVM_MEM_SLOTS_NUM; slot++)
+			unpin_slot(kvm, slot);
+	}
+
+	if (kvm->arch.rma != NULL) {
+		kvm_release_rma(kvm->arch.rma);
+		kvm->arch.rma = NULL;
+	}
+
+	kvmppc_free_hpt(kvm);
+	WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
+}
+
+/* These are stubs for now */
+/* Flush of a physical address range; intentionally does nothing here. */
+void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
+{
+}
+
+/*
+ * We don't need to emulate any privileged instructions or dcbz,
+ * so every instruction emulation request is reported as a failure.
+ */
+int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int inst, int *advance)
+{
+ return EMULATE_FAIL;
+}
+
+/* mtspr emulation is not provided here; always returns EMULATE_FAIL. */
+int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+{
+ return EMULATE_FAIL;
+}
+
+/* mfspr emulation is not provided here; always returns EMULATE_FAIL. */
+int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+{
+ return EMULATE_FAIL;
+}
+
+/*
+ * Module init: register with the generic KVM core, then initialise the
+ * HV MMU support.  If the second step fails, the KVM registration is
+ * undone so that a failed load leaves nothing registered behind.
+ *
+ * Returns 0 on success or the error from whichever step failed.
+ */
+static int kvmppc_book3s_hv_init(void)
+{
+	int r;
+
+	r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+	if (r)
+		return r;
+
+	r = kvmppc_mmu_hv_init();
+	if (r)
+		kvm_exit();
+
+	return r;
+}
+
+/* Module exit: undo the kvm_init() done by kvmppc_book3s_hv_init(). */
+static void kvmppc_book3s_hv_exit(void)
+{
+ kvm_exit();
+}
+
+module_init(kvmppc_book3s_hv_init);
+module_exit(kvmppc_book3s_hv_exit);
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
new file mode 100644
index 00000000..e1b60f56
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -0,0 +1,230 @@
+/*
+ * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/preempt.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/bootmem.h>
+#include <linux/init.h>
+
+#include <asm/cputable.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+
+#define KVM_LINEAR_RMA 0
+#define KVM_LINEAR_HPT 1
+
+static void __init kvm_linear_init_one(ulong size, int count, int type);
+static struct kvmppc_linear_info *kvm_alloc_linear(int type);
+static void kvm_release_linear(struct kvmppc_linear_info *ri);
+
+/*************** RMA *************/
+
+/*
+ * This maintains a list of RMAs (real mode areas) for KVM guests to use.
+ * Each RMA has to be physically contiguous and of a size that the
+ * hardware supports. PPC970 and POWER7 support 64MB, 128MB and 256MB,
+ * and other larger sizes. Since we are unlikely to be able to allocate
+ * that much physically contiguous memory after the system is up and running,
+ * we preallocate a set of RMAs in early boot for KVM to use.
+ */
+static unsigned long kvm_rma_size = 64 << 20; /* 64MB */
+static unsigned long kvm_rma_count;
+
+/*
+ * Map an RMA size in bytes to the corresponding RMLS (real mode limit
+ * selector) field value, or -1 if the size has no encoding.
+ * Assumes POWER7 or PPC970.
+ */
+static inline int lpcr_rmls(unsigned long rma_size)
+{
+	int rmls = -1;		/* stays -1 unless a supported size matches */
+
+	switch (rma_size) {
+	case 32ul << 20:	/* 32 MB */
+		if (cpu_has_feature(CPU_FTR_ARCH_206))
+			rmls = 8;	/* only supported on POWER7 */
+		break;
+	case 64ul << 20:	/* 64 MB */
+		rmls = 3;
+		break;
+	case 128ul << 20:	/* 128 MB */
+		rmls = 7;
+		break;
+	case 256ul << 20:	/* 256 MB */
+		rmls = 4;
+		break;
+	case 1ul << 30:		/* 1 GB */
+		rmls = 2;
+		break;
+	case 16ul << 30:	/* 16 GB */
+		rmls = 1;
+		break;
+	case 256ul << 30:	/* 256 GB */
+		rmls = 0;
+		break;
+	default:
+		break;
+	}
+	return rmls;
+}
+
+/*
+ * Early parameter "kvm_rma_size": size of each preallocated RMA, parsed
+ * with memparse(). Returns 1 if no value was supplied, 0 otherwise.
+ * The value is not validated here; kvm_linear_init() checks it against
+ * lpcr_rmls() before using it.
+ */
+static int __init early_parse_rma_size(char *p)
+{
+ if (!p)
+ return 1;
+
+ kvm_rma_size = memparse(p, &p);
+
+ return 0;
+}
+early_param("kvm_rma_size", early_parse_rma_size);
+
+/*
+ * Early parameter "kvm_rma_count": number of RMAs to preallocate.
+ * Returns 1 if no value was supplied, 0 otherwise.
+ */
+static int __init early_parse_rma_count(char *p)
+{
+ if (!p)
+ return 1;
+
+ /* base 0: the number's prefix selects the radix */
+ kvm_rma_count = simple_strtoul(p, NULL, 0);
+
+ return 0;
+}
+early_param("kvm_rma_count", early_parse_rma_count);
+
+/*
+ * Take a preallocated RMA off the free list.
+ * Returns NULL if none of type KVM_LINEAR_RMA is free.
+ */
+struct kvmppc_linear_info *kvm_alloc_rma(void)
+{
+ return kvm_alloc_linear(KVM_LINEAR_RMA);
+}
+EXPORT_SYMBOL_GPL(kvm_alloc_rma);
+
+/*
+ * Drop a reference on an RMA; it goes back on the free list when the
+ * last reference is dropped.
+ */
+void kvm_release_rma(struct kvmppc_linear_info *ri)
+{
+ kvm_release_linear(ri);
+}
+EXPORT_SYMBOL_GPL(kvm_release_rma);
+
+/*************** HPT *************/
+
+/*
+ * This maintains a list of big linear HPT tables that contain the GVA->HPA
+ * memory mappings. If we don't reserve those early on, we might not be able
+ * to get a big (usually 16MB) linear memory region from the kernel anymore.
+ */
+
+static unsigned long kvm_hpt_count;
+
+/*
+ * Early parameter "kvm_hpt_count": number of linear HPT regions to
+ * preallocate. Returns 1 if no value was supplied, 0 otherwise.
+ */
+static int __init early_parse_hpt_count(char *p)
+{
+ if (!p)
+ return 1;
+
+ /* base 0: the number's prefix selects the radix */
+ kvm_hpt_count = simple_strtoul(p, NULL, 0);
+
+ return 0;
+}
+early_param("kvm_hpt_count", early_parse_hpt_count);
+
+/*
+ * Take a preallocated HPT region off the free list.
+ * Returns NULL if none of type KVM_LINEAR_HPT is free.
+ */
+struct kvmppc_linear_info *kvm_alloc_hpt(void)
+{
+ return kvm_alloc_linear(KVM_LINEAR_HPT);
+}
+EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
+
+/*
+ * Drop a reference on an HPT region; it goes back on the free list when
+ * the last reference is dropped.
+ */
+void kvm_release_hpt(struct kvmppc_linear_info *li)
+{
+ kvm_release_linear(li);
+}
+EXPORT_SYMBOL_GPL(kvm_release_hpt);
+
+/*************** generic *************/
+
+static LIST_HEAD(free_linears);
+static DEFINE_SPINLOCK(linear_lock);
+
+/*
+ * Preallocate `count` physically contiguous regions of `size` bytes each
+ * from bootmem, aligned to their own size, and queue them on the free
+ * list tagged with `type` (KVM_LINEAR_RMA or KVM_LINEAR_HPT).  The
+ * reference count of every page in each region is raised by one.
+ */
+static void __init kvm_linear_init_one(ulong size, int count, int type)
+{
+	const char *typestr = (type == KVM_LINEAR_RMA) ? "RMA" : "HPT";
+	unsigned long npages = size >> PAGE_SHIFT;
+	struct kvmppc_linear_info *infos, *li;
+	unsigned long idx, pgno;
+	struct page *first_pg;
+	void *region;
+
+	if (!count)
+		return;
+
+	infos = alloc_bootmem(count * sizeof(struct kvmppc_linear_info));
+	for (idx = 0; idx < count; ++idx) {
+		li = &infos[idx];
+
+		region = alloc_bootmem_align(size, size);
+		pr_info("Allocated KVM %s at %p (%ld MB)\n", typestr, region,
+			size >> 20);
+
+		li->base_virt = region;
+		li->base_pfn = __pa(region) >> PAGE_SHIFT;
+		li->npages = npages;
+		li->type = type;
+		list_add_tail(&li->list, &free_linears);
+		atomic_set(&li->use_count, 0);
+
+		/* Take one reference on each page of the region */
+		first_pg = pfn_to_page(li->base_pfn);
+		for (pgno = 0; pgno < npages; ++pgno)
+			atomic_inc(&first_pg[pgno]._count);
+	}
+}
+
+/*
+ * Take the first free region of the requested type off the free list,
+ * give it a reference and zero its memory.  Returns NULL if no region
+ * of that type is free.  Runs under linear_lock.
+ */
+static struct kvmppc_linear_info *kvm_alloc_linear(int type)
+{
+	struct kvmppc_linear_info *cur;
+	struct kvmppc_linear_info *found = NULL;
+
+	spin_lock(&linear_lock);
+	list_for_each_entry(cur, &free_linears, list) {
+		if (cur->type == type) {
+			found = cur;
+			break;
+		}
+	}
+	if (found) {
+		list_del(&found->list);
+		atomic_inc(&found->use_count);
+		memset(found->base_virt, 0, found->npages << PAGE_SHIFT);
+	}
+	spin_unlock(&linear_lock);
+
+	return found;
+}
+
+/*
+ * Drop one reference on a linear region; when the count reaches zero
+ * the region is put back on the tail of the free list.
+ */
+static void kvm_release_linear(struct kvmppc_linear_info *ri)
+{
+	if (!atomic_dec_and_test(&ri->use_count))
+		return;
+
+	spin_lock(&linear_lock);
+	list_add_tail(&ri->list, &free_linears);
+	spin_unlock(&linear_lock);
+}
+
+/*
+ * Called at boot time while the bootmem allocator is active, to
+ * preallocate contiguous physical memory for guests: kvm_hpt_count
+ * hash page tables always, and kvm_rma_count real mode areas of
+ * kvm_rma_size bytes on PPC970 in HV mode when both are non-zero and
+ * the size is one the hardware can encode.
+ */
+void __init kvm_linear_init(void)
+{
+	/* HPT */
+	kvm_linear_init_one(1 << HPT_ORDER, kvm_hpt_count, KVM_LINEAR_HPT);
+
+	/* RMA: only do this on PPC970 in HV mode, and only if requested */
+	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
+	    !cpu_has_feature(CPU_FTR_ARCH_201) ||
+	    !kvm_rma_size || !kvm_rma_count)
+		return;
+
+	/* Check that the requested size is one supported in hardware */
+	if (lpcr_rmls(kvm_rma_size) >= 0) {
+		kvm_linear_init_one(kvm_rma_size, kvm_rma_count,
+				    KVM_LINEAR_RMA);
+		return;
+	}
+
+	pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
+}
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
new file mode 100644
index 00000000..d3fb4df0
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -0,0 +1,170 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * Derived from book3s_interrupts.S, which is:
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/kvm_asm.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/asm-offsets.h>
+#include <asm/exception-64s.h>
+#include <asm/ppc-opcode.h>
+
+/*****************************************************************************
+ * *
+ * Guest entry / exit code that is in kernel module memory (vmalloc) *
+ * *
+ ****************************************************************************/
+
+/* Registers:
+ * r4: vcpu pointer
+ *
+ * Saves the host state that must survive a guest run (LR, non-volatile
+ * GPRs, CR, DSCR, DABR, MSR, PMU registers when the host is using the
+ * PMU, decrementer expiry), hard-disables interrupts, and branches to
+ * the partition switch code. Control comes back at
+ * kvmppc_handler_highmem below, which undoes the stack frame.
+ */
+_GLOBAL(__kvmppc_vcore_entry)
+
+ /* Write correct stack frame */
+ mflr r0
+ std r0,PPC_LR_STKOFF(r1)
+
+ /* Save host state to the stack */
+ stdu r1, -SWITCH_FRAME_SIZE(r1)
+
+ /* Save non-volatile registers (r14 - r31) and CR */
+ SAVE_NVGPRS(r1)
+ mfcr r3
+ std r3, _CCR(r1)
+
+ /* Save host DSCR */
+BEGIN_FTR_SECTION
+ mfspr r3, SPRN_DSCR
+ std r3, HSTATE_DSCR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+ /* Save host DABR */
+ mfspr r3, SPRN_DABR
+ std r3, HSTATE_DABR(r13)
+
+ /* Hard-disable interrupts */
+ mfmsr r10
+ std r10, HSTATE_HOST_MSR(r13)
+ /*
+ * Rotate left by 48 so that MSR[EE] becomes the top bit, clear that
+ * one bit with the mask, then rotate by a further 16 to put every
+ * other bit back where it was.
+ */
+ rldicl r10,r10,48,1
+ rotldi r10,r10,16
+ mtmsrd r10,1
+
+ /* Save host PMU registers and load guest PMU registers */
+ /* R4 is live here (vcpu pointer) but not r3 or r5 */
+ li r3, 1
+ sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
+ mfspr r7, SPRN_MMCR0 /* save MMCR0 */
+ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */
+ isync
+ ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */
+ lbz r5, LPPACA_PMCINUSE(r3)
+ cmpwi r5, 0
+ beq 31f /* skip if not */
+ mfspr r5, SPRN_MMCR1
+ mfspr r6, SPRN_MMCRA
+ /* r7 still holds the MMCR0 value read before the counters were frozen */
+ std r7, HSTATE_MMCR(r13)
+ std r5, HSTATE_MMCR + 8(r13)
+ std r6, HSTATE_MMCR + 16(r13)
+ mfspr r3, SPRN_PMC1
+ mfspr r5, SPRN_PMC2
+ mfspr r6, SPRN_PMC3
+ mfspr r7, SPRN_PMC4
+ mfspr r8, SPRN_PMC5
+ mfspr r9, SPRN_PMC6
+ /* PMC7 and PMC8 are read and saved only when CPU_FTR_ARCH_201 is set */
+BEGIN_FTR_SECTION
+ mfspr r10, SPRN_PMC7
+ mfspr r11, SPRN_PMC8
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ stw r3, HSTATE_PMC(r13)
+ stw r5, HSTATE_PMC + 4(r13)
+ stw r6, HSTATE_PMC + 8(r13)
+ stw r7, HSTATE_PMC + 12(r13)
+ stw r8, HSTATE_PMC + 16(r13)
+ stw r9, HSTATE_PMC + 20(r13)
+BEGIN_FTR_SECTION
+ stw r10, HSTATE_PMC + 24(r13)
+ stw r11, HSTATE_PMC + 28(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+31:
+
+ /*
+ * Put whatever is in the decrementer into the
+ * hypervisor decrementer.
+ * The sign-extended decrementer value plus the current timebase is
+ * also saved, as the host decrementer expiry time.
+ */
+ mfspr r8,SPRN_DEC
+ mftb r7
+ mtspr SPRN_HDEC,r8
+ extsw r8,r8
+ add r8,r8,r7
+ std r8,HSTATE_DECEXP(r13)
+
+ /*
+ * On PPC970, if the guest vcpu has an external interrupt pending,
+ * send ourselves an IPI so as to interrupt the guest once it
+ * enables interrupts. (It must have interrupts disabled,
+ * otherwise we would already have delivered the interrupt.)
+ */
+BEGIN_FTR_SECTION
+ ld r0, VCPU_PENDING_EXC(r4)
+ li r7, (1 << BOOK3S_IRQPRIO_EXTERNAL)
+ oris r7, r7, (1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
+ and. r0, r0, r7
+ beq 32f
+ /* keep the vcpu pointer in non-volatile r31 across the call */
+ mr r31, r4
+ lhz r3, PACAPACAINDEX(r13)
+ bl smp_send_reschedule
+ nop
+ mr r4, r31
+32:
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+
+ /* Jump to partition switch code */
+ bl .kvmppc_hv_entry_trampoline
+ nop
+
+/*
+ * We return here in virtual mode after the guest exits
+ * with something that we can't handle in real mode.
+ * Interrupts are enabled again at this point.
+ *
+ * This undoes the frame set up by __kvmppc_vcore_entry and returns to
+ * its caller.
+ */
+
+.global kvmppc_handler_highmem
+kvmppc_handler_highmem:
+
+ /*
+ * Register usage at this point:
+ *
+ * R1 = host R1
+ * R2 = host R2
+ * R12 = exit handler id
+ * R13 = PACA
+ */
+
+ /* Restore non-volatile host registers (r14 - r31) and CR */
+ REST_NVGPRS(r1)
+ ld r4, _CCR(r1)
+ mtcr r4
+
+ /* Pop the switch frame and return through the LR saved on entry */
+ addi r1, r1, SWITCH_FRAME_SIZE
+ ld r0, PPC_LR_STKOFF(r1)
+ mtlr r0
+ blr
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
new file mode 100644
index 00000000..cec4dadd
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -0,0 +1,817 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * Copyright 2010-2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/hugetlb.h>
+#include <linux/module.h>
+
+#include <asm/tlbflush.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu-hash64.h>
+#include <asm/hvcall.h>
+#include <asm/synch.h>
+#include <asm/ppc-opcode.h>
+
+/*
+ * Convert a pointer into vmalloc space to the matching address in the
+ * kernel linear mapping by looking the address up in swapper_pg_dir.
+ * Returns NULL if no present PTE backs the address.
+ */
+static void *real_vmalloc_addr(void *x)
+{
+	unsigned long vaddr = (unsigned long) x;
+	pte_t *ptep = find_linux_pte(swapper_pg_dir, vaddr);
+
+	if (ptep && pte_present(*ptep)) {
+		/* assume we don't have huge pages in vmalloc space... */
+		unsigned long phys = (pte_pfn(*ptep) << PAGE_SHIFT) |
+				     (vaddr & ~PAGE_MASK);
+
+		return __va(phys);
+	}
+	return NULL;
+}
+
+/*
+ * Link the HPTE at pte_index (whose reverse-map entry is rev) onto the
+ * circular chain of HPTEs that map one real page.
+ *
+ * The caller must hold the chain lock on *rmap; the final store to *rmap
+ * releases it.  When realmode is non-zero, pointers into kvm->arch.revmap
+ * are translated with real_vmalloc_addr() before being dereferenced.
+ */
+void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
+			     unsigned long *rmap, long pte_index, int realmode)
+{
+	struct revmap_entry *first, *last;
+	unsigned long head_idx;
+
+	if (!(*rmap & KVMPPC_RMAP_PRESENT)) {
+		/* Chain is empty: the new entry points at itself */
+		rev->forw = rev->back = pte_index;
+		head_idx = pte_index;
+	} else {
+		/* Insert just before the current head, i.e. at the tail */
+		head_idx = *rmap & KVMPPC_RMAP_INDEX;
+		first = &kvm->arch.revmap[head_idx];
+		if (realmode)
+			first = real_vmalloc_addr(first);
+		last = &kvm->arch.revmap[first->back];
+		if (realmode)
+			last = real_vmalloc_addr(last);
+		rev->forw = head_idx;
+		rev->back = first->back;
+		last->forw = pte_index;
+		first->back = pte_index;
+	}
+	/* Make the chain updates visible before the unlocking store */
+	smp_wmb();
+	*rmap = head_idx | KVMPPC_RMAP_REFERENCED | KVMPPC_RMAP_PRESENT; /* unlock */
+}
+EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
+
+/*
+ * Unlink the HPTE at pte_index from the reverse-map chain of the real
+ * page it maps.  The R and C bits found in hpte_r are folded into the
+ * saved guest view of the HPTE and into the rmap word for the page.
+ * The rmap chain lock is taken and released here.  Nothing is unlinked
+ * if the guest frame no longer belongs to a usable memslot.
+ */
+static void remove_revmap_chain(struct kvm *kvm, long pte_index,
+				struct revmap_entry *rev,
+				unsigned long hpte_v, unsigned long hpte_r)
+{
+	unsigned long rc = hpte_r & (HPTE_R_R | HPTE_R_C);
+	unsigned long guest_r, gfn, first;
+	struct revmap_entry *succ, *pred;
+	struct kvm_memory_slot *slot;
+	unsigned long *rmapp;
+
+	rev->guest_rpte |= rc;
+	guest_r = rev->guest_rpte;
+	gfn = hpte_rpn(guest_r, hpte_page_size(hpte_v, guest_r));
+	slot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
+	if (!slot || (slot->flags & KVM_MEMSLOT_INVALID))
+		return;
+
+	rmapp = real_vmalloc_addr(&slot->rmap[gfn - slot->base_gfn]);
+	lock_rmap(rmapp);
+
+	first = *rmapp & KVMPPC_RMAP_INDEX;
+	succ = real_vmalloc_addr(&kvm->arch.revmap[rev->forw]);
+	pred = real_vmalloc_addr(&kvm->arch.revmap[rev->back]);
+	succ->back = rev->back;
+	pred->forw = rev->forw;
+	if (first == pte_index) {
+		/* We were the head: advance it, or empty the chain */
+		first = rev->forw;
+		if (first == pte_index)
+			*rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
+		else
+			*rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | first;
+	}
+	*rmapp |= rc << KVMPPC_RMAP_RC_SHIFT;
+	unlock_rmap(rmapp);
+}
+
+/*
+ * Find the Linux PTE backing hva in the vcpu's page directory.
+ *
+ * On entry *pte_sizep holds the page size the caller needs; once a PTE
+ * has been found it is overwritten with the size of the host page.
+ * Returns a zero PTE if there is no PTE, if the host page is smaller
+ * than requested, or if the PTE is not present; otherwise returns the
+ * result of kvmppc_read_update_linux_pte().
+ */
+static pte_t lookup_linux_pte(struct kvm_vcpu *vcpu, unsigned long hva,
+			      int writing, unsigned long *pte_sizep)
+{
+	unsigned long wanted = *pte_sizep;
+	unsigned int hugeshift;
+	pte_t *ptep;
+
+	ptep = find_linux_pte_or_hugepte(vcpu->arch.pgdir, hva, &hugeshift);
+	if (!ptep)
+		return __pte(0);
+
+	*pte_sizep = hugeshift ? (1ul << hugeshift) : PAGE_SIZE;
+	if (wanted > *pte_sizep || !pte_present(*ptep))
+		return __pte(0);
+
+	return kvmppc_read_update_linux_pte(ptep, writing);
+}
+
+/*
+ * Store hpte_v into the first doubleword of an HPTE, preceded by a
+ * release barrier so that earlier accesses are ordered before the store.
+ * Callers in this file pass a value with HPTE_V_HVLOCK clear, which is
+ * what releases the HPTE lock.
+ */
+static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
+{
+	asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
+	*hpte = hpte_v;
+}
+
+long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
+ long pte_index, unsigned long pteh, unsigned long ptel)
+{ /*
+ * Insert the guest-supplied HPTE (pteh, ptel) into the guest hashed
+ * page table.  Without H_EXACT in flags, the first free slot in the
+ * group of 8 that contains pte_index is used; with H_EXACT only the
+ * slot pte_index itself is tried.  The index actually used is handed
+ * back in vcpu->arch.gpr[4].
+ *
+ * Returns H_SUCCESS, H_PARAMETER (one of the validity checks below
+ * rejected the request), H_TOO_HARD (non-notifier case and no physical
+ * address recorded for the page) or H_PTEG_FULL (no free slot).
+ */
+ struct kvm *kvm = vcpu->kvm;
+ unsigned long i, pa, gpa, gfn, psize;
+ unsigned long slot_fn, hva;
+ unsigned long *hpte;
+ struct revmap_entry *rev;
+ unsigned long g_ptel = ptel; /* guest's original second dword, saved in the revmap below */
+ struct kvm_memory_slot *memslot;
+ unsigned long *physp, pte_size;
+ unsigned long is_io;
+ unsigned long *rmap;
+ pte_t pte;
+ unsigned int writing;
+ unsigned long mmu_seq;
+ unsigned long rcbits;
+ bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING; /* when true, revmap/rmap/slot_phys pointers go through real_vmalloc_addr() */
+
+ psize = hpte_page_size(pteh, ptel);
+ if (!psize)
+ return H_PARAMETER;
+ writing = hpte_is_writable(ptel);
+ pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID); /* these bits are decided here, never taken from the caller */
+
+ /* used later to detect if we might have been invalidated */
+ mmu_seq = kvm->mmu_notifier_seq;
+ smp_rmb();
+
+ /* Find the memslot (if any) for this address */
+ gpa = (ptel & HPTE_R_RPN) & ~(psize - 1); /* guest physical address rounded down to the page size */
+ gfn = gpa >> PAGE_SHIFT;
+ memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
+ pa = 0;
+ is_io = ~0ul; /* ~0 = cache attributes unknown; the WIMG check below is skipped */
+ rmap = NULL;
+ if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
+ /* PPC970 can't do emulated MMIO */
+ if (!cpu_has_feature(CPU_FTR_ARCH_206))
+ return H_PARAMETER;
+ /* Emulated MMIO - mark this with key=31 */
+ pteh |= HPTE_V_ABSENT;
+ ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO;
+ goto do_insert;
+ }
+
+ /* Check if the requested page fits entirely in the memslot. */
+ if (!slot_is_aligned(memslot, psize))
+ return H_PARAMETER;
+ slot_fn = gfn - memslot->base_gfn;
+ rmap = &memslot->rmap[slot_fn];
+
+ if (!kvm->arch.using_mmu_notifiers) {
+ physp = kvm->arch.slot_phys[memslot->id];
+ if (!physp)
+ return H_PARAMETER;
+ physp += slot_fn;
+ if (realmode)
+ physp = real_vmalloc_addr(physp);
+ pa = *physp; /* entry packs the physical address with cache bits and a page-order field, split out below */
+ if (!pa)
+ return H_TOO_HARD;
+ is_io = pa & (HPTE_R_I | HPTE_R_W);
+ pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
+ pa &= PAGE_MASK;
+ } else {
+ /* Translate to host virtual address */
+ hva = gfn_to_hva_memslot(memslot, gfn);
+
+ /* Look up the Linux PTE for the backing page */
+ pte_size = psize;
+ pte = lookup_linux_pte(vcpu, hva, writing, &pte_size);
+ if (pte_present(pte)) {
+ if (writing && !pte_write(pte))
+ /* make the actual HPTE be read-only */
+ ptel = hpte_make_readonly(ptel);
+ is_io = hpte_cache_bits(pte_val(pte));
+ pa = pte_pfn(pte) << PAGE_SHIFT;
+ }
+ }
+ if (pte_size < psize)
+ return H_PARAMETER;
+ if (pa && pte_size > psize)
+ pa |= gpa & (pte_size - 1); /* offset of the guest page inside the larger host page */
+
+ ptel &= ~(HPTE_R_PP0 - psize);
+ ptel |= pa;
+
+ if (pa)
+ pteh |= HPTE_V_VALID;
+ else
+ pteh |= HPTE_V_ABSENT; /* no backing page found: insert as absent rather than valid */
+
+ /* Check WIMG */
+ if (is_io != ~0ul && !hpte_cache_flags_ok(ptel, is_io)) {
+ if (is_io)
+ return H_PARAMETER;
+ /*
+ * Allow guest to map emulated device memory as
+ * uncacheable, but actually make it cacheable.
+ */
+ ptel &= ~(HPTE_R_W|HPTE_R_I|HPTE_R_G);
+ ptel |= HPTE_R_M;
+ }
+
+ /* Find and lock the HPTEG slot to use */
+ do_insert:
+ if (pte_index >= HPT_NPTE)
+ return H_PARAMETER;
+ if (likely((flags & H_EXACT) == 0)) {
+ pte_index &= ~7UL; /* start of the 8-entry group */
+ hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); /* 16 bytes per HPTE */
+ for (i = 0; i < 8; ++i) {
+ if ((*hpte & HPTE_V_VALID) == 0 &&
+ try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
+ HPTE_V_ABSENT))
+ break;
+ hpte += 2;
+ }
+ if (i == 8) {
+ /*
+ * Since try_lock_hpte doesn't retry (not even stdcx.
+ * failures), it could be that there is a free slot
+ * but we transiently failed to lock it. Try again,
+ * actually locking each slot and checking it.
+ */
+ hpte -= 16; /* undo the 8 steps of 2 dwords taken by the first pass */
+ for (i = 0; i < 8; ++i) {
+ while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
+ cpu_relax();
+ if (!(*hpte & (HPTE_V_VALID | HPTE_V_ABSENT)))
+ break;
+ *hpte &= ~HPTE_V_HVLOCK;
+ hpte += 2;
+ }
+ if (i == 8)
+ return H_PTEG_FULL;
+ }
+ pte_index += i; /* index of the slot that was claimed */
+ } else {
+ hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
+ if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
+ HPTE_V_ABSENT)) {
+ /* Lock the slot and check again */
+ while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
+ cpu_relax();
+ if (*hpte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
+ *hpte &= ~HPTE_V_HVLOCK;
+ return H_PTEG_FULL;
+ }
+ }
+ }
+
+ /* Save away the guest's idea of the second HPTE dword */
+ rev = &kvm->arch.revmap[pte_index];
+ if (realmode)
+ rev = real_vmalloc_addr(rev);
+ if (rev)
+ rev->guest_rpte = g_ptel;
+
+ /* Link HPTE into reverse-map chain */
+ if (pteh & HPTE_V_VALID) {
+ if (realmode)
+ rmap = real_vmalloc_addr(rmap);
+ lock_rmap(rmap);
+ /* Check for pending invalidations under the rmap chain lock */
+ if (kvm->arch.using_mmu_notifiers &&
+ mmu_notifier_retry(vcpu, mmu_seq)) {
+ /* inval in progress, write a non-present HPTE */
+ pteh |= HPTE_V_ABSENT;
+ pteh &= ~HPTE_V_VALID;
+ unlock_rmap(rmap);
+ } else {
+ kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
+ realmode); /* also releases the rmap lock */
+ /* Only set R/C in real HPTE if already set in *rmap */
+ rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
+ ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C);
+ }
+ }
+
+ hpte[1] = ptel;
+
+ /* Write the first HPTE dword, unlocking the HPTE and making it valid */
+ eieio(); /* order the second-dword store before the first-dword store */
+ hpte[0] = pteh;
+ asm volatile("ptesync" : : : "memory");
+
+ vcpu->arch.gpr[4] = pte_index; /* report the chosen slot to the guest */
+ return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_enter);
+
+#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token)) /* 32-bit view of this CPU's paca lock_token; stored as the lock value */
+
+static inline int try_lock_tlbie(unsigned int *lock)
+{ /*
+ * Make one attempt to take the lock word at *lock.  Returns non-zero
+ * if the word was 0 and has been replaced by LOCK_TOKEN, zero if it
+ * was found non-zero (lock already held).  Callers spin on this.
+ */
+ unsigned int tmp, old; /* tmp is listed as an output operand but never referenced in the template */
+ unsigned int token = LOCK_TOKEN;
+
+ asm volatile("1:lwarx %1,0,%2\n" /* old = *lock, with reservation */
+ " cmpwi cr0,%1,0\n"
+ " bne 2f\n" /* non-zero: give up without storing */
+ " stwcx. %3,0,%2\n" /* try to store the token */
+ " bne- 1b\n" /* reservation lost: start over */
+ " isync\n"
+ "2:"
+ : "=&r" (tmp), "=&r" (old)
+ : "r" (lock), "r" (token)
+ : "cc", "memory");
+ return old == 0;
+}
+
+/*
+ * H_REMOVE hypercall handler: invalidate the HPTE at pte_index.
+ *
+ * flags may contain H_AVPN (the entry, ignoring its low 7 bits, must equal
+ * avpn), H_ANDCOND (the entry ANDed with avpn must be zero) and H_LOCAL
+ * (a local tlbiel is sufficient).  va is not used.
+ *
+ * On success the previous first doubleword is returned to the guest in
+ * gpr[4] and the guest's view of the second doubleword in gpr[5].
+ *
+ * Returns H_PARAMETER for an out-of-range index, H_NOT_FOUND if the slot
+ * is empty or fails the match conditions, and H_SUCCESS otherwise.
+ */
+long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
+		     unsigned long pte_index, unsigned long avpn,
+		     unsigned long va)
+{
+	struct kvm *kvm = vcpu->kvm;
+	unsigned long *hpte;
+	unsigned long v, r, rb;
+	struct revmap_entry *rev;
+
+	if (pte_index >= HPT_NPTE)
+		return H_PARAMETER;
+	hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
+	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
+		cpu_relax();
+	if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
+	    ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn) ||
+	    ((flags & H_ANDCOND) && (hpte[0] & avpn) != 0)) {
+		hpte[0] &= ~HPTE_V_HVLOCK;
+		return H_NOT_FOUND;
+	}
+
+	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+	v = hpte[0] & ~HPTE_V_HVLOCK;
+	if (v & HPTE_V_VALID) {
+		/* Stop new translations, then flush the old one from the TLB */
+		hpte[0] &= ~HPTE_V_VALID;
+		rb = compute_tlbie_rb(v, hpte[1], pte_index);
+		if (!(flags & H_LOCAL) && atomic_read(&kvm->online_vcpus) > 1) {
+			while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
+				cpu_relax();
+			asm volatile("ptesync" : : : "memory");
+			asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
+				     : : "r" (rb), "r" (kvm->arch.lpid));
+			asm volatile("ptesync" : : : "memory");
+			kvm->arch.tlbie_lock = 0;
+		} else {
+			asm volatile("ptesync" : : : "memory");
+			asm volatile("tlbiel %0" : : "r" (rb));
+			asm volatile("ptesync" : : : "memory");
+		}
+		/* Read PTE low word after tlbie to get final R/C values */
+		remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]);
+	}
+	r = rev->guest_rpte;
+	unlock_hpte(hpte, 0);
+
+	/*
+	 * HPTE_V_ABSENT is a hypervisor-private marker for an entry the
+	 * guest believes is valid.  Report such an entry as valid, the same
+	 * way kvmppc_h_read() does, instead of exposing the private bit.
+	 */
+	if (v & HPTE_V_ABSENT)
+		v = (v & ~HPTE_V_ABSENT) | HPTE_V_VALID;
+
+	vcpu->arch.gpr[4] = v;
+	vcpu->arch.gpr[5] = r;
+	return H_SUCCESS;
+}
+
+long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
+{ /*
+ * Process up to four remove requests passed in gpr[4] onwards.  Each
+ * request uses two registers: a control word (top byte = request type
+ * and flags, low 56 bits = HPTE index) and a value to match against.
+ * The control word is overwritten with a response code in its top
+ * byte: 0x80 removed, 0x90 not found, 0xa0 parameter error.
+ *
+ * Valid entries are collected into a batch (left locked) so that
+ * their TLB invalidations can be issued together.
+ *
+ * Returns H_SUCCESS, or H_PARAMETER after the first malformed request.
+ */
+ struct kvm *kvm = vcpu->kvm;
+ unsigned long *args = &vcpu->arch.gpr[4];
+ unsigned long *hp, *hptes[4], tlbrb[4];
+ long int i, j, k, n, found, indexes[4];
+ unsigned long flags, req, pte_index, rcbits;
+ long int local = 0;
+ long int ret = H_SUCCESS;
+ struct revmap_entry *rev, *revs[4];
+
+ if (atomic_read(&kvm->online_vcpus) == 1)
+ local = 1; /* only one vcpu online: tlbiel is used instead of tlbie */
+ for (i = 0; i < 4 && ret == H_SUCCESS; ) {
+ n = 0; /* number of locked HPTEs in the current batch */
+ for (; i < 4; ++i) {
+ j = i * 2;
+ pte_index = args[j];
+ flags = pte_index >> 56;
+ pte_index &= ((1ul << 56) - 1);
+ req = flags >> 6; /* top two bits: request type */
+ flags &= 3; /* low two bits: match type, see switch below */
+ if (req == 3) { /* no more requests */
+ i = 4;
+ break;
+ }
+ if (req != 1 || flags == 3 || pte_index >= HPT_NPTE) {
+ /* parameter error */
+ args[j] = ((0xa0 | flags) << 56) + pte_index;
+ ret = H_PARAMETER;
+ break;
+ }
+ hp = (unsigned long *)
+ (kvm->arch.hpt_virt + (pte_index << 4));
+ /* to avoid deadlock, don't spin except for first */
+ if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) {
+ if (n)
+ break; /* flush the batch first; this request is retried by the outer loop */
+ while (!try_lock_hpte(hp, HPTE_V_HVLOCK))
+ cpu_relax();
+ }
+ found = 0;
+ if (hp[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) {
+ switch (flags & 3) {
+ case 0: /* absolute */
+ found = 1;
+ break;
+ case 1: /* andcond */
+ if (!(hp[0] & args[j + 1]))
+ found = 1;
+ break;
+ case 2: /* AVPN */
+ if ((hp[0] & ~0x7fUL) == args[j + 1])
+ found = 1;
+ break;
+ }
+ }
+ if (!found) {
+ hp[0] &= ~HPTE_V_HVLOCK;
+ args[j] = ((0x90 | flags) << 56) + pte_index;
+ continue;
+ }
+
+ args[j] = ((0x80 | flags) << 56) + pte_index;
+ rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+
+ if (!(hp[0] & HPTE_V_VALID)) {
+ /* insert R and C bits from PTE */
+ rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
+ args[j] |= rcbits << (56 - 5);
+ hp[0] = 0; /* absent entry: no TLB flush needed; clear and unlock in one store */
+ continue;
+ }
+
+ hp[0] &= ~HPTE_V_VALID; /* leave it locked */
+ tlbrb[n] = compute_tlbie_rb(hp[0], hp[1], pte_index);
+ indexes[n] = j;
+ hptes[n] = hp;
+ revs[n] = rev;
+ ++n;
+ }
+
+ if (!n)
+ break;
+
+ /* Now that we've collected a batch, do the tlbies */
+ if (!local) {
+ while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
+ cpu_relax();
+ asm volatile("ptesync" : : : "memory");
+ for (k = 0; k < n; ++k)
+ asm volatile(PPC_TLBIE(%1,%0) : :
+ "r" (tlbrb[k]),
+ "r" (kvm->arch.lpid));
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+ kvm->arch.tlbie_lock = 0;
+ } else {
+ asm volatile("ptesync" : : : "memory");
+ for (k = 0; k < n; ++k)
+ asm volatile("tlbiel %0" : : "r" (tlbrb[k]));
+ asm volatile("ptesync" : : : "memory");
+ }
+
+ /* Read PTE low words after tlbie to get final R/C values */
+ for (k = 0; k < n; ++k) {
+ j = indexes[k];
+ pte_index = args[j] & ((1ul << 56) - 1);
+ hp = hptes[k];
+ rev = revs[k];
+ remove_revmap_chain(kvm, pte_index, rev, hp[0], hp[1]);
+ rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
+ args[j] |= rcbits << (56 - 5);
+ hp[0] = 0; /* clear the entry, which also drops HPTE_V_HVLOCK */
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * H_PROTECT hypercall handler: replace the protection (PP0, PP, N) and
+ * storage key bits of the HPTE at pte_index with values taken from flags.
+ *
+ * If H_AVPN is set in flags, the entry (ignoring its low 7 bits) must
+ * equal avpn.  va is not used.  Both the real HPTE and the guest's saved
+ * view of its second doubleword are updated.
+ *
+ * Returns H_PARAMETER for an out-of-range index, H_NOT_FOUND if the slot
+ * is empty or does not match, and H_SUCCESS otherwise.
+ */
+long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
+		      unsigned long pte_index, unsigned long avpn,
+		      unsigned long va)
+{
+	struct kvm *kvm = vcpu->kvm;
+	unsigned long *hpte;
+	struct revmap_entry *rev;
+	unsigned long v, r, rb, mask, bits;
+
+	if (pte_index >= HPT_NPTE)
+		return H_PARAMETER;
+
+	hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
+	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
+		cpu_relax();
+	if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
+	    ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn)) {
+		hpte[0] &= ~HPTE_V_HVLOCK;
+		return H_NOT_FOUND;
+	}
+
+	/* With a single vcpu online a local invalidation is enough */
+	if (atomic_read(&kvm->online_vcpus) == 1)
+		flags |= H_LOCAL;
+	v = hpte[0];
+	bits = (flags << 55) & HPTE_R_PP0;
+	bits |= (flags << 48) & HPTE_R_KEY_HI;
+	bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
+
+	/* Update guest view of 2nd HPTE dword */
+	mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
+		HPTE_R_KEY_HI | HPTE_R_KEY_LO;
+	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+	if (rev) {
+		r = (rev->guest_rpte & ~mask) | bits;
+		rev->guest_rpte = r;
+	}
+	r = (hpte[1] & ~mask) | bits;
+
+	/* Update HPTE */
+	if (v & HPTE_V_VALID) {
+		rb = compute_tlbie_rb(v, r, pte_index);
+		hpte[0] = v & ~HPTE_V_VALID;
+		if (!(flags & H_LOCAL)) {
+			while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
+				cpu_relax();
+			asm volatile("ptesync" : : : "memory");
+			asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
+				     : : "r" (rb), "r" (kvm->arch.lpid));
+			asm volatile("ptesync" : : : "memory");
+			kvm->arch.tlbie_lock = 0;
+		} else {
+			asm volatile("ptesync" : : : "memory");
+			asm volatile("tlbiel %0" : : "r" (rb));
+			asm volatile("ptesync" : : : "memory");
+		}
+		/*
+		 * The second doubleword was sampled while the entry was
+		 * still valid, so R or C may have been set in the HPTE
+		 * since then.  Re-read them after the invalidation so that
+		 * the write-back below does not discard them.
+		 */
+		r |= hpte[1] & (HPTE_R_R | HPTE_R_C);
+	}
+	hpte[1] = r;
+	/* Second dword must be visible before the entry is revalidated */
+	eieio();
+	hpte[0] = v & ~HPTE_V_HVLOCK;
+	asm volatile("ptesync" : : : "memory");
+	return H_SUCCESS;
+}
+
+/*
+ * H_READ hypercall handler: return the contents of the HPTE at pte_index,
+ * or of the aligned group of four containing it when H_READ_4 is set.
+ *
+ * Each entry is returned as a register pair starting at gpr[4].  The lock
+ * bit is hidden, an absent entry is reported as valid, and for valid
+ * entries the second doubleword is the guest's saved view combined with
+ * the R and C bits of the real HPTE.  The HPTEs are read without locking.
+ *
+ * Returns H_PARAMETER for an out-of-range index, otherwise H_SUCCESS.
+ */
+long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
+		   unsigned long pte_index)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct revmap_entry *rev;
+	unsigned long *hpte;
+	unsigned long v, r;
+	int count, k;
+
+	if (pte_index >= HPT_NPTE)
+		return H_PARAMETER;
+
+	count = 1;
+	if (flags & H_READ_4) {
+		pte_index &= ~3;
+		count = 4;
+	}
+
+	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+	for (k = 0; k < count; ++k) {
+		hpte = (unsigned long *)(kvm->arch.hpt_virt +
+					 ((pte_index + k) << 4));
+		v = hpte[0] & ~HPTE_V_HVLOCK;
+		r = hpte[1];
+		if (v & HPTE_V_ABSENT)
+			v = (v & ~HPTE_V_ABSENT) | HPTE_V_VALID;
+		if (v & HPTE_V_VALID)
+			r = rev[k].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
+		vcpu->arch.gpr[4 + k * 2] = v;
+		vcpu->arch.gpr[5 + k * 2] = r;
+	}
+	return H_SUCCESS;
+}
+
+void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep,
+ unsigned long pte_index)
+{ /*
+ * Clear the valid bit of the HPTE at hptep and flush its translation
+ * with a global tlbie, serialised by kvm->arch.tlbie_lock.  The rest
+ * of the HPTE, including any lock bit, is left untouched.
+ */
+ unsigned long rb;
+
+ hptep[0] &= ~HPTE_V_VALID;
+ rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index);
+ while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
+ cpu_relax();
+ asm volatile("ptesync" : : : "memory");
+ asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
+ : : "r" (rb), "r" (kvm->arch.lpid));
+ asm volatile("ptesync" : : : "memory");
+ kvm->arch.tlbie_lock = 0; /* release the tlbie lock */
+}
+EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);
+
+void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep,
+ unsigned long pte_index)
+{ /*
+ * Clear the reference (R) bit of the HPTE at hptep with a single-byte
+ * store, then flush the translation with a global tlbie under
+ * kvm->arch.tlbie_lock.
+ */
+ unsigned long rb;
+ unsigned char rbyte;
+
+ rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index);
+ rbyte = (hptep[1] & ~HPTE_R_R) >> 8; /* bits 8-15 of the second dword with R cleared */
+ /* modify only the second-last byte, which contains the ref bit */
+ *((char *)hptep + 14) = rbyte; /* NOTE(review): byte offset 14 matches bits 8-15 only with big-endian layout */
+ while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
+ cpu_relax();
+ asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
+ : : "r" (rb), "r" (kvm->arch.lpid));
+ asm volatile("ptesync" : : : "memory");
+ kvm->arch.tlbie_lock = 0; /* release the tlbie lock */
+}
+EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte);
+
+/* Base page shift selected by the LP field of a large-page SLB entry */
+static int slb_base_page_shift[4] = {
+	24,	/* 16M */
+	16,	/* 64k */
+	34,	/* 16G */
+	20,	/* 1M, unsupported */
+};
+
+/*
+ * Search the guest hashed page table for the HPTE that translates eaddr
+ * under the segment described by slb_v.
+ *
+ * valid is the set of HPTE_V_* state bits (HPTE_V_VALID and/or
+ * HPTE_V_ABSENT) of which a matching entry must have at least one set.
+ * The primary hash group is searched first, then the secondary one.
+ *
+ * Returns the index of the matching HPTE, which is left locked with
+ * HPTE_V_HVLOCK for the caller to release, or -1 if none was found.
+ */
+long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
+			      unsigned long valid)
+{
+	unsigned long match_mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY;
+	unsigned long match_val = 0;
+	unsigned int shift = 12;
+	unsigned long seg_mask, vsid, hash, avpn;
+	unsigned long *pteg, *hpte;
+	unsigned long v, r;
+	unsigned int slot;
+
+	/* Get page shift, work out hash and AVPN etc. */
+	if (slb_v & SLB_VSID_L) {
+		match_mask |= HPTE_V_LARGE;
+		match_val |= HPTE_V_LARGE;
+		shift = slb_base_page_shift[(slb_v & SLB_VSID_LP) >> 4];
+	}
+	if (slb_v & SLB_VSID_B_1T) {
+		seg_mask = (1UL << 40) - 1;
+		vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T;
+		vsid ^= vsid << 25;
+	} else {
+		seg_mask = (1UL << 28) - 1;
+		vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT;
+	}
+	hash = (vsid ^ ((eaddr & seg_mask) >> shift)) & HPT_HASH_MASK;
+	avpn = slb_v & ~(seg_mask >> 16);	/* also includes B */
+	avpn |= (eaddr & seg_mask) >> 16;
+
+	if (shift >= 24)
+		avpn &= ~((1UL << (shift - 16)) - 1);
+	else
+		avpn &= ~0x7fUL;
+	match_val |= avpn;
+
+	for (;;) {
+		/* Each group holds 8 HPTEs of 16 bytes */
+		pteg = (unsigned long *)(kvm->arch.hpt_virt + (hash << 7));
+
+		for (slot = 0; slot < 8; ++slot) {
+			hpte = pteg + 2 * slot;
+
+			/* Read the PTE racily */
+			v = hpte[0] & ~HPTE_V_HVLOCK;
+
+			/* Check valid/absent, hash, segment size and AVPN */
+			if (!(v & valid) || (v & match_mask) != match_val)
+				continue;
+
+			/* Lock the PTE and read it under the lock */
+			while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
+				cpu_relax();
+			v = hpte[0] & ~HPTE_V_HVLOCK;
+			r = hpte[1];
+
+			/*
+			 * Check the HPTE again, including large page size.
+			 * Since we don't currently allow any MPSS (mixed
+			 * page-size segment) page sizes, it is sufficient
+			 * to check against the actual page size.
+			 */
+			if ((v & valid) && (v & match_mask) == match_val &&
+			    hpte_page_size(v, r) == (1ul << shift))
+				/* Return with the HPTE still locked */
+				return (hash << 3) + slot;
+
+			/* Unlock and move on */
+			hpte[0] = v;
+		}
+
+		/* Stop once the secondary group has been searched too */
+		if (match_val & HPTE_V_SECONDARY)
+			break;
+		match_val |= HPTE_V_SECONDARY;
+		hash = hash ^ HPT_HASH_MASK;
+	}
+	return -1;
+}
+EXPORT_SYMBOL(kvmppc_hv_find_lock_hpte);
+
+/*
+ * Called in real mode to check whether an HPTE not found fault
+ * is due to accessing a paged-out page or an emulated MMIO page,
+ * or if a protection fault is due to accessing a page that the
+ * guest wanted read/write access to but which we made read-only.
+ * Returns a possibly modified status (DSISR) value if not
+ * (i.e. pass the interrupt to the guest),
+ * -1 to pass the fault up to host kernel mode code, -2 to do that
+ * and also load the instruction word (for MMIO emulation),
+ * or 0 if we should make the guest retry the access.
+ *
+ * addr is the faulting address and slb_v the SLB VSID word used to look
+ * it up in the HPT; status holds the fault status bits; data is true
+ * for a data access and false for an instruction fetch.  When -1 or -2
+ * is returned, the HPTE contents and index have been saved in
+ * vcpu->arch.pgfault_* for the virtual-mode handler.
+ */
+long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
+ unsigned long slb_v, unsigned int status, bool data)
+{
+ struct kvm *kvm = vcpu->kvm;
+ long int index;
+ unsigned long v, r, gr;
+ unsigned long *hpte;
+ unsigned long valid;
+ struct revmap_entry *rev;
+ unsigned long pp, key;
+
+ /* For protection fault, expect to find a valid HPTE */
+ valid = HPTE_V_VALID;
+ if (status & DSISR_NOHPTE)
+ valid |= HPTE_V_ABSENT;
+
+ index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
+ if (index < 0) {
+ if (status & DSISR_NOHPTE)
+ return status; /* there really was no HPTE */
+ return 0; /* for prot fault, HPTE disappeared */
+ }
+ hpte = (unsigned long *)(kvm->arch.hpt_virt + (index << 4));
+ v = hpte[0] & ~HPTE_V_HVLOCK;
+ r = hpte[1];
+ rev = real_vmalloc_addr(&kvm->arch.revmap[index]);
+ gr = rev->guest_rpte; /* guest's view of the second dword; used for the permission checks */
+
+ unlock_hpte(hpte, v); /* the HPTE was returned locked by the lookup above */
+
+ /* For not found, if the HPTE is valid by now, retry the instruction */
+ if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID))
+ return 0;
+
+ /* Check access permissions to the page */
+ pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
+ key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS; /* Kp applies when MSR_PR is set, Ks otherwise */
+ status &= ~DSISR_NOHPTE; /* DSISR_NOHPTE == SRR1_ISI_NOPT */
+ if (!data) {
+ if (gr & (HPTE_R_N | HPTE_R_G))
+ return status | SRR1_ISI_N_OR_G;
+ if (!hpte_read_permission(pp, slb_v & key))
+ return status | SRR1_ISI_PROT;
+ } else if (status & DSISR_ISSTORE) {
+ /* check write permission */
+ if (!hpte_write_permission(pp, slb_v & key))
+ return status | DSISR_PROTFAULT;
+ } else {
+ if (!hpte_read_permission(pp, slb_v & key))
+ return status | DSISR_PROTFAULT;
+ }
+
+ /* Check storage key, if applicable */
+ if (data && (vcpu->arch.shregs.msr & MSR_DR)) {
+ unsigned int perm = hpte_get_skey_perm(gr, vcpu->arch.amr);
+ if (status & DSISR_ISSTORE)
+ perm >>= 1; /* for a store, test the next bit up instead of bit 0 */
+ if (perm & 1)
+ return status | DSISR_KEYFAULT;
+ }
+
+ /* Save HPTE info for virtual-mode handler */
+ vcpu->arch.pgfault_addr = addr;
+ vcpu->arch.pgfault_index = index;
+ vcpu->arch.pgfault_hpte[0] = v;
+ vcpu->arch.pgfault_hpte[1] = r;
+
+ /* Check the storage key to see if it is possibly emulated MMIO */
+ if (data && (vcpu->arch.shregs.msr & MSR_IR) &&
+ (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
+ (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) /* all key bits set is how kvmppc_h_enter marks emulated MMIO */
+ return -2; /* MMIO emulation - load instr word */
+
+ return -1; /* send fault up to host kernel mode */
+}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
new file mode 100644
index 00000000..24b23a43
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -0,0 +1,1712 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * Derived from book3s_rmhandlers.S and other files, which are:
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/kvm_asm.h>
+#include <asm/reg.h>
+#include <asm/mmu.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/hvcall.h>
+#include <asm/asm-offsets.h>
+#include <asm/exception-64s.h>
+
+/*****************************************************************************
+ * *
+ * Real Mode handlers that need to be in the linear mapping *
+ * *
+ ****************************************************************************/
+
+ .globl kvmppc_skip_interrupt
+kvmppc_skip_interrupt:
+ mfspr r13,SPRN_SRR0 /* r13 = interrupt return address */
+ addi r13,r13,4 /* advance past one 4-byte instruction */
+ mtspr SPRN_SRR0,r13
+ GET_SCRATCH0(r13) /* reload r13 from the scratch register; presumably the value saved at interrupt entry */
+ rfid /* return from interrupt to the updated SRR0 */
+ b . /* branch to self; nothing falls through past this point */
+
+ .globl kvmppc_skip_Hinterrupt
+kvmppc_skip_Hinterrupt:
+ mfspr r13,SPRN_HSRR0 /* r13 = hypervisor interrupt return address */
+ addi r13,r13,4 /* advance past one 4-byte instruction */
+ mtspr SPRN_HSRR0,r13
+ GET_SCRATCH0(r13) /* reload r13 from the scratch register; presumably the value saved at interrupt entry */
+ hrfid /* hypervisor return from interrupt to the updated HSRR0 */
+ b . /* branch to self; nothing falls through past this point */
+
+/*
+ * Call kvmppc_hv_entry in real mode.
+ * Must be called with interrupts hard-disabled.
+ *
+ * Input Registers:
+ *
+ * LR = return address to continue at after eventually re-enabling MMU
+ *
+ * The switch is done by loading SRR0 with the address of kvmppc_hv_entry
+ * and SRR1 with the current MSR minus IR and DR, then executing RFI.
+ * r0, r5, r6 and r10 are overwritten.
+ */
+_GLOBAL(kvmppc_hv_entry_trampoline)
+ mfmsr r10 /* r10 = current MSR */
+ LOAD_REG_ADDR(r5, kvmppc_hv_entry) /* r5 = address of kvmppc_hv_entry */
+ li r0,MSR_RI
+ andc r0,r10,r0 /* r0 = current MSR with RI cleared */
+ li r6,MSR_IR | MSR_DR
+ andc r6,r10,r6 /* r6 = current MSR with IR and DR cleared (derived from r10, so RI is unchanged) */
+ mtmsrd r0,1 /* clear RI in MSR */
+ mtsrr0 r5 /* SRR0 = address to continue at */
+ mtsrr1 r6 /* SRR1 = MSR to continue with */
+ RFI
+
+#define ULONG_SIZE 8
+#define VCPU_GPR(n) (VCPU_GPRS + (n * ULONG_SIZE))
+
+/******************************************************************************
+ * *
+ * Entry code *
+ * *
+ *****************************************************************************/
+
+#define XICS_XIRR 4
+#define XICS_QIRR 0xc
+
+/*
+ * We come in here when wakened from nap mode on a secondary hw thread.
+ * Relocation is off and most register values are lost.
+ * r13 points to the PACA.
+ *
+ * Unless the thread was napping because of a cede (in which case control
+ * goes to kvm_end_cede), the wake-up IPI is acknowledged and cleared and
+ * execution falls through into kvmppc_hv_entry with r4 = vcpu pointer.
+ */
+ .globl kvm_start_guest
+kvm_start_guest:
+ ld r1,PACAEMERGSP(r13) /* stack pointer taken from the PACAEMERGSP field of the PACA */
+ subi r1,r1,STACK_FRAME_OVERHEAD
+ ld r2,PACATOC(r13) /* r2 loaded from the PACATOC field of the PACA */
+
+ /* were we napping due to cede? */
+ lbz r0,HSTATE_NAPPING(r13)
+ cmpwi r0,0
+ bne kvm_end_cede /* non-zero napping flag: resume the cede path instead */
+
+ /* get vcpu pointer */
+ ld r4, HSTATE_KVM_VCPU(r13)
+
+ /* We got here with an IPI; clear it */
+ ld r5, HSTATE_XICS_PHYS(r13) /* r5 = base address used for the cache-inhibited XICS accesses below */
+ li r0, 0xff
+ li r6, XICS_QIRR
+ li r7, XICS_XIRR
+ lwzcix r8, r5, r7 /* ack the interrupt */
+ sync
+ stbcix r0, r5, r6 /* clear it */
+ stwcix r8, r5, r7 /* EOI it */
+
+ /* NV GPR values from power7_idle() will no longer be valid */
+ stb r0, PACA_NAPSTATELOST(r13) /* r0 still holds 0xff here, so a non-zero flag is stored */
+
+.global kvmppc_hv_entry
+kvmppc_hv_entry:
+
+ /* Required state:
+ *
+ * R4 = vcpu pointer
+ * MSR = ~IR|DR
+ * R13 = PACA
+ * R1 = host R1
+ * all other volatile GPRS = free
+ */
+ mflr r0
+ std r0, HSTATE_VMHANDLER(r13)
+
+ ld r14, VCPU_GPR(r14)(r4)
+ ld r15, VCPU_GPR(r15)(r4)
+ ld r16, VCPU_GPR(r16)(r4)
+ ld r17, VCPU_GPR(r17)(r4)
+ ld r18, VCPU_GPR(r18)(r4)
+ ld r19, VCPU_GPR(r19)(r4)
+ ld r20, VCPU_GPR(r20)(r4)
+ ld r21, VCPU_GPR(r21)(r4)
+ ld r22, VCPU_GPR(r22)(r4)
+ ld r23, VCPU_GPR(r23)(r4)
+ ld r24, VCPU_GPR(r24)(r4)
+ ld r25, VCPU_GPR(r25)(r4)
+ ld r26, VCPU_GPR(r26)(r4)
+ ld r27, VCPU_GPR(r27)(r4)
+ ld r28, VCPU_GPR(r28)(r4)
+ ld r29, VCPU_GPR(r29)(r4)
+ ld r30, VCPU_GPR(r30)(r4)
+ ld r31, VCPU_GPR(r31)(r4)
+
+ /* Load guest PMU registers */
+ /* R4 is live here (vcpu pointer) */
+ li r3, 1
+ sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
+ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
+ isync
+ lwz r3, VCPU_PMC(r4) /* always load up guest PMU registers */
+ lwz r5, VCPU_PMC + 4(r4) /* to prevent information leak */
+ lwz r6, VCPU_PMC + 8(r4)
+ lwz r7, VCPU_PMC + 12(r4)
+ lwz r8, VCPU_PMC + 16(r4)
+ lwz r9, VCPU_PMC + 20(r4)
+BEGIN_FTR_SECTION
+ lwz r10, VCPU_PMC + 24(r4)
+ lwz r11, VCPU_PMC + 28(r4)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ mtspr SPRN_PMC1, r3
+ mtspr SPRN_PMC2, r5
+ mtspr SPRN_PMC3, r6
+ mtspr SPRN_PMC4, r7
+ mtspr SPRN_PMC5, r8
+ mtspr SPRN_PMC6, r9
+BEGIN_FTR_SECTION
+ mtspr SPRN_PMC7, r10
+ mtspr SPRN_PMC8, r11
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ ld r3, VCPU_MMCR(r4)
+ ld r5, VCPU_MMCR + 8(r4)
+ ld r6, VCPU_MMCR + 16(r4)
+ mtspr SPRN_MMCR1, r5
+ mtspr SPRN_MMCRA, r6
+ mtspr SPRN_MMCR0, r3
+ isync
+
+ /* Load up FP, VMX and VSX registers */
+ bl kvmppc_load_fp
+
+BEGIN_FTR_SECTION
+ /* Switch DSCR to guest value */
+ ld r5, VCPU_DSCR(r4)
+ mtspr SPRN_DSCR, r5
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+ /*
+ * Set the decrementer to the guest decrementer.
+ */
+ ld r8,VCPU_DEC_EXPIRES(r4)
+ mftb r7
+ subf r3,r7,r8
+ mtspr SPRN_DEC,r3
+ stw r3,VCPU_DEC(r4)
+
+ ld r5, VCPU_SPRG0(r4)
+ ld r6, VCPU_SPRG1(r4)
+ ld r7, VCPU_SPRG2(r4)
+ ld r8, VCPU_SPRG3(r4)
+ mtspr SPRN_SPRG0, r5
+ mtspr SPRN_SPRG1, r6
+ mtspr SPRN_SPRG2, r7
+ mtspr SPRN_SPRG3, r8
+
+ /* Save R1 in the PACA */
+ std r1, HSTATE_HOST_R1(r13)
+
+ /* Increment yield count if they have a VPA */
+ ld r3, VCPU_VPA(r4)
+ cmpdi r3, 0
+ beq 25f
+ lwz r5, LPPACA_YIELDCOUNT(r3)
+ addi r5, r5, 1
+ stw r5, LPPACA_YIELDCOUNT(r3)
+25:
+ /* Load up DAR and DSISR */
+ ld r5, VCPU_DAR(r4)
+ lwz r6, VCPU_DSISR(r4)
+ mtspr SPRN_DAR, r5
+ mtspr SPRN_DSISR, r6
+
+ /* Set partition DABR */
+ li r5,3
+ ld r6,VCPU_DABR(r4)
+ mtspr SPRN_DABRX,r5
+ mtspr SPRN_DABR,r6
+
+BEGIN_FTR_SECTION
+ /* Restore AMR and UAMOR, set AMOR to all 1s */
+ ld r5,VCPU_AMR(r4)
+ ld r6,VCPU_UAMOR(r4)
+ li r7,-1
+ mtspr SPRN_AMR,r5
+ mtspr SPRN_UAMOR,r6
+ mtspr SPRN_AMOR,r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+ /* Clear out SLB */
+ li r6,0
+ slbmte r6,r6
+ slbia
+ ptesync
+
+BEGIN_FTR_SECTION
+ b 30f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ /*
+ * POWER7 host -> guest partition switch code.
+ * We don't have to lock against concurrent tlbies,
+ * but we do have to coordinate across hardware threads.
+ */
+ /* Increment entry count iff exit count is zero. */
+ ld r5,HSTATE_KVM_VCORE(r13)
+ addi r9,r5,VCORE_ENTRY_EXIT
+21: lwarx r3,0,r9
+ cmpwi r3,0x100 /* any threads starting to exit? */
+ bge secondary_too_late /* if so we're too late to the party */
+ addi r3,r3,1
+ stwcx. r3,0,r9
+ bne 21b
+
+ /* Primary thread switches to guest partition. */
+ ld r9,VCPU_KVM(r4) /* pointer to struct kvm */
+ lwz r6,VCPU_PTID(r4)
+ cmpwi r6,0
+ bne 20f
+ ld r6,KVM_SDR1(r9)
+ lwz r7,KVM_LPID(r9)
+ li r0,LPID_RSVD /* switch to reserved LPID */
+ mtspr SPRN_LPID,r0
+ ptesync
+ mtspr SPRN_SDR1,r6 /* switch to partition page table */
+ mtspr SPRN_LPID,r7
+ isync
+ li r0,1
+ stb r0,VCORE_IN_GUEST(r5) /* signal secondaries to continue */
+ b 10f
+
+ /* Secondary threads wait for primary to have done partition switch */
+20: lbz r0,VCORE_IN_GUEST(r5)
+ cmpwi r0,0
+ beq 20b
+
+ /* Set LPCR and RMOR. */
+10: ld r8,KVM_LPCR(r9)
+ mtspr SPRN_LPCR,r8
+ ld r8,KVM_RMOR(r9)
+ mtspr SPRN_RMOR,r8
+ isync
+
+ /* Check if HDEC expires soon */
+ mfspr r3,SPRN_HDEC
+ cmpwi r3,10
+ li r12,BOOK3S_INTERRUPT_HV_DECREMENTER
+ mr r9,r4
+ blt hdec_soon
+
+ /*
+ * Invalidate the TLB if we could possibly have stale TLB
+ * entries for this partition on this core due to the use
+ * of tlbiel.
+ * XXX maybe only need this on primary thread?
+ */
+ ld r9,VCPU_KVM(r4) /* pointer to struct kvm */
+ lwz r5,VCPU_VCPUID(r4)
+ lhz r6,PACAPACAINDEX(r13)
+ rldimi r6,r5,0,62 /* XXX map as if threads 1:1 p:v */
+ lhz r8,VCPU_LAST_CPU(r4)
+ sldi r7,r6,1 /* see if this is the same vcpu */
+ add r7,r7,r9 /* as last ran on this pcpu */
+ lhz r0,KVM_LAST_VCPU(r7)
+ cmpw r6,r8 /* on the same cpu core as last time? */
+ bne 3f
+ cmpw r0,r5 /* same vcpu as this core last ran? */
+ beq 1f
+3: sth r6,VCPU_LAST_CPU(r4) /* if not, invalidate partition TLB */
+ sth r5,KVM_LAST_VCPU(r7)
+ li r6,128
+ mtctr r6
+ li r7,0x800 /* IS field = 0b10 */
+ ptesync
+2: tlbiel r7
+ addi r7,r7,0x1000
+ bdnz 2b
+ ptesync
+1:
+
+ /* Save purr/spurr */
+ mfspr r5,SPRN_PURR
+ mfspr r6,SPRN_SPURR
+ std r5,HSTATE_PURR(r13)
+ std r6,HSTATE_SPURR(r13)
+ ld r7,VCPU_PURR(r4)
+ ld r8,VCPU_SPURR(r4)
+ mtspr SPRN_PURR,r7
+ mtspr SPRN_SPURR,r8
+ b 31f
+
+ /*
+ * PPC970 host -> guest partition switch code.
+ * We have to lock against concurrent tlbies,
+ * using native_tlbie_lock to lock against host tlbies
+ * and kvm->arch.tlbie_lock to lock against guest tlbies.
+ * We also have to invalidate the TLB since its
+ * entries aren't tagged with the LPID.
+ */
+30: ld r9,VCPU_KVM(r4) /* pointer to struct kvm */
+
+ /* first take native_tlbie_lock */
+ .section ".toc","aw"
+toc_tlbie_lock:
+ .tc native_tlbie_lock[TC],native_tlbie_lock
+ .previous
+ ld r3,toc_tlbie_lock@toc(2)
+ lwz r8,PACA_LOCK_TOKEN(r13)
+24: lwarx r0,0,r3
+ cmpwi r0,0
+ bne 24b
+ stwcx. r8,0,r3
+ bne 24b
+ isync
+
+ ld r7,KVM_LPCR(r9) /* use kvm->arch.lpcr to store HID4 */
+ li r0,0x18f
+ rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */
+ or r0,r7,r0
+ ptesync
+ sync
+ mtspr SPRN_HID4,r0 /* switch to reserved LPID */
+ isync
+ li r0,0
+ stw r0,0(r3) /* drop native_tlbie_lock */
+
+ /* invalidate the whole TLB */
+ li r0,256
+ mtctr r0
+ li r6,0
+25: tlbiel r6
+ addi r6,r6,0x1000
+ bdnz 25b
+ ptesync
+
+ /* Take the guest's tlbie_lock */
+ addi r3,r9,KVM_TLBIE_LOCK
+24: lwarx r0,0,r3
+ cmpwi r0,0
+ bne 24b
+ stwcx. r8,0,r3
+ bne 24b
+ isync
+ ld r6,KVM_SDR1(r9)
+ mtspr SPRN_SDR1,r6 /* switch to partition page table */
+
+ /* Set up HID4 with the guest's LPID etc. */
+ sync
+ mtspr SPRN_HID4,r7
+ isync
+
+ /* drop the guest's tlbie_lock */
+ li r0,0
+ stw r0,0(r3)
+
+ /* Check if HDEC expires soon */
+ mfspr r3,SPRN_HDEC
+ cmpwi r3,10
+ li r12,BOOK3S_INTERRUPT_HV_DECREMENTER
+ mr r9,r4
+ blt hdec_soon
+
+ /* Enable HDEC interrupts */
+ mfspr r0,SPRN_HID0
+ li r3,1
+ rldimi r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1
+ sync
+ mtspr SPRN_HID0,r0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+
+ /* Load up guest SLB entries */
+31: lwz r5,VCPU_SLB_MAX(r4)
+ cmpwi r5,0
+ beq 9f
+ mtctr r5
+ addi r6,r4,VCPU_SLB
+1: ld r8,VCPU_SLB_E(r6)
+ ld r9,VCPU_SLB_V(r6)
+ slbmte r9,r8
+ addi r6,r6,VCPU_SLB_SIZE
+ bdnz 1b
+9:
+
+ /* Restore state of CTRL run bit; assume 1 on entry */
+ lwz r5,VCPU_CTRL(r4)
+ andi. r5,r5,1
+ bne 4f
+ mfspr r6,SPRN_CTRLF
+ clrrdi r6,r6,1
+ mtspr SPRN_CTRLT,r6
+4:
+ ld r6, VCPU_CTR(r4)
+ lwz r7, VCPU_XER(r4)
+
+ mtctr r6
+ mtxer r7
+
+kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */
+ ld r6, VCPU_SRR0(r4)
+ ld r7, VCPU_SRR1(r4)
+ ld r10, VCPU_PC(r4)
+ ld r11, VCPU_MSR(r4) /* r11 = vcpu->arch.msr & ~MSR_HV */
+
+ rldicl r11, r11, 63 - MSR_HV_LG, 1
+ rotldi r11, r11, 1 + MSR_HV_LG
+ ori r11, r11, MSR_ME
+
+ /* Check if we can deliver an external or decrementer interrupt now */
+ ld r0,VCPU_PENDING_EXC(r4)
+ li r8,(1 << BOOK3S_IRQPRIO_EXTERNAL)
+ oris r8,r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
+ and r0,r0,r8
+ cmpdi cr1,r0,0
+ andi. r0,r11,MSR_EE
+ beq cr1,11f
+BEGIN_FTR_SECTION
+ mfspr r8,SPRN_LPCR
+ ori r8,r8,LPCR_MER
+ mtspr SPRN_LPCR,r8
+ isync
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+ beq 5f
+ li r0,BOOK3S_INTERRUPT_EXTERNAL
+12: mr r6,r10
+ mr r10,r0
+ mr r7,r11
+ li r11,(MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
+ rotldi r11,r11,63
+ b 5f
+11: beq 5f
+ mfspr r0,SPRN_DEC
+ cmpwi r0,0
+ li r0,BOOK3S_INTERRUPT_DECREMENTER
+ blt 12b
+
+ /* Move SRR0 and SRR1 into the respective regs */
+5: mtspr SPRN_SRR0, r6
+ mtspr SPRN_SRR1, r7
+ li r0,0
+ stb r0,VCPU_CEDED(r4) /* cancel cede */
+
+fast_guest_return: /* r4 = vcpu, r10 = guest PC, r11 = guest MSR, r13 = paca */
+ mtspr SPRN_HSRR0,r10 /* hrfid below resumes at HSRR0 with MSR = HSRR1 */
+ mtspr SPRN_HSRR1,r11
+
+ /* Activate guest mode, so faults get handled by KVM */
+ li r9, KVM_GUEST_MODE_GUEST
+ stb r9, HSTATE_IN_GUEST(r13)
+
+ /* Enter guest: reload LR, CR and GPRs r0-r13 from the vcpu */
+
+ ld r5, VCPU_LR(r4)
+ lwz r6, VCPU_CR(r4)
+ mtlr r5
+ mtcr r6
+
+ ld r0, VCPU_GPR(r0)(r4)
+ ld r1, VCPU_GPR(r1)(r4)
+ ld r2, VCPU_GPR(r2)(r4)
+ ld r3, VCPU_GPR(r3)(r4)
+ ld r5, VCPU_GPR(r5)(r4)
+ ld r6, VCPU_GPR(r6)(r4)
+ ld r7, VCPU_GPR(r7)(r4)
+ ld r8, VCPU_GPR(r8)(r4)
+ ld r9, VCPU_GPR(r9)(r4)
+ ld r10, VCPU_GPR(r10)(r4)
+ ld r11, VCPU_GPR(r11)(r4)
+ ld r12, VCPU_GPR(r12)(r4)
+ ld r13, VCPU_GPR(r13)(r4) /* paca pointer is gone from here on */
+
+ ld r4, VCPU_GPR(r4)(r4) /* r4 last: it is the base for all the loads */
+
+ hrfid
+ b . /* not reached; hrfid does not fall through */
+
+/******************************************************************************
+ * *
+ * Exit code *
+ * *
+ *****************************************************************************/
+
+/*
+ * We come here from the first-level interrupt handlers.
+ */
+ .globl kvmppc_interrupt
+kvmppc_interrupt:
+ /*
+ * Register contents:
+ * R12 = interrupt vector
+ * R13 = PACA
+ * guest CR, R12 saved in shadow VCPU SCRATCH1/0
+ * guest R13 saved in SPRN_SCRATCH0
+ */
+ /* abuse host_r2 as third scratch area; we get r2 from PACATOC(r13) */
+ std r9, HSTATE_HOST_R2(r13)
+ ld r9, HSTATE_KVM_VCPU(r13)
+
+ /* Save registers */
+
+ std r0, VCPU_GPR(r0)(r9)
+ std r1, VCPU_GPR(r1)(r9)
+ std r2, VCPU_GPR(r2)(r9)
+ std r3, VCPU_GPR(r3)(r9)
+ std r4, VCPU_GPR(r4)(r9)
+ std r5, VCPU_GPR(r5)(r9)
+ std r6, VCPU_GPR(r6)(r9)
+ std r7, VCPU_GPR(r7)(r9)
+ std r8, VCPU_GPR(r8)(r9)
+ ld r0, HSTATE_HOST_R2(r13)
+ std r0, VCPU_GPR(r9)(r9)
+ std r10, VCPU_GPR(r10)(r9)
+ std r11, VCPU_GPR(r11)(r9)
+ ld r3, HSTATE_SCRATCH0(r13)
+ lwz r4, HSTATE_SCRATCH1(r13)
+ std r3, VCPU_GPR(r12)(r9)
+ stw r4, VCPU_CR(r9)
+
+ /* Restore R1/R2 so we can handle faults */
+ ld r1, HSTATE_HOST_R1(r13)
+ ld r2, PACATOC(r13)
+
+ mfspr r10, SPRN_SRR0
+ mfspr r11, SPRN_SRR1
+ std r10, VCPU_SRR0(r9)
+ std r11, VCPU_SRR1(r9)
+ andi. r0, r12, 2 /* need to read HSRR0/1? */
+ beq 1f
+ mfspr r10, SPRN_HSRR0
+ mfspr r11, SPRN_HSRR1
+ clrrdi r12, r12, 2
+1: std r10, VCPU_PC(r9)
+ std r11, VCPU_MSR(r9)
+
+ GET_SCRATCH0(r3)
+ mflr r4
+ std r3, VCPU_GPR(r13)(r9)
+ std r4, VCPU_LR(r9)
+
+ /* Unset guest mode */
+ li r0, KVM_GUEST_MODE_NONE
+ stb r0, HSTATE_IN_GUEST(r13)
+
+ stw r12,VCPU_TRAP(r9)
+
+ /* Save HEIR (HV emulation assist reg) in last_inst
+ if this is an HEI (HV emulation interrupt, e40) */
+ li r3,KVM_INST_FETCH_FAILED
+BEGIN_FTR_SECTION
+ cmpwi r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST
+ bne 11f
+ mfspr r3,SPRN_HEIR
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+11: stw r3,VCPU_LAST_INST(r9)
+
+ /* these are volatile across C function calls */
+ mfctr r3
+ mfxer r4
+ std r3, VCPU_CTR(r9)
+ stw r4, VCPU_XER(r9)
+
+BEGIN_FTR_SECTION
+ /* If this is a page table miss then see if it's theirs or ours */
+ cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
+ beq kvmppc_hdsi
+ cmpwi r12, BOOK3S_INTERRUPT_H_INST_STORAGE
+ beq kvmppc_hisi
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+ /* See if this is a leftover HDEC interrupt */
+ cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
+ bne 2f
+ mfspr r3,SPRN_HDEC
+ cmpwi r3,0
+ bge ignore_hdec
+2:
+ /* See if this is an hcall we can handle in real mode */
+ cmpwi r12,BOOK3S_INTERRUPT_SYSCALL
+ beq hcall_try_real_mode
+
+ /* Check for mediated interrupts (could be done earlier really ...) */
+BEGIN_FTR_SECTION
+ cmpwi r12,BOOK3S_INTERRUPT_EXTERNAL
+ bne+ 1f
+ andi. r0,r11,MSR_EE
+ beq 1f
+ mfspr r5,SPRN_LPCR
+ andi. r0,r5,LPCR_MER
+ bne bounce_ext_interrupt
+1:
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+nohpte_cont:
+hcall_real_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
+ /* Save DEC */
+ mfspr r5,SPRN_DEC
+ mftb r6
+ extsw r5,r5
+ add r5,r5,r6
+ std r5,VCPU_DEC_EXPIRES(r9)
+
+ /* Save more register state */
+ mfdar r6
+ mfdsisr r7
+ std r6, VCPU_DAR(r9)
+ stw r7, VCPU_DSISR(r9)
+BEGIN_FTR_SECTION
+ /* don't overwrite fault_dar/fault_dsisr if HDSI */
+ cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
+ beq 6f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+ std r6, VCPU_FAULT_DAR(r9)
+ stw r7, VCPU_FAULT_DSISR(r9)
+
+ /* Save guest CTRL register, set runlatch to 1 */
+6: mfspr r6,SPRN_CTRLF
+ stw r6,VCPU_CTRL(r9)
+ andi. r0,r6,1
+ bne 4f
+ ori r6,r6,1
+ mtspr SPRN_CTRLT,r6
+4:
+ /* Read the guest SLB and save it away */
+ lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */
+ mtctr r0
+ li r6,0
+ addi r7,r9,VCPU_SLB
+ li r5,0
+1: slbmfee r8,r6
+ andis. r0,r8,SLB_ESID_V@h
+ beq 2f
+ add r8,r8,r6 /* put index in */
+ slbmfev r3,r6
+ std r8,VCPU_SLB_E(r7)
+ std r3,VCPU_SLB_V(r7)
+ addi r7,r7,VCPU_SLB_SIZE
+ addi r5,r5,1
+2: addi r6,r6,1
+ bdnz 1b
+ stw r5,VCPU_SLB_MAX(r9)
+
+ /*
+ * Save the guest PURR/SPURR
+ */
+BEGIN_FTR_SECTION
+ mfspr r5,SPRN_PURR
+ mfspr r6,SPRN_SPURR
+ ld r7,VCPU_PURR(r9)
+ ld r8,VCPU_SPURR(r9)
+ std r5,VCPU_PURR(r9)
+ std r6,VCPU_SPURR(r9)
+ subf r5,r7,r5
+ subf r6,r8,r6
+
+ /*
+ * Restore host PURR/SPURR and add guest times
+ * so that the time in the guest gets accounted.
+ */
+ ld r3,HSTATE_PURR(r13)
+ ld r4,HSTATE_SPURR(r13)
+ add r3,r3,r5
+ add r4,r4,r6
+ mtspr SPRN_PURR,r3
+ mtspr SPRN_SPURR,r4
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201)
+
+ /* Clear out SLB */
+ li r5,0
+ slbmte r5,r5
+ slbia
+ ptesync
+
+hdec_soon: /* r9 = vcpu, r12 = trap, r13 = paca */
+BEGIN_FTR_SECTION
+ b 32f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ /*
+ * POWER7 guest -> host partition switch code.
+ * We don't have to lock against tlbies but we do
+ * have to coordinate the hardware threads.
+ */
+ /* Increment the threads-exiting-guest count in the 0xff00
+ bits of vcore->entry_exit_count */
+ lwsync
+ ld r5,HSTATE_KVM_VCORE(r13)
+ addi r6,r5,VCORE_ENTRY_EXIT
+41: lwarx r3,0,r6
+ addi r0,r3,0x100
+ stwcx. r0,0,r6
+ bne 41b
+ lwsync
+
+ /*
+ * At this point we have an interrupt that we have to pass
+ * up to the kernel or qemu; we can't handle it in real mode.
+ * Thus we have to do a partition switch, so we have to
+ * collect the other threads, if we are the first thread
+ * to take an interrupt. To do this, we set the HDEC to 0,
+ * which causes an HDEC interrupt in all threads within 2ns
+ * because the HDEC register is shared between all 4 threads.
+ * However, we don't need to bother if this is an HDEC
+ * interrupt, since the other threads will already be on their
+ * way here in that case.
+ */
+ cmpwi r3,0x100 /* Are we the first here? */
+ bge 43f
+ cmpwi r3,1 /* Are any other threads in the guest? */
+ ble 43f
+ cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
+ beq 40f
+ li r0,0
+ mtspr SPRN_HDEC,r0
+40:
+ /*
+ * Send an IPI to any napping threads, since an HDEC interrupt
+ * doesn't wake CPUs up from nap.
+ */
+ lwz r3,VCORE_NAPPING_THREADS(r5)
+ lwz r4,VCPU_PTID(r9)
+ li r0,1
+ sld r0,r0,r4
+ andc. r3,r3,r0 /* no sense IPI'ing ourselves */
+ beq 43f
+ mulli r4,r4,PACA_SIZE /* get paca for thread 0 */
+ subf r6,r4,r13
+42: andi. r0,r3,1
+ beq 44f
+ ld r8,HSTATE_XICS_PHYS(r6) /* get thread's XICS reg addr */
+ li r0,IPI_PRIORITY
+ li r7,XICS_QIRR
+ stbcix r0,r7,r8 /* trigger the IPI */
+44: srdi. r3,r3,1
+ addi r6,r6,PACA_SIZE
+ bne 42b
+
+ /* Secondary threads wait for primary to do partition switch */
+43: ld r4,VCPU_KVM(r9) /* pointer to struct kvm */
+ ld r5,HSTATE_KVM_VCORE(r13)
+ lwz r3,VCPU_PTID(r9)
+ cmpwi r3,0
+ beq 15f
+ HMT_LOW
+13: lbz r3,VCORE_IN_GUEST(r5)
+ cmpwi r3,0
+ bne 13b
+ HMT_MEDIUM
+ b 16f
+
+ /* Primary thread waits for all the secondaries to exit guest */
+15: lwz r3,VCORE_ENTRY_EXIT(r5)
+ srwi r0,r3,8
+ clrldi r3,r3,56
+ cmpw r3,r0
+ bne 15b
+ isync
+
+ /* Primary thread switches back to host partition */
+ ld r6,KVM_HOST_SDR1(r4)
+ lwz r7,KVM_HOST_LPID(r4)
+ li r8,LPID_RSVD /* switch to reserved LPID */
+ mtspr SPRN_LPID,r8
+ ptesync
+ mtspr SPRN_SDR1,r6 /* switch to partition page table */
+ mtspr SPRN_LPID,r7
+ isync
+ li r0,0
+ stb r0,VCORE_IN_GUEST(r5)
+ lis r8,0x7fff /* MAX_INT@h */
+ mtspr SPRN_HDEC,r8
+
+16: ld r8,KVM_HOST_LPCR(r4)
+ mtspr SPRN_LPCR,r8
+ isync
+ b 33f
+
+ /*
+ * PPC970 guest -> host partition switch code.
+ * We have to lock against concurrent tlbies, and
+ * we have to flush the whole TLB.
+ */
+32: ld r4,VCPU_KVM(r9) /* pointer to struct kvm */
+
+ /* Take the guest's tlbie_lock */
+ lwz r8,PACA_LOCK_TOKEN(r13)
+ addi r3,r4,KVM_TLBIE_LOCK
+24: lwarx r0,0,r3
+ cmpwi r0,0
+ bne 24b
+ stwcx. r8,0,r3
+ bne 24b
+ isync
+
+ ld r7,KVM_HOST_LPCR(r4) /* use kvm->arch.host_lpcr for HID4 */
+ li r0,0x18f
+ rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */
+ or r0,r7,r0
+ ptesync
+ sync
+ mtspr SPRN_HID4,r0 /* switch to reserved LPID */
+ isync
+ li r0,0
+ stw r0,0(r3) /* drop guest tlbie_lock */
+
+ /* invalidate the whole TLB */
+ li r0,256
+ mtctr r0
+ li r6,0
+25: tlbiel r6
+ addi r6,r6,0x1000
+ bdnz 25b
+ ptesync
+
+ /* take native_tlbie_lock */
+ ld r3,toc_tlbie_lock@toc(2)
+24: lwarx r0,0,r3
+ cmpwi r0,0
+ bne 24b
+ stwcx. r8,0,r3
+ bne 24b
+ isync
+
+ ld r6,KVM_HOST_SDR1(r4)
+ mtspr SPRN_SDR1,r6 /* switch to host page table */
+
+ /* Set up host HID4 value */
+ sync
+ mtspr SPRN_HID4,r7
+ isync
+ li r0,0
+ stw r0,0(r3) /* drop native_tlbie_lock */
+
+ lis r8,0x7fff /* MAX_INT@h */
+ mtspr SPRN_HDEC,r8
+
+ /* Disable HDEC interrupts */
+ mfspr r0,SPRN_HID0
+ li r3,0
+ rldimi r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1
+ sync
+ mtspr SPRN_HID0,r0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+
+ /* load host SLB entries */
+33: ld r8,PACA_SLBSHADOWPTR(r13)
+
+ .rept SLB_NUM_BOLTED
+ ld r5,SLBSHADOW_SAVEAREA(r8)
+ ld r6,SLBSHADOW_SAVEAREA+8(r8)
+ andis. r7,r5,SLB_ESID_V@h
+ beq 1f
+ slbmte r6,r5
+1: addi r8,r8,16
+ .endr
+
+ /* Save and reset AMR and UAMOR before turning on the MMU */
+BEGIN_FTR_SECTION
+ mfspr r5,SPRN_AMR
+ mfspr r6,SPRN_UAMOR
+ std r5,VCPU_AMR(r9)
+ std r6,VCPU_UAMOR(r9)
+ li r6,0
+ mtspr SPRN_AMR,r6
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+ /* Restore host DABR and DABRX */
+ ld r5,HSTATE_DABR(r13)
+ li r6,7
+ mtspr SPRN_DABR,r5
+ mtspr SPRN_DABRX,r6
+
+ /* Save guest DSCR in the vcpu, then switch DSCR back to host value */
+BEGIN_FTR_SECTION
+ mfspr r8, SPRN_DSCR /* r8 = guest DSCR */
+ ld r7, HSTATE_DSCR(r13) /* r7 = host DSCR value (a value, not a pointer) */
+ std r8, VCPU_DSCR(r9) /* r9 = vcpu; r7 must not be used as the base */
+ mtspr SPRN_DSCR, r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+ /* Save non-volatile GPRs */
+ std r14, VCPU_GPR(r14)(r9)
+ std r15, VCPU_GPR(r15)(r9)
+ std r16, VCPU_GPR(r16)(r9)
+ std r17, VCPU_GPR(r17)(r9)
+ std r18, VCPU_GPR(r18)(r9)
+ std r19, VCPU_GPR(r19)(r9)
+ std r20, VCPU_GPR(r20)(r9)
+ std r21, VCPU_GPR(r21)(r9)
+ std r22, VCPU_GPR(r22)(r9)
+ std r23, VCPU_GPR(r23)(r9)
+ std r24, VCPU_GPR(r24)(r9)
+ std r25, VCPU_GPR(r25)(r9)
+ std r26, VCPU_GPR(r26)(r9)
+ std r27, VCPU_GPR(r27)(r9)
+ std r28, VCPU_GPR(r28)(r9)
+ std r29, VCPU_GPR(r29)(r9)
+ std r30, VCPU_GPR(r30)(r9)
+ std r31, VCPU_GPR(r31)(r9)
+
+ /* Save SPRGs */
+ mfspr r3, SPRN_SPRG0
+ mfspr r4, SPRN_SPRG1
+ mfspr r5, SPRN_SPRG2
+ mfspr r6, SPRN_SPRG3
+ std r3, VCPU_SPRG0(r9)
+ std r4, VCPU_SPRG1(r9)
+ std r5, VCPU_SPRG2(r9)
+ std r6, VCPU_SPRG3(r9)
+
+ /* Increment yield count if they have a VPA */
+ ld r8, VCPU_VPA(r9) /* do they have a VPA? */
+ cmpdi r8, 0
+ beq 25f
+ lwz r3, LPPACA_YIELDCOUNT(r8)
+ addi r3, r3, 1
+ stw r3, LPPACA_YIELDCOUNT(r8)
+25:
+ /* Save PMU registers if requested */
+ /* r8 and cr0.eq are live here */
+ li r3, 1
+ sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
+ mfspr r4, SPRN_MMCR0 /* save MMCR0 */
+ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
+ isync
+ beq 21f /* if no VPA, save PMU stuff anyway */
+ lbz r7, LPPACA_PMCINUSE(r8)
+ cmpwi r7, 0 /* did they ask for PMU stuff to be saved? */
+ bne 21f
+ std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */
+ b 22f
+21: mfspr r5, SPRN_MMCR1
+ mfspr r6, SPRN_MMCRA
+ std r4, VCPU_MMCR(r9)
+ std r5, VCPU_MMCR + 8(r9)
+ std r6, VCPU_MMCR + 16(r9)
+ mfspr r3, SPRN_PMC1
+ mfspr r4, SPRN_PMC2
+ mfspr r5, SPRN_PMC3
+ mfspr r6, SPRN_PMC4
+ mfspr r7, SPRN_PMC5
+ mfspr r8, SPRN_PMC6
+BEGIN_FTR_SECTION
+ mfspr r10, SPRN_PMC7
+ mfspr r11, SPRN_PMC8
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ stw r3, VCPU_PMC(r9)
+ stw r4, VCPU_PMC + 4(r9)
+ stw r5, VCPU_PMC + 8(r9)
+ stw r6, VCPU_PMC + 12(r9)
+ stw r7, VCPU_PMC + 16(r9)
+ stw r8, VCPU_PMC + 20(r9)
+BEGIN_FTR_SECTION
+ stw r10, VCPU_PMC + 24(r9)
+ stw r11, VCPU_PMC + 28(r9)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+22:
+ /* save FP state */
+ mr r3, r9
+ bl .kvmppc_save_fp
+
+ /* Secondary threads go off to take a nap on POWER7 */
+BEGIN_FTR_SECTION
+ lwz r0,VCPU_PTID(r3)
+ cmpwi r0,0
+ bne secondary_nap
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+ /*
+ * Reload DEC. HDEC interrupts were disabled when
+ * we reloaded the host's LPCR value.
+ */
+ ld r3, HSTATE_DECEXP(r13)
+ mftb r4
+ subf r4, r4, r3
+ mtspr SPRN_DEC, r4
+
+ /* Reload the host's PMU registers */
+ ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */
+ lbz r4, LPPACA_PMCINUSE(r3)
+ cmpwi r4, 0
+ beq 23f /* skip if not */
+ lwz r3, HSTATE_PMC(r13)
+ lwz r4, HSTATE_PMC + 4(r13)
+ lwz r5, HSTATE_PMC + 8(r13)
+ lwz r6, HSTATE_PMC + 12(r13)
+ lwz r8, HSTATE_PMC + 16(r13)
+ lwz r9, HSTATE_PMC + 20(r13)
+BEGIN_FTR_SECTION
+ lwz r10, HSTATE_PMC + 24(r13)
+ lwz r11, HSTATE_PMC + 28(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ mtspr SPRN_PMC1, r3
+ mtspr SPRN_PMC2, r4
+ mtspr SPRN_PMC3, r5
+ mtspr SPRN_PMC4, r6
+ mtspr SPRN_PMC5, r8
+ mtspr SPRN_PMC6, r9
+BEGIN_FTR_SECTION
+ mtspr SPRN_PMC7, r10
+ mtspr SPRN_PMC8, r11
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ ld r3, HSTATE_MMCR(r13)
+ ld r4, HSTATE_MMCR + 8(r13)
+ ld r5, HSTATE_MMCR + 16(r13)
+ mtspr SPRN_MMCR1, r4
+ mtspr SPRN_MMCRA, r5
+ mtspr SPRN_MMCR0, r3
+ isync
+23:
+ /*
+ * For external and machine check interrupts, we need
+ * to call the Linux handler to process the interrupt.
+ * We do that by jumping to the interrupt vector address
+ * which we have in r12. The [h]rfid at the end of the
+ * handler will return to the book3s_hv_interrupts.S code.
+ * For other interrupts we do the rfid to get back
+ * to the book3s_hv_interrupts.S code here.
+ */
+ ld r8, HSTATE_VMHANDLER(r13)
+ ld r7, HSTATE_HOST_MSR(r13)
+
+ cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
+ beq 11f
+ cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK
+
+ /* RFI into the highmem handler, or branch to interrupt handler */
+12: mfmsr r6
+ mtctr r12
+ li r0, MSR_RI
+ andc r6, r6, r0
+ mtmsrd r6, 1 /* Clear RI in MSR */
+ mtsrr0 r8
+ mtsrr1 r7
+ beqctr
+ RFI
+
+11:
+BEGIN_FTR_SECTION
+ b 12b
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ mtspr SPRN_HSRR0, r8
+ mtspr SPRN_HSRR1, r7
+ ba 0x500
+
+/*
+ * Check whether an HDSI is an HPTE not found fault or something else.
+ * If it is an HPTE not found fault that is due to the guest accessing
+ * a page that they have mapped but which we have paged out, then
+ * we continue on with the guest exit path. In all other cases,
+ * reflect the HDSI to the guest as a DSI.
+ */
+kvmppc_hdsi: /* r9 = vcpu, r10 = guest PC, r11 = guest MSR, r13 = paca */
+ mfspr r4, SPRN_HDAR /* r4 = faulting data address */
+ mfspr r6, SPRN_HDSISR /* r6 = fault status bits */
+ /* HPTE not found fault or protection fault? */
+ andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
+ beq 1f /* if not, send it to the guest */
+ andi. r0, r11, MSR_DR /* data relocation enabled? */
+ beq 3f
+ clrrdi r0, r4, 28 /* clear the low 28 bits of the address */
+ PPC_SLBFEE_DOT(r5, r0) /* if so, look up SLB */
+ bne 1f /* if no SLB entry found */
+4: std r4, VCPU_FAULT_DAR(r9) /* r5 = SLB lookup result or VRMA value */
+ stw r6, VCPU_FAULT_DSISR(r9)
+
+ /* Search the hash table. */
+ mr r3, r9 /* vcpu pointer */
+ li r7, 1 /* data fault */
+ bl .kvmppc_hpte_hv_fault /* also passed: r4 = address, r5, r6 = status */
+ ld r9, HSTATE_KVM_VCPU(r13) /* reload r9-r12 after the call */
+ ld r10, VCPU_PC(r9)
+ ld r11, VCPU_MSR(r9)
+ li r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
+ cmpdi r3, 0 /* retry the instruction */
+ beq 6f
+ cmpdi r3, -1 /* handle in kernel mode */
+ beq nohpte_cont
+ cmpdi r3, -2 /* MMIO emulation; need instr word */
+ beq 2f
+
+ /* Synthesize a DSI for the guest */
+ ld r4, VCPU_FAULT_DAR(r9)
+ mr r6, r3 /* any other return value is used as the guest DSISR */
+1: mtspr SPRN_DAR, r4
+ mtspr SPRN_DSISR, r6
+ mtspr SPRN_SRR0, r10 /* interrupted guest PC/MSR go to SRR0/1 */
+ mtspr SPRN_SRR1, r11
+ li r10, BOOK3S_INTERRUPT_DATA_STORAGE /* new guest PC: DSI vector */
+ li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
+ rotldi r11, r11, 63
+6: ld r7, VCPU_CTR(r9) /* reload guest CTR and XER before re-entry */
+ lwz r8, VCPU_XER(r9)
+ mtctr r7
+ mtxer r8
+ mr r4, r9 /* fast_guest_return expects vcpu in r4 */
+ b fast_guest_return
+
+3: ld r5, VCPU_KVM(r9) /* not relocated, use VRMA */
+ ld r5, KVM_VRMA_SLB_V(r5)
+ b 4b
+
+ /* If this is for emulated MMIO, load the instruction word */
+2: li r8, KVM_INST_FETCH_FAILED /* In case lwz faults */
+
+ /* Set guest mode to 'jump over instruction' so if lwz faults
+ * we'll just continue at the next IP. */
+ li r0, KVM_GUEST_MODE_SKIP
+ stb r0, HSTATE_IN_GUEST(r13)
+
+ /* Do the access with MSR:DR enabled */
+ mfmsr r3
+ ori r4, r3, MSR_DR /* Enable paging for data */
+ mtmsrd r4
+ lwz r8, 0(r10) /* r10 = guest PC: fetch the faulting instruction */
+ mtmsrd r3 /* back to the previous MSR */
+
+ /* Store the result */
+ stw r8, VCPU_LAST_INST(r9)
+
+ /* Unset guest mode. */
+ li r0, KVM_GUEST_MODE_NONE
+ stb r0, HSTATE_IN_GUEST(r13)
+ b nohpte_cont
+
+/*
+ * Similarly for an HISI, reflect it to the guest as an ISI unless
+ * it is an HPTE not found fault for a page that we have paged out.
+ */
+kvmppc_hisi: /* r9 = vcpu, r10 = guest PC, r11 = guest MSR, r13 = paca */
+ andis. r0, r11, SRR1_ISI_NOPT@h /* fault status is in the saved MSR image */
+ beq 1f /* NOPT bit clear: send it to the guest */
+ andi. r0, r11, MSR_IR /* instruction relocation enabled? */
+ beq 3f
+ clrrdi r0, r10, 28 /* clear the low 28 bits of the PC */
+ PPC_SLBFEE_DOT(r5, r0) /* if so, look up SLB */
+ bne 1f /* if no SLB entry found */
+4:
+ /* Search the hash table. */
+ mr r3, r9 /* vcpu pointer */
+ mr r4, r10 /* faulting address = guest PC */
+ mr r6, r11 /* status = saved MSR image */
+ li r7, 0 /* instruction fault */
+ bl .kvmppc_hpte_hv_fault /* r5 = SLB lookup result or VRMA value */
+ ld r9, HSTATE_KVM_VCPU(r13) /* reload r9-r12 after the call */
+ ld r10, VCPU_PC(r9)
+ ld r11, VCPU_MSR(r9)
+ li r12, BOOK3S_INTERRUPT_H_INST_STORAGE
+ cmpdi r3, 0 /* retry the instruction */
+ beq 6f
+ cmpdi r3, -1 /* handle in kernel mode */
+ beq nohpte_cont
+
+ /* Synthesize an ISI for the guest */
+ mr r11, r3 /* any other return value is used as the guest SRR1 */
+1: mtspr SPRN_SRR0, r10
+ mtspr SPRN_SRR1, r11
+ li r10, BOOK3S_INTERRUPT_INST_STORAGE /* new guest PC: ISI vector */
+ li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
+ rotldi r11, r11, 63
+6: ld r7, VCPU_CTR(r9) /* reload guest CTR and XER before re-entry */
+ lwz r8, VCPU_XER(r9)
+ mtctr r7
+ mtxer r8
+ mr r4, r9 /* fast_guest_return expects vcpu in r4 */
+ b fast_guest_return
+
+3: ld r6, VCPU_KVM(r9) /* not relocated, use VRMA */
+ ld r5, KVM_VRMA_SLB_V(r6) /* r6 is reloaded from r11 at 4: above */
+ b 4b
+
+/*
+ * Try to handle an hcall in real mode.
+ * Returns to the guest if we handle it, or continues on up to
+ * the kernel if we can't (i.e. if we don't have a handler for
+ * it, or if the handler returns H_TOO_HARD).
+ */
+ .globl hcall_try_real_mode
+hcall_try_real_mode: /* r9 = vcpu, r11 = guest MSR, r13 = paca */
+ ld r3,VCPU_GPR(r3)(r9) /* saved guest r3 is used as the table offset */
+ andi. r0,r11,MSR_PR /* MSR_PR set in the guest MSR? */
+ bne hcall_real_cont /* if so, do not handle it here */
+ clrrdi r3,r3,2 /* clear low 2 bits: table entries are 4 bytes */
+ cmpldi r3,hcall_real_table_end - hcall_real_table
+ bge hcall_real_cont /* offset is past the end of the table */
+ LOAD_REG_ADDR(r4, hcall_real_table)
+ lwzx r3,r3,r4 /* entry = handler address minus table address */
+ cmpwi r3,0
+ beq hcall_real_cont /* 0 = no real-mode handler */
+ add r3,r3,r4 /* convert to the handler's address */
+ mtctr r3
+ mr r3,r9 /* get vcpu pointer */
+ ld r4,VCPU_GPR(r4)(r9) /* saved guest r4 for the handler */
+ bctrl
+ cmpdi r3,H_TOO_HARD
+ beq hcall_real_fallback
+ ld r4,HSTATE_KVM_VCPU(r13)
+ std r3,VCPU_GPR(r3)(r4) /* handler result becomes guest r3 */
+ ld r10,VCPU_PC(r4)
+ ld r11,VCPU_MSR(r4)
+ b fast_guest_return
+
+ /* We've attempted a real mode hcall, but it was punted back
+ * to userspace. We need to restore some clobbered volatiles
+ * before resuming the pass-it-to-qemu path */
+hcall_real_fallback:
+ li r12,BOOK3S_INTERRUPT_SYSCALL
+ ld r9, HSTATE_KVM_VCPU(r13)
+
+ b hcall_real_cont
+
+ .globl hcall_real_table
+hcall_real_table:
+ .long 0 /* 0 - unused */
+ .long .kvmppc_h_remove - hcall_real_table /* 0x04 - H_REMOVE */
+ .long .kvmppc_h_enter - hcall_real_table /* 0x08 - H_ENTER */
+ .long .kvmppc_h_read - hcall_real_table /* 0x0c - H_READ */
+ .long 0 /* 0x10 - H_CLEAR_MOD */
+ .long 0 /* 0x14 - H_CLEAR_REF */
+ .long .kvmppc_h_protect - hcall_real_table /* 0x18 - H_PROTECT */
+ .long 0 /* 0x1c - H_GET_TCE */
+ .long .kvmppc_h_put_tce - hcall_real_table /* 0x20 - H_PUT_TCE */
+ .long 0 /* 0x24 - H_SET_SPRG0 */
+ .long .kvmppc_h_set_dabr - hcall_real_table /* 0x28 - H_SET_DABR */
+ .long 0 /* 0x2c */
+ .long 0 /* 0x30 */
+ .long 0 /* 0x34 */
+ .long 0 /* 0x38 */
+ .long 0 /* 0x3c */
+ .long 0 /* 0x40 */
+ .long 0 /* 0x44 */
+ .long 0 /* 0x48 */
+ .long 0 /* 0x4c */
+ .long 0 /* 0x50 */
+ .long 0 /* 0x54 */
+ .long 0 /* 0x58 */
+ .long 0 /* 0x5c */
+ .long 0 /* 0x60 */
+ .long 0 /* 0x64 */
+ .long 0 /* 0x68 */
+ .long 0 /* 0x6c */
+ .long 0 /* 0x70 */
+ .long 0 /* 0x74 */
+ .long 0 /* 0x78 */
+ .long 0 /* 0x7c */
+ .long 0 /* 0x80 */
+ .long 0 /* 0x84 */
+ .long 0 /* 0x88 */
+ .long 0 /* 0x8c */
+ .long 0 /* 0x90 */
+ .long 0 /* 0x94 */
+ .long 0 /* 0x98 */
+ .long 0 /* 0x9c */
+ .long 0 /* 0xa0 */
+ .long 0 /* 0xa4 */
+ .long 0 /* 0xa8 */
+ .long 0 /* 0xac */
+ .long 0 /* 0xb0 */
+ .long 0 /* 0xb4 */
+ .long 0 /* 0xb8 */
+ .long 0 /* 0xbc */
+ .long 0 /* 0xc0 */
+ .long 0 /* 0xc4 */
+ .long 0 /* 0xc8 */
+ .long 0 /* 0xcc */
+ .long 0 /* 0xd0 */
+ .long 0 /* 0xd4 */
+ .long 0 /* 0xd8 */
+ .long 0 /* 0xdc */
+ .long .kvmppc_h_cede - hcall_real_table /* 0xe0 - H_CEDE */
+ .long 0 /* 0xe4 */
+ .long 0 /* 0xe8 */
+ .long 0 /* 0xec */
+ .long 0 /* 0xf0 */
+ .long 0 /* 0xf4 */
+ .long 0 /* 0xf8 */
+ .long 0 /* 0xfc */
+ .long 0 /* 0x100 */
+ .long 0 /* 0x104 */
+ .long 0 /* 0x108 */
+ .long 0 /* 0x10c */
+ .long 0 /* 0x110 */
+ .long 0 /* 0x114 */
+ .long 0 /* 0x118 */
+ .long 0 /* 0x11c */
+ .long 0 /* 0x120 */
+ .long .kvmppc_h_bulk_remove - hcall_real_table /* 0x124 - H_BULK_REMOVE */
+hcall_real_table_end:
+
+ignore_hdec: /* HDEC interrupt taken but HDEC >= 0; r9 = vcpu, r10/r11 = guest PC/MSR */
+ mr r4,r9 /* fast_guest_return expects vcpu in r4 */
+ b fast_guest_return
+
+bounce_ext_interrupt: /* r9 = vcpu, r10 = guest PC, r11 = guest MSR */
+ mr r4,r9 /* fast_guest_return expects vcpu in r4 */
+ mtspr SPRN_SRR0,r10 /* interrupted guest PC/MSR go to SRR0/1 */
+ mtspr SPRN_SRR1,r11
+ li r10,BOOK3S_INTERRUPT_EXTERNAL /* new guest PC: external interrupt vector */
+ li r11,(MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
+ rotldi r11,r11,63
+ b fast_guest_return
+
+_GLOBAL(kvmppc_h_set_dabr)
+ std r4,VCPU_DABR(r3) /* r3 = vcpu, r4 = new DABR value: record it */
+ mtspr SPRN_DABR,r4 /* and load it into the DABR register */
+ li r3,0 /* return 0 */
+ blr
+
+_GLOBAL(kvmppc_h_cede)
+ ori r11,r11,MSR_EE
+ std r11,VCPU_MSR(r3)
+ li r0,1
+ stb r0,VCPU_CEDED(r3)
+ sync /* order setting ceded vs. testing prodded */
+ lbz r5,VCPU_PRODDED(r3)
+ cmpwi r5,0
+ bne 1f
+ li r0,0 /* set trap to 0 to say hcall is handled */
+ stw r0,VCPU_TRAP(r3)
+ li r0,H_SUCCESS
+ std r0,VCPU_GPR(r3)(r3)
+BEGIN_FTR_SECTION
+ b 2f /* just send it up to host on 970 */
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
+
+ /*
+ * Set our bit in the bitmask of napping threads unless all the
+ * other threads are already napping, in which case we send this
+ * up to the host.
+ */
+ ld r5,HSTATE_KVM_VCORE(r13)
+ lwz r6,VCPU_PTID(r3)
+ lwz r8,VCORE_ENTRY_EXIT(r5)
+ clrldi r8,r8,56
+ li r0,1
+ sld r0,r0,r6
+ addi r6,r5,VCORE_NAPPING_THREADS
+31: lwarx r4,0,r6
+ or r4,r4,r0
+ PPC_POPCNTW(r7,r4)
+ cmpw r7,r8
+ bge 2f
+ stwcx. r4,0,r6
+ bne 31b
+ li r0,1
+ stb r0,HSTATE_NAPPING(r13)
+ /* order napping_threads update vs testing entry_exit_count */
+ lwsync
+ mr r4,r3
+ lwz r7,VCORE_ENTRY_EXIT(r5)
+ cmpwi r7,0x100
+ bge 33f /* another thread already exiting */
+
+/*
+ * Although not specifically required by the architecture, POWER7
+ * preserves the following registers in nap mode, even if an SMT mode
+ * switch occurs: SLB entries, PURR, SPURR, AMOR, UAMOR, AMR, SPRG0-3,
+ * DAR, DSISR, DABR, DABRX, DSCR, PMCx, MMCRx, SIAR, SDAR.
+ */
+ /* Save non-volatile GPRs */
+ std r14, VCPU_GPR(r14)(r3)
+ std r15, VCPU_GPR(r15)(r3)
+ std r16, VCPU_GPR(r16)(r3)
+ std r17, VCPU_GPR(r17)(r3)
+ std r18, VCPU_GPR(r18)(r3)
+ std r19, VCPU_GPR(r19)(r3)
+ std r20, VCPU_GPR(r20)(r3)
+ std r21, VCPU_GPR(r21)(r3)
+ std r22, VCPU_GPR(r22)(r3)
+ std r23, VCPU_GPR(r23)(r3)
+ std r24, VCPU_GPR(r24)(r3)
+ std r25, VCPU_GPR(r25)(r3)
+ std r26, VCPU_GPR(r26)(r3)
+ std r27, VCPU_GPR(r27)(r3)
+ std r28, VCPU_GPR(r28)(r3)
+ std r29, VCPU_GPR(r29)(r3)
+ std r30, VCPU_GPR(r30)(r3)
+ std r31, VCPU_GPR(r31)(r3)
+
+ /* save FP state */
+ bl .kvmppc_save_fp
+
+ /*
+ * Take a nap until a decrementer or external interrupt occurs,
+ * with PECE1 (wake on decr) and PECE0 (wake on external) set in LPCR
+ */
+ li r0,0x80
+ stb r0,PACAPROCSTART(r13)
+ mfspr r5,SPRN_LPCR
+ ori r5,r5,LPCR_PECE0 | LPCR_PECE1
+ mtspr SPRN_LPCR,r5
+ isync
+ li r0, 0
+ std r0, HSTATE_SCRATCH0(r13)
+ ptesync
+ ld r0, HSTATE_SCRATCH0(r13)
+1: cmpd r0, r0
+ bne 1b
+ nap
+ b .
+
+kvm_end_cede:
+ /* Woken by external or decrementer interrupt */
+ ld r1, HSTATE_HOST_R1(r13)
+ ld r2, PACATOC(r13)
+
+ /* If we're a secondary thread and we got here by an IPI, ack it */
+ ld r4,HSTATE_KVM_VCPU(r13)
+ lwz r3,VCPU_PTID(r4)
+ cmpwi r3,0
+ beq 27f
+ mfspr r3,SPRN_SRR1
+ rlwinm r3,r3,44-31,0x7 /* extract wake reason field */
+ cmpwi r3,4 /* was it an external interrupt? */
+ bne 27f
+ ld r5, HSTATE_XICS_PHYS(r13)
+ li r0,0xff
+ li r6,XICS_QIRR
+ li r7,XICS_XIRR
+ lwzcix r8,r5,r7 /* ack the interrupt */
+ sync
+ stbcix r0,r5,r6 /* clear it */
+ stwcix r8,r5,r7 /* EOI it */
+27:
+ /* load up FP state */
+ bl kvmppc_load_fp
+
+ /* Load NV GPRS */
+ ld r14, VCPU_GPR(r14)(r4)
+ ld r15, VCPU_GPR(r15)(r4)
+ ld r16, VCPU_GPR(r16)(r4)
+ ld r17, VCPU_GPR(r17)(r4)
+ ld r18, VCPU_GPR(r18)(r4)
+ ld r19, VCPU_GPR(r19)(r4)
+ ld r20, VCPU_GPR(r20)(r4)
+ ld r21, VCPU_GPR(r21)(r4)
+ ld r22, VCPU_GPR(r22)(r4)
+ ld r23, VCPU_GPR(r23)(r4)
+ ld r24, VCPU_GPR(r24)(r4)
+ ld r25, VCPU_GPR(r25)(r4)
+ ld r26, VCPU_GPR(r26)(r4)
+ ld r27, VCPU_GPR(r27)(r4)
+ ld r28, VCPU_GPR(r28)(r4)
+ ld r29, VCPU_GPR(r29)(r4)
+ ld r30, VCPU_GPR(r30)(r4)
+ ld r31, VCPU_GPR(r31)(r4)
+
+ /* clear our bit in vcore->napping_threads */
+33: ld r5,HSTATE_KVM_VCORE(r13)
+ lwz r3,VCPU_PTID(r4)
+ li r0,1
+ sld r0,r0,r3
+ addi r6,r5,VCORE_NAPPING_THREADS
+32: lwarx r7,0,r6
+ andc r7,r7,r0
+ stwcx. r7,0,r6
+ bne 32b
+ li r0,0
+ stb r0,HSTATE_NAPPING(r13)
+
+ /* see if any other thread is already exiting */
+ lwz r0,VCORE_ENTRY_EXIT(r5)
+ cmpwi r0,0x100
+ blt kvmppc_cede_reentry /* if not go back to guest */
+
+ /* some threads are exiting, so go to the guest exit path */
+ b hcall_real_fallback
+
+ /* cede when already previously prodded case */
+1: li r0,0
+ stb r0,VCPU_PRODDED(r3)
+ sync /* order testing prodded vs. clearing ceded */
+ stb r0,VCPU_CEDED(r3)
+ li r3,H_SUCCESS
+ blr
+
+ /* we've ceded but we want to give control to the host */
+2: li r3,H_TOO_HARD
+ blr
+
+secondary_too_late: /* r13 = paca; falls through into secondary_nap */
+ ld r5,HSTATE_KVM_VCORE(r13)
+ HMT_LOW
+13: lbz r3,VCORE_IN_GUEST(r5) /* spin until vcore->in_guest reads 0 */
+ cmpwi r3,0
+ bne 13b
+ HMT_MEDIUM
+ ld r11,PACA_SLBSHADOWPTR(r13)
+
+ .rept SLB_NUM_BOLTED
+ ld r5,SLBSHADOW_SAVEAREA(r11) /* r5 = ESID word of this shadow entry */
+ ld r6,SLBSHADOW_SAVEAREA+8(r11) /* r6 = second word, 8 bytes further on */
+ andis. r7,r5,SLB_ESID_V@h /* entry valid? */
+ beq 1f /* skip entries without the valid bit */
+ slbmte r6,r5
+1: addi r11,r11,16 /* each shadow entry is 16 bytes */
+ .endr
+
+secondary_nap: /* r13 = paca; does not return to its caller */
+ /* Clear any pending IPI - assume we're a secondary thread */
+ ld r5, HSTATE_XICS_PHYS(r13)
+ li r7, XICS_XIRR
+ lwzcix r3, r5, r7 /* ack any pending interrupt */
+ rlwinm. r0, r3, 0, 0xffffff /* any pending? */
+ beq 37f
+ sync
+ li r0, 0xff
+ li r6, XICS_QIRR
+ stbcix r0, r5, r6 /* clear the IPI */
+ stwcix r3, r5, r7 /* EOI it */
+37: sync
+
+ /* increment the nap count and then go to nap mode */
+ ld r4, HSTATE_KVM_VCORE(r13)
+ addi r4, r4, VCORE_NAP_COUNT
+ lwsync /* make previous updates visible */
+51: lwarx r3, 0, r4 /* atomic increment of vcore nap count */
+ addi r3, r3, 1
+ stwcx. r3, 0, r4
+ bne 51b /* reservation lost, retry */
+
+ li r3, LPCR_PECE0
+ mfspr r4, SPRN_LPCR
+ rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 /* set PECE0, clear PECE1 */
+ mtspr SPRN_LPCR, r4
+ isync
+ li r0, 0
+ std r0, HSTATE_SCRATCH0(r13)
+ ptesync
+ ld r0, HSTATE_SCRATCH0(r13) /* read the value back after the ptesync */
+1: cmpd r0, r0 /* compare/branch depends on the loaded value */
+ bne 1b /* never taken: r0 always equals itself */
+ nap
+ b . /* nap does not continue here */
+
+/*
+ * Save away FP, VMX and VSX registers.
+ * r3 = vcpu pointer
+ */
+_GLOBAL(kvmppc_save_fp)
+ mfmsr r9 /* r9 = caller's MSR; note r6, r8 and r9 are clobbered */
+ ori r8,r9,MSR_FP /* r8 = MSR with the FP/VMX/VSX facilities enabled */
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+ oris r8,r8,MSR_VEC@h
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+ oris r8,r8,MSR_VSX@h
+END_FTR_SECTION_IFSET(CPU_FTR_VSX)
+#endif
+ mtmsrd r8
+ isync
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+ reg = 0
+ .rept 32
+ li r6,reg*16+VCPU_VSRS /* 16 bytes per VSX register slot */
+ STXVD2X(reg,r6,r3)
+ reg = reg + 1
+ .endr
+FTR_SECTION_ELSE
+#endif
+ reg = 0
+ .rept 32
+ stfd reg,reg*8+VCPU_FPRS(r3) /* 8 bytes per FP register slot */
+ reg = reg + 1
+ .endr
+#ifdef CONFIG_VSX
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
+#endif
+ mffs fr0 /* fr0 already saved above, so it is free as scratch */
+ stfd fr0,VCPU_FPSCR(r3)
+
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+ reg = 0
+ .rept 32
+ li r6,reg*16+VCPU_VRS /* 16 bytes per vector register slot */
+ stvx reg,r6,r3
+ reg = reg + 1
+ .endr
+ mfvscr vr0 /* vr0 already saved above, so it is free as scratch */
+ li r6,VCPU_VSCR
+ stvx vr0,r6,r3
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif
+ mfspr r6,SPRN_VRSAVE
+ stw r6,VCPU_VRSAVE(r3)
+ mtmsrd r9 /* restore the caller's MSR */
+ isync
+ blr
+
+/*
+ * Load up FP, VMX and VSX registers
+ * r4 = vcpu pointer
+ */
+ .globl kvmppc_load_fp
+kvmppc_load_fp:
+ mfmsr r9 /* r9 = caller's MSR; note r7, r8 and r9 are clobbered */
+ ori r8,r9,MSR_FP /* r8 = MSR with the FP/VMX/VSX facilities enabled */
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+ oris r8,r8,MSR_VEC@h
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+ oris r8,r8,MSR_VSX@h
+END_FTR_SECTION_IFSET(CPU_FTR_VSX)
+#endif
+ mtmsrd r8
+ isync
+ lfd fr0,VCPU_FPSCR(r4) /* FPSCR first: fr0 is overwritten by the loads below */
+ MTFSF_L(fr0)
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+ reg = 0
+ .rept 32
+ li r7,reg*16+VCPU_VSRS /* 16 bytes per VSX register slot */
+ LXVD2X(reg,r7,r4)
+ reg = reg + 1
+ .endr
+FTR_SECTION_ELSE
+#endif
+ reg = 0
+ .rept 32
+ lfd reg,reg*8+VCPU_FPRS(r4) /* 8 bytes per FP register slot */
+ reg = reg + 1
+ .endr
+#ifdef CONFIG_VSX
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
+#endif
+
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+ li r7,VCPU_VSCR
+ lvx vr0,r7,r4 /* VSCR first: vr0 is overwritten by the loads below */
+ mtvscr vr0
+ reg = 0
+ .rept 32
+ li r7,reg*16+VCPU_VRS /* 16 bytes per vector register slot */
+ lvx reg,r7,r4
+ reg = reg + 1
+ .endr
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif
+ lwz r7,VCPU_VRSAVE(r4)
+ mtspr SPRN_VRSAVE,r7
+ blr /* MSR from r9 is not restored: facilities stay enabled */
diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S
new file mode 100644
index 00000000..3e35383b
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_interrupts.S
@@ -0,0 +1,201 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/kvm_asm.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/asm-offsets.h>
+#include <asm/exception-64s.h>
+
+#if defined(CONFIG_PPC_BOOK3S_64)
+
+#define ULONG_SIZE 8 /* stride of one GPR slot, consumed by VCPU_GPR() */
+#define FUNC(name) GLUE(.,name) /* call target is the '.'-prefixed symbol */
+
+#elif defined(CONFIG_PPC_BOOK3S_32)
+
+#define ULONG_SIZE 4 /* stride of one GPR slot, consumed by VCPU_GPR() */
+#define FUNC(name) name /* call target is the plain symbol */
+
+#endif /* CONFIG_PPC_BOOK3S_XX */
+
+
+#define VCPU_GPR(n) (VCPU_GPRS + (n * ULONG_SIZE)) /* offset of GPR n inside the vcpu */
+#define VCPU_LOAD_NVGPRS(vcpu) /* load r14-r31 from the vcpu whose pointer is in register 'vcpu' */ \
+ PPC_LL r14, VCPU_GPR(r14)(vcpu); \
+ PPC_LL r15, VCPU_GPR(r15)(vcpu); \
+ PPC_LL r16, VCPU_GPR(r16)(vcpu); \
+ PPC_LL r17, VCPU_GPR(r17)(vcpu); \
+ PPC_LL r18, VCPU_GPR(r18)(vcpu); \
+ PPC_LL r19, VCPU_GPR(r19)(vcpu); \
+ PPC_LL r20, VCPU_GPR(r20)(vcpu); \
+ PPC_LL r21, VCPU_GPR(r21)(vcpu); \
+ PPC_LL r22, VCPU_GPR(r22)(vcpu); \
+ PPC_LL r23, VCPU_GPR(r23)(vcpu); \
+ PPC_LL r24, VCPU_GPR(r24)(vcpu); \
+ PPC_LL r25, VCPU_GPR(r25)(vcpu); \
+ PPC_LL r26, VCPU_GPR(r26)(vcpu); \
+ PPC_LL r27, VCPU_GPR(r27)(vcpu); \
+ PPC_LL r28, VCPU_GPR(r28)(vcpu); \
+ PPC_LL r29, VCPU_GPR(r29)(vcpu); \
+ PPC_LL r30, VCPU_GPR(r30)(vcpu); \
+ PPC_LL r31, VCPU_GPR(r31)(vcpu); \
+
+/*****************************************************************************
+ * *
+ * Guest entry / exit code that is in kernel module memory (highmem) *
+ * *
+ ****************************************************************************/
+
+/* Registers on entry:
+ * r3: kvm_run pointer
+ * r4: vcpu pointer
+ *
+ * Builds a SWITCH_FRAME_SIZE stack frame holding the host's r3/r4,
+ * r14-r31, CR and LR, loads the guest's r14-r31 and branches to the
+ * entry trampoline.
+ */
+_GLOBAL(__kvmppc_vcpu_run)
+
+kvm_start_entry:
+ /* Store LR in the caller's frame before a new frame is pushed */
+ mflr r0
+ PPC_STL r0,PPC_LR_STKOFF(r1)
+
+ /* Push a new frame; host state is saved into it below */
+ PPC_STLU r1, -SWITCH_FRAME_SIZE(r1)
+
+ /* Save r3 (kvm_run) and r4 (vcpu) */
+ SAVE_2GPRS(3, r1)
+
+ /* Save non-volatile registers (r14 - r31) */
+ SAVE_NVGPRS(r1)
+
+ /* Save CR (r14 is free to use: it was just saved) */
+ mfcr r14
+ stw r14, _CCR(r1)
+
+ /* Save LR a second time, in the new frame (r0 still holds it) */
+ PPC_STL r0, _LINK(r1)
+
+ /* Load non-volatile guest state from the vcpu */
+ VCPU_LOAD_NVGPRS(r4)
+
+kvm_start_lightweight:
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ PPC_LL r3, VCPU_HFLAGS(r4)
+ rldicl r3, r3, 0, 63 /* r3 &= 1 */
+ stb r3, HSTATE_RESTORE_HID5(r13) /* lowest hflags bit is handed over via the r13-based area */
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+ PPC_LL r4, VCPU_SHADOW_MSR(r4) /* get shadow_msr; r4 no longer holds the vcpu */
+
+ /* Jump to segment patching handler and into our guest */
+ bl FUNC(kvmppc_entry_trampoline)
+ nop
+
+/*
+ * This is the handler in module memory. It is jumped to from the
+ * lowmem trampoline code, so it is effectively the guest exit code.
+ *
+ */
+
+.global kvmppc_handler_highmem
+kvmppc_handler_highmem:
+
+ /*
+ * Register usage at this point:
+ *
+ * R1 = host R1
+ * R2 = host R2
+ * R12 = exit handler id
+ * R13 = PACA
+ * SVCPU.* = guest *
+ *
+ */
+
+ /* R7 = vcpu, read back from the slot where entry saved r4 */
+ PPC_LL r7, GPR4(r1)
+
+ PPC_STL r14, VCPU_GPR(r14)(r7) /* write guest r14-r31 back into the vcpu */
+ PPC_STL r15, VCPU_GPR(r15)(r7)
+ PPC_STL r16, VCPU_GPR(r16)(r7)
+ PPC_STL r17, VCPU_GPR(r17)(r7)
+ PPC_STL r18, VCPU_GPR(r18)(r7)
+ PPC_STL r19, VCPU_GPR(r19)(r7)
+ PPC_STL r20, VCPU_GPR(r20)(r7)
+ PPC_STL r21, VCPU_GPR(r21)(r7)
+ PPC_STL r22, VCPU_GPR(r22)(r7)
+ PPC_STL r23, VCPU_GPR(r23)(r7)
+ PPC_STL r24, VCPU_GPR(r24)(r7)
+ PPC_STL r25, VCPU_GPR(r25)(r7)
+ PPC_STL r26, VCPU_GPR(r26)(r7)
+ PPC_STL r27, VCPU_GPR(r27)(r7)
+ PPC_STL r28, VCPU_GPR(r28)(r7)
+ PPC_STL r29, VCPU_GPR(r29)(r7)
+ PPC_STL r30, VCPU_GPR(r30)(r7)
+ PPC_STL r31, VCPU_GPR(r31)(r7)
+
+ /* Pass the exit number as 3rd argument to kvmppc_handle_exit */
+ mr r5, r12
+
+ /* Restore r3 (kvm_run) and r4 (vcpu) as 1st and 2nd argument */
+ REST_2GPRS(3, r1)
+ bl FUNC(kvmppc_handle_exit)
+
+ /* If RESUME_GUEST, re-enter without reloading r14-r31 */
+ cmpwi r3, RESUME_GUEST
+ beq kvm_loop_lightweight
+
+ cmpwi r3, RESUME_GUEST_NV /* re-enter, but reload r14-r31 from the vcpu first */
+ beq kvm_loop_heavyweight
+
+kvm_exit_loop:
+
+ PPC_LL r4, _LINK(r1) /* LR saved at entry */
+ mtlr r4
+
+ lwz r14, _CCR(r1) /* CR saved at entry; r14 itself is restored just below */
+ mtcr r14
+
+ /* Restore non-volatile host registers (r14 - r31) */
+ REST_NVGPRS(r1)
+
+ addi r1, r1, SWITCH_FRAME_SIZE /* pop the frame pushed at kvm_start_entry */
+ blr
+
+kvm_loop_heavyweight:
+
+ PPC_LL r4, _LINK(r1) /* LR saved in our frame at entry ... */
+ PPC_STL r4, (PPC_LR_STKOFF + SWITCH_FRAME_SIZE)(r1) /* ... rewritten into the caller's frame LR slot */
+
+ /* Reload r3 (kvm_run) and r4 (vcpu) from the stack frame */
+ REST_2GPRS(3, r1)
+
+ /* Load non-volatile guest state from the vcpu */
+ VCPU_LOAD_NVGPRS(r4)
+
+ /* Re-enter at kvm_start_lightweight */
+ b kvm_start_lightweight
+
+kvm_loop_lightweight:
+
+ /* We'll need the vcpu pointer in r4; r14-r31 are left as they are */
+ REST_GPR(4, r1)
+
+ /* Re-enter at kvm_start_lightweight */
+ b kvm_start_lightweight
diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c
new file mode 100644
index 00000000..41cb0017
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_mmu_hpte.c
@@ -0,0 +1,352 @@
+/*
+ * Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved.
+ *
+ * Authors:
+ * Alexander Graf <agraf@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/hash.h>
+#include <linux/slab.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/machdep.h>
+#include <asm/mmu_context.h>
+#include <asm/hw_irq.h>
+
+#include "trace.h"
+
+#define PTE_SIZE 12
+
+static struct kmem_cache *hpte_cache;
+
+/* Bucket index for hpte_hash_pte[]: hash of eaddr with the low PTE_SIZE bits dropped. */
+static inline u64 kvmppc_mmu_hash_pte(u64 eaddr)
+{
+	u64 key = eaddr >> PTE_SIZE;
+
+	return hash_64(key, HPTEG_HASH_BITS_PTE);
+}
+
+/* Bucket index for hpte_hash_pte_long[]: only eaddr bits covered by 0x0ffff000 take part. */
+static inline u64 kvmppc_mmu_hash_pte_long(u64 eaddr)
+{
+	u64 key = (eaddr & 0x0ffff000) >> PTE_SIZE;
+
+	return hash_64(key, HPTEG_HASH_BITS_PTE_LONG);
+}
+
+/* Bucket index for hpte_hash_vpte[]: hash of the low 36 bits of vpage. */
+static inline u64 kvmppc_mmu_hash_vpte(u64 vpage)
+{
+	u64 key = vpage & 0xfffffffffULL;
+
+	return hash_64(key, HPTEG_HASH_BITS_VPTE);
+}
+
+/* Bucket index for hpte_hash_vpte_long[]: vpage bits 12-35, shifted down by 12. */
+static inline u64 kvmppc_mmu_hash_vpte_long(u64 vpage)
+{
+	u64 key = (vpage & 0xffffff000ULL) >> 12;
+
+	return hash_64(key, HPTEG_HASH_BITS_VPTE_LONG);
+}
+
+/*
+ * Link @pte into all four per-vcpu lookup hashes (ePTE, ePTE_long, vPTE,
+ * vPTE_long). The insertions happen under vcpu3s->mmu_lock and use the
+ * RCU list primitives.
+ */
+void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
+{
+	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+	struct hlist_head *bucket;
+
+	trace_kvm_book3s_mmu_map(pte);
+
+	spin_lock(&vcpu3s->mmu_lock);
+
+	/* ePTE list */
+	bucket = &vcpu3s->hpte_hash_pte[kvmppc_mmu_hash_pte(pte->pte.eaddr)];
+	hlist_add_head_rcu(&pte->list_pte, bucket);
+
+	/* ePTE_long list */
+	bucket = &vcpu3s->hpte_hash_pte_long[
+			kvmppc_mmu_hash_pte_long(pte->pte.eaddr)];
+	hlist_add_head_rcu(&pte->list_pte_long, bucket);
+
+	/* vPTE list */
+	bucket = &vcpu3s->hpte_hash_vpte[kvmppc_mmu_hash_vpte(pte->pte.vpage)];
+	hlist_add_head_rcu(&pte->list_vpte, bucket);
+
+	/* vPTE_long list */
+	bucket = &vcpu3s->hpte_hash_vpte_long[
+			kvmppc_mmu_hash_vpte_long(pte->pte.vpage)];
+	hlist_add_head_rcu(&pte->list_vpte_long, bucket);
+
+	spin_unlock(&vcpu3s->mmu_lock);
+}
+
+/* call_rcu() callback: hand the hpte_cache that embeds @head back to the slab cache. */
+static void free_pte_rcu(struct rcu_head *head)
+{
+	kmem_cache_free(hpte_cache,
+			container_of(head, struct hpte_cache, rcu_head));
+}
+
+/*
+ * Drop one shadow PTE: invalidate it in the MMU backend, unlink it from
+ * all four hashes, release its pfn (dirty if the mapping was writable)
+ * and queue it for freeing through RCU. If the entry was already
+ * unlinked by the time the lock is held, nothing further is done.
+ */
+static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
+{
+	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+	bool already_gone;
+
+	trace_kvm_book3s_mmu_invalidate(pte);
+
+	/* Different for 32 and 64 bit */
+	kvmppc_mmu_invalidate_pte(vcpu, pte);
+
+	spin_lock(&vcpu3s->mmu_lock);
+
+	/* pte already invalidated in between? */
+	already_gone = hlist_unhashed(&pte->list_pte);
+	if (!already_gone) {
+		hlist_del_init_rcu(&pte->list_pte);
+		hlist_del_init_rcu(&pte->list_pte_long);
+		hlist_del_init_rcu(&pte->list_vpte);
+		hlist_del_init_rcu(&pte->list_vpte_long);
+
+		if (pte->pte.may_write)
+			kvm_release_pfn_dirty(pte->pfn);
+		else
+			kvm_release_pfn_clean(pte->pfn);
+	}
+
+	spin_unlock(&vcpu3s->mmu_lock);
+
+	if (already_gone)
+		return;
+
+	/* Accounting and the deferred free happen after the lock is dropped */
+	vcpu3s->hpte_cache_count--;
+	call_rcu(&pte->rcu_head, free_pte_rcu);
+}
+
+/*
+ * Invalidate every cached shadow PTE of @vcpu by walking all buckets of
+ * the vPTE_long hash under rcu_read_lock().
+ */
+static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+	struct hpte_cache *entry;
+	struct hlist_node *pos;
+	int bucket;
+
+	rcu_read_lock();
+
+	for (bucket = 0; bucket < HPTEG_HASH_NUM_VPTE_LONG; bucket++) {
+		hlist_for_each_entry_rcu(entry, pos,
+					 &vcpu3s->hpte_hash_vpte_long[bucket],
+					 list_vpte_long) {
+			invalidate_pte(vcpu, entry);
+		}
+	}
+
+	rcu_read_unlock();
+}
+
+/*
+ * Invalidate the cached PTEs whose effective address, with the low 12
+ * bits masked off, equals @guest_ea. Only the ePTE bucket that
+ * @guest_ea hashes to is searched.
+ */
+static void kvmppc_mmu_pte_flush_page(struct kvm_vcpu *vcpu, ulong guest_ea)
+{
+	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+	struct hlist_head *bucket;
+	struct hlist_node *pos;
+	struct hpte_cache *entry;
+
+	bucket = &vcpu3s->hpte_hash_pte[kvmppc_mmu_hash_pte(guest_ea)];
+
+	rcu_read_lock();
+
+	hlist_for_each_entry_rcu(entry, pos, bucket, list_pte) {
+		/* hash collisions share the bucket; skip foreign pages */
+		if ((entry->pte.eaddr & ~0xfffUL) != guest_ea)
+			continue;
+		invalidate_pte(vcpu, entry);
+	}
+
+	rcu_read_unlock();
+}
+
+/*
+ * Invalidate the cached PTEs whose effective address matches @guest_ea
+ * under the 0x0ffff000 mask. Only the ePTE_long bucket that @guest_ea
+ * hashes to is searched.
+ */
+static void kvmppc_mmu_pte_flush_long(struct kvm_vcpu *vcpu, ulong guest_ea)
+{
+	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+	struct hlist_head *bucket;
+	struct hlist_node *pos;
+	struct hpte_cache *entry;
+
+	bucket = &vcpu3s->hpte_hash_pte_long[
+			kvmppc_mmu_hash_pte_long(guest_ea)];
+
+	rcu_read_lock();
+
+	hlist_for_each_entry_rcu(entry, pos, bucket, list_pte_long) {
+		/* hash collisions share the bucket; skip non-matching entries */
+		if ((entry->pte.eaddr & 0x0ffff000UL) != guest_ea)
+			continue;
+		invalidate_pte(vcpu, entry);
+	}
+
+	rcu_read_unlock();
+}
+
+/*
+ * Flush cached PTEs by effective address. @ea_mask selects the
+ * granularity and must be one of ~0xfff (single page), 0x0ffff000
+ * (long range) or 0 (everything); any other mask only triggers a
+ * warning. @guest_ea is masked with @ea_mask before it is used.
+ */
+void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask)
+{
+	trace_kvm_book3s_mmu_flush("", vcpu, guest_ea, ea_mask);
+	guest_ea &= ea_mask;
+
+	if (ea_mask == ~0xfffUL) {
+		kvmppc_mmu_pte_flush_page(vcpu, guest_ea);
+	} else if (ea_mask == 0x0ffff000) {
+		kvmppc_mmu_pte_flush_long(vcpu, guest_ea);
+	} else if (ea_mask == 0) {
+		/* Doing a complete flush -> start from scratch */
+		kvmppc_mmu_pte_flush_all(vcpu);
+	} else {
+		WARN_ON(1);
+	}
+}
+
+/*
+ * Flush by virtual page with mask 0xfffffffff: invalidate the entries in
+ * the matching vPTE bucket whose masked vpage equals @guest_vp.
+ */
+static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp)
+{
+	const u64 mask = 0xfffffffffULL;
+	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+	struct hlist_head *bucket;
+	struct hlist_node *pos;
+	struct hpte_cache *entry;
+
+	bucket = &vcpu3s->hpte_hash_vpte[kvmppc_mmu_hash_vpte(guest_vp)];
+
+	rcu_read_lock();
+
+	hlist_for_each_entry_rcu(entry, pos, bucket, list_vpte) {
+		if ((entry->pte.vpage & mask) != guest_vp)
+			continue;
+		invalidate_pte(vcpu, entry);
+	}
+
+	rcu_read_unlock();
+}
+
+/*
+ * Flush by virtual page with mask 0xffffff000: invalidate the entries in
+ * the matching vPTE_long bucket whose masked vpage equals @guest_vp.
+ */
+static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp)
+{
+	const u64 mask = 0xffffff000ULL;
+	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+	struct hlist_head *bucket;
+	struct hlist_node *pos;
+	struct hpte_cache *entry;
+
+	bucket = &vcpu3s->hpte_hash_vpte_long[
+			kvmppc_mmu_hash_vpte_long(guest_vp)];
+
+	rcu_read_lock();
+
+	hlist_for_each_entry_rcu(entry, pos, bucket, list_vpte_long) {
+		if ((entry->pte.vpage & mask) != guest_vp)
+			continue;
+		invalidate_pte(vcpu, entry);
+	}
+
+	rcu_read_unlock();
+}
+
+/*
+ * Flush cached PTEs by virtual page. @vp_mask must be 0xfffffffff
+ * (short) or 0xffffff000 (long); any other mask only triggers a warning.
+ * @guest_vp is masked with @vp_mask before it is used.
+ */
+void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask)
+{
+	trace_kvm_book3s_mmu_flush("v", vcpu, guest_vp, vp_mask);
+	guest_vp &= vp_mask;
+
+	if (vp_mask == 0xfffffffffULL)
+		kvmppc_mmu_pte_vflush_short(vcpu, guest_vp);
+	else if (vp_mask == 0xffffff000ULL)
+		kvmppc_mmu_pte_vflush_long(vcpu, guest_vp);
+	else
+		WARN_ON(1);
+}
+
+/*
+ * Flush by real address: invalidate every cached PTE whose raddr lies in
+ * the half-open range [@pa_start, @pa_end). All vPTE_long buckets are
+ * walked, since entries are not hashed by real address.
+ */
+void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
+{
+	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+	struct hlist_node *pos;
+	struct hpte_cache *entry;
+	int bucket;
+
+	trace_kvm_book3s_mmu_flush("p", vcpu, pa_start, pa_end);
+
+	rcu_read_lock();
+
+	for (bucket = 0; bucket < HPTEG_HASH_NUM_VPTE_LONG; bucket++) {
+		hlist_for_each_entry_rcu(entry, pos,
+					 &vcpu3s->hpte_hash_vpte_long[bucket],
+					 list_vpte_long) {
+			if (entry->pte.raddr < pa_start ||
+			    entry->pte.raddr >= pa_end)
+				continue;
+			invalidate_pte(vcpu, entry);
+		}
+	}
+
+	rcu_read_unlock();
+}
+
+/*
+ * Allocate a zeroed hpte_cache entry for @vcpu and account for it.
+ *
+ * Returns the new entry, or NULL if the slab allocation failed. A failed
+ * allocation is not counted, so hpte_cache_count keeps matching the
+ * entries that invalidate_pte() will later decrement for.
+ *
+ * Once the count reaches HPTEG_CACHE_NUM all currently hashed entries are
+ * flushed. The freshly allocated entry is not hashed yet, so it survives
+ * the flush. ">=" rather than "==" so that a counter that has ever moved
+ * past the limit still triggers the flush.
+ */
+struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+	struct hpte_cache *pte;
+
+	pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL);
+	if (!pte)
+		return NULL;
+
+	vcpu3s->hpte_cache_count++;
+
+	if (vcpu3s->hpte_cache_count >= HPTEG_CACHE_NUM)
+		kvmppc_mmu_pte_flush_all(vcpu);
+
+	return pte;
+}
+
+void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu)
+{
+ kvmppc_mmu_pte_flush(vcpu, 0, 0); /* ea_mask 0 selects the flush-everything path */
+}
+
+/* Initialise the first @len list heads of @hash_list to empty. */
+static void kvmppc_mmu_hpte_init_hash(struct hlist_head *hash_list, int len)
+{
+	int idx = 0;
+
+	while (idx < len) {
+		INIT_HLIST_HEAD(&hash_list[idx]);
+		idx++;
+	}
+}
+
+int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+
+ /* Per-vcpu setup: empty all four hpte lookup hashes (ePTE, ePTE_long, vPTE, vPTE_long) */
+ kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_pte,
+ ARRAY_SIZE(vcpu3s->hpte_hash_pte));
+ kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_pte_long,
+ ARRAY_SIZE(vcpu3s->hpte_hash_pte_long));
+ kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte,
+ ARRAY_SIZE(vcpu3s->hpte_hash_vpte));
+ kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte_long,
+ ARRAY_SIZE(vcpu3s->hpte_hash_vpte_long));
+
+ spin_lock_init(&vcpu3s->mmu_lock); /* the lock taken around hash list updates */
+
+ return 0; /* no failure path: always reports success */
+}
+
+/*
+ * One-time setup of the slab cache that backs all hpte_cache entries.
+ *
+ * Returns 0 on success or -ENOMEM if the cache could not be created, so
+ * the caller can abort instead of later allocating from a NULL cache.
+ */
+int kvmppc_mmu_hpte_sysinit(void)
+{
+	/* init hpte slab cache */
+	hpte_cache = kmem_cache_create("kvm-spt", sizeof(struct hpte_cache),
+				       sizeof(struct hpte_cache), 0, NULL);
+	if (!hpte_cache)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void kvmppc_mmu_hpte_sysexit(void)
+{
+ kmem_cache_destroy(hpte_cache); /* tears down the cache created in kvmppc_mmu_hpte_sysinit() */
+}
diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c
new file mode 100644
index 00000000..a59a25a1
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_paired_singles.c
@@ -0,0 +1,1270 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright Novell Inc 2010
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <asm/kvm.h>
+#include <asm/kvm_ppc.h>
+#include <asm/disassemble.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_fpu.h>
+#include <asm/reg.h>
+#include <asm/cacheflush.h>
+#include <asm/switch_to.h>
+#include <linux/vmalloc.h>
+
+/* #define DEBUG */
+
+/*
+ * Debug printing: forwards to printk when DEBUG is defined, otherwise
+ * expands to an empty statement. The empty form deliberately has no
+ * trailing semicolon so that "dprintk(...);" is exactly one statement in
+ * both configurations (safe in an unbraced if/else).
+ */
+#ifdef DEBUG
+#define dprintk printk
+#else
+#define dprintk(...) do { } while (0)
+#endif
+
+#define OP_LFS 48
+#define OP_LFSU 49
+#define OP_LFD 50
+#define OP_LFDU 51
+#define OP_STFS 52
+#define OP_STFSU 53
+#define OP_STFD 54
+#define OP_STFDU 55
+#define OP_PSQ_L 56
+#define OP_PSQ_LU 57
+#define OP_PSQ_ST 60
+#define OP_PSQ_STU 61
+
+#define OP_31_LFSX 535
+#define OP_31_LFSUX 567
+#define OP_31_LFDX 599
+#define OP_31_LFDUX 631
+#define OP_31_STFSX 663
+#define OP_31_STFSUX 695
+#define OP_31_STFX 727
+#define OP_31_STFUX 759
+#define OP_31_LWIZX 887
+#define OP_31_STFIWX 983
+
+#define OP_59_FADDS 21
+#define OP_59_FSUBS 20
+#define OP_59_FSQRTS 22
+#define OP_59_FDIVS 18
+#define OP_59_FRES 24
+#define OP_59_FMULS 25
+#define OP_59_FRSQRTES 26
+#define OP_59_FMSUBS 28
+#define OP_59_FMADDS 29
+#define OP_59_FNMSUBS 30
+#define OP_59_FNMADDS 31
+
+#define OP_63_FCMPU 0
+#define OP_63_FCPSGN 8
+#define OP_63_FRSP 12
+#define OP_63_FCTIW 14
+#define OP_63_FCTIWZ 15
+#define OP_63_FDIV 18
+#define OP_63_FADD 21
+#define OP_63_FSQRT 22
+#define OP_63_FSEL 23
+#define OP_63_FRE 24
+#define OP_63_FMUL 25
+#define OP_63_FRSQRTE 26
+#define OP_63_FMSUB 28
+#define OP_63_FMADD 29
+#define OP_63_FNMSUB 30
+#define OP_63_FNMADD 31
+#define OP_63_FCMPO 32
+#define OP_63_MTFSB1 38 // XXX
+#define OP_63_FSUB 20
+#define OP_63_FNEG 40
+#define OP_63_MCRFS 64
+#define OP_63_MTFSB0 70
+#define OP_63_FMR 72
+#define OP_63_MTFSFI 134
+#define OP_63_FABS 264
+#define OP_63_MFFS 583
+#define OP_63_MTFSF 711
+
+#define OP_4X_PS_CMPU0 0
+#define OP_4X_PSQ_LX 6
+#define OP_4XW_PSQ_STX 7
+#define OP_4A_PS_SUM0 10
+#define OP_4A_PS_SUM1 11
+#define OP_4A_PS_MULS0 12
+#define OP_4A_PS_MULS1 13
+#define OP_4A_PS_MADDS0 14
+#define OP_4A_PS_MADDS1 15
+#define OP_4A_PS_DIV 18
+#define OP_4A_PS_SUB 20
+#define OP_4A_PS_ADD 21
+#define OP_4A_PS_SEL 23
+#define OP_4A_PS_RES 24
+#define OP_4A_PS_MUL 25
+#define OP_4A_PS_RSQRTE 26
+#define OP_4A_PS_MSUB 28
+#define OP_4A_PS_MADD 29
+#define OP_4A_PS_NMSUB 30
+#define OP_4A_PS_NMADD 31
+#define OP_4X_PS_CMPO0 32
+#define OP_4X_PSQ_LUX 38
+#define OP_4XW_PSQ_STUX 39
+#define OP_4X_PS_NEG 40
+#define OP_4X_PS_CMPU1 64
+#define OP_4X_PS_MR 72
+#define OP_4X_PS_CMPO1 96
+#define OP_4X_PS_NABS 136
+#define OP_4X_PS_ABS 264
+#define OP_4X_PS_MERGE00 528
+#define OP_4X_PS_MERGE01 560
+#define OP_4X_PS_MERGE10 592
+#define OP_4X_PS_MERGE11 624
+
+#define SCALAR_NONE 0
+#define SCALAR_HIGH (1 << 0)
+#define SCALAR_LOW (1 << 1)
+#define SCALAR_NO_PS0 (1 << 2)
+#define SCALAR_NO_PS1 (1 << 3)
+
+#define GQR_ST_TYPE_MASK 0x00000007
+#define GQR_ST_TYPE_SHIFT 0
+#define GQR_ST_SCALE_MASK 0x00003f00
+#define GQR_ST_SCALE_SHIFT 8
+#define GQR_LD_TYPE_MASK 0x00070000
+#define GQR_LD_TYPE_SHIFT 16
+#define GQR_LD_SCALE_MASK 0x3f000000
+#define GQR_LD_SCALE_SHIFT 24
+
+#define GQR_QUANTIZE_FLOAT 0
+#define GQR_QUANTIZE_U8 4
+#define GQR_QUANTIZE_U16 5
+#define GQR_QUANTIZE_S8 6
+#define GQR_QUANTIZE_S16 7
+
+#define FPU_LS_SINGLE 0
+#define FPU_LS_DOUBLE 1
+#define FPU_LS_SINGLE_LOW 2
+
+/* Convert the double held in FPR @rt to single precision and store it in QPR @rt. */
+static inline void kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt)
+{
+	u64 *src = &vcpu->arch.fpr[rt];
+	u32 *dst = &vcpu->arch.qpr[rt];
+
+	kvm_cvt_df(src, dst);
+}
+
+/*
+ * Queue a data storage interrupt for a faulting emulated access.
+ *
+ * @eaddr:    faulting effective address, reported through DAR
+ * @is_store: true if the access was a store; sets DSISR bit 38 in
+ *            addition to the page-fault bit 33
+ *
+ * DSISR is written for loads as well as stores. Writing it only in the
+ * store case would leave whatever value an earlier interrupt had put
+ * there for the guest to read after a load fault.
+ */
+static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store)
+{
+	u64 dsisr;
+	struct kvm_vcpu_arch_shared *shared = vcpu->arch.shared;
+
+	/* Clear MSR bits 33-36 and 42-47 before the interrupt is delivered */
+	shared->msr = kvmppc_set_field(shared->msr, 33, 36, 0);
+	shared->msr = kvmppc_set_field(shared->msr, 42, 47, 0);
+	shared->dar = eaddr;
+	/* Page Fault */
+	dsisr = kvmppc_set_field(0, 33, 33, 1);
+	if (is_store)
+		dsisr = kvmppc_set_field(dsisr, 38, 38, 1);
+	shared->dsisr = dsisr;
+	kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE);
+}
+
+/*
+ * Emulate a floating-point load into FPR @rs from guest address @addr.
+ *
+ * @ls_type selects the width: FPU_LS_DOUBLE reads 8 bytes, anything else
+ * reads 4. A failed read injects a page fault and yields EMULATE_FAIL; an
+ * MMIO access is handed to kvmppc_handle_load() and its result returned.
+ * Otherwise the value is placed in the register file (a single is also
+ * mirrored raw into the QPR) and EMULATE_DONE is returned.
+ */
+static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+				   int rs, ulong addr, int ls_type)
+{
+	char buf[8];
+	int nbytes = (ls_type == FPU_LS_DOUBLE) ? sizeof(u64) : sizeof(u32);
+	int rc;
+
+	/* read from memory */
+	rc = kvmppc_ld(vcpu, &addr, nbytes, buf, true);
+	vcpu->arch.paddr_accessed = addr;
+
+	if (rc < 0) {
+		kvmppc_inject_pf(vcpu, addr, false);
+		return EMULATE_FAIL;
+	}
+
+	if (rc == EMULATE_DO_MMIO)
+		return kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FPR | rs,
+					  nbytes, 1);
+
+	/* put in registers */
+	if (ls_type == FPU_LS_SINGLE) {
+		kvm_cvt_fd((u32*)buf, &vcpu->arch.fpr[rs]);
+		vcpu->arch.qpr[rs] = *((u32*)buf);
+	} else if (ls_type == FPU_LS_DOUBLE) {
+		vcpu->arch.fpr[rs] = *((u64*)buf);
+	}
+
+	dprintk(KERN_INFO "KVM: FPR_LD [0x%llx] at 0x%lx (%d)\n", *(u64*)buf,
+		addr, nbytes);
+
+	return EMULATE_DONE;
+}
+
+/*
+ * Emulate a floating-point store of FPR @rs to guest address @addr.
+ *
+ * @ls_type picks the representation: FPU_LS_SINGLE converts the double
+ * to single precision, FPU_LS_SINGLE_LOW stores the low 32 bits of the
+ * register unconverted, FPU_LS_DOUBLE stores all 64 bits. Any other
+ * value results in a zero-length store.
+ *
+ * Returns EMULATE_FAIL after injecting a page fault, the result of
+ * kvmppc_handle_store() for MMIO, or EMULATE_DONE.
+ */
+static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
+				    int rs, ulong addr, int ls_type)
+{
+	int result = EMULATE_FAIL;
+	char buf[8];
+	u64 mmio_val;
+	int nbytes;
+	int rc;
+
+	if (ls_type == FPU_LS_SINGLE) {
+		kvm_cvt_df(&vcpu->arch.fpr[rs], (u32*)buf);
+		mmio_val = *((u32*)buf);
+		nbytes = sizeof(u32);
+	} else if (ls_type == FPU_LS_SINGLE_LOW) {
+		*((u32*)buf) = vcpu->arch.fpr[rs];
+		mmio_val = vcpu->arch.fpr[rs] & 0xffffffff;
+		nbytes = sizeof(u32);
+	} else if (ls_type == FPU_LS_DOUBLE) {
+		*((u64*)buf) = vcpu->arch.fpr[rs];
+		mmio_val = vcpu->arch.fpr[rs];
+		nbytes = sizeof(u64);
+	} else {
+		mmio_val = 0;
+		nbytes = 0;
+	}
+
+	rc = kvmppc_st(vcpu, &addr, nbytes, buf, true);
+	vcpu->arch.paddr_accessed = addr;
+
+	if (rc < 0)
+		kvmppc_inject_pf(vcpu, addr, true);
+	else if (rc == EMULATE_DO_MMIO)
+		result = kvmppc_handle_store(run, vcpu, mmio_val, nbytes, 1);
+	else
+		result = EMULATE_DONE;
+
+	dprintk(KERN_INFO "KVM: FPR_ST [0x%llx] at 0x%lx (%d)\n",
+		mmio_val, addr, nbytes);
+
+	return result;
+}
+
+/*
+ * Emulate a paired-single load into the FPR/QPR pair @rs.
+ *
+ * With @w set only one 32-bit word is read and the second slot is filled
+ * with the bit pattern of 1.0f; otherwise two words are read. @i is
+ * accepted but not used by this function.
+ *
+ * Returns EMULATE_FAIL after injecting a page fault, the result of
+ * kvmppc_handle_load() for MMIO, or EMULATE_DONE.
+ */
+static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+				   int rs, ulong addr, bool w, int i)
+{
+	u32 words[2];
+	float unity = 1.0;
+	int rc;
+
+	/* read from memory */
+	if (!w) {
+		rc = kvmppc_ld(vcpu, &addr, sizeof(u32) * 2, words, true);
+	} else {
+		rc = kvmppc_ld(vcpu, &addr, sizeof(u32), words, true);
+		memcpy(&words[1], &unity, sizeof(u32));
+	}
+	vcpu->arch.paddr_accessed = addr;
+
+	if (rc < 0) {
+		kvmppc_inject_pf(vcpu, addr, false);
+		return EMULATE_FAIL;
+	}
+
+	if (rc == EMULATE_DO_MMIO) {
+		int mmio;
+
+		if (!w)
+			return kvmppc_handle_load(run, vcpu,
+						  KVM_MMIO_REG_FQPR | rs, 8, 1);
+
+		/* single-word MMIO: PS0 arrives via MMIO, PS1 is set here */
+		mmio = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FPR | rs,
+					  4, 1);
+		vcpu->arch.qpr[rs] = words[1];
+		return mmio;
+	}
+
+	/* put in registers */
+	kvm_cvt_fd(&words[0], &vcpu->arch.fpr[rs]);
+	vcpu->arch.qpr[rs] = words[1];
+
+	dprintk(KERN_INFO "KVM: PSQ_LD [0x%x, 0x%x] at 0x%lx (%d)\n", words[0],
+		words[1], addr, w ? 4 : 8);
+
+	return EMULATE_DONE;
+}
+
+/*
+ * Emulate a paired-single store of the FPR/QPR pair @rs.
+ *
+ * PS0 is the FPR converted to single precision, PS1 is the raw QPR
+ * value. With @w set only PS0 (4 bytes) is written, otherwise both
+ * (8 bytes). @i is accepted but not used by this function.
+ *
+ * Returns EMULATE_FAIL after injecting a page fault, the result of
+ * kvmppc_handle_store() for MMIO, or EMULATE_DONE.
+ */
+static int kvmppc_emulate_psq_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
+				    int rs, ulong addr, bool w, int i)
+{
+	int result = EMULATE_FAIL;
+	int nbytes = w ? sizeof(u32) : sizeof(u64);
+	u32 words[2];
+	int rc;
+
+	kvm_cvt_df(&vcpu->arch.fpr[rs], &words[0]);
+	words[1] = vcpu->arch.qpr[rs];
+
+	rc = kvmppc_st(vcpu, &addr, nbytes, words, true);
+	vcpu->arch.paddr_accessed = addr;
+
+	if (rc < 0) {
+		kvmppc_inject_pf(vcpu, addr, true);
+	} else if (rc != EMULATE_DO_MMIO) {
+		result = EMULATE_DONE;
+	} else if (w) {
+		result = kvmppc_handle_store(run, vcpu, words[0], 4, 1);
+	} else {
+		/* PS0 in the upper half, PS1 in the lower half */
+		u64 both = ((u64)words[0] << 32) | words[1];
+
+		result = kvmppc_handle_store(run, vcpu, both, 8, 1);
+	}
+
+	dprintk(KERN_INFO "KVM: PSQ_ST [0x%x, 0x%x] at 0x%lx (%d)\n",
+		words[0], words[1], addr, nbytes);
+
+	return result;
+}
+
+/*
+ * Extract the instruction bits @msb..@lsb (both included), numbered as in
+ * the spec, i.e. with the leftmost bit of the 32-bit word being bit 0.
+ */
+static inline u32 inst_get_field(u32 inst, int msb, int lsb)
+{
+	/* the generic helper is handed the same positions shifted by 32 */
+	const int first = msb + 32;
+	const int last = lsb + 32;
+
+	return kvmppc_get_field(inst, first, last);
+}
+
+/*
+ * Return @inst with the bits @msb..@lsb (spec numbering, leftmost bit of
+ * the 32-bit word is bit 0) replaced by @value.
+ */
+static inline u32 inst_set_field(u32 inst, int msb, int lsb, int value)
+{
+	/* the generic helper is handed the same positions shifted by 32 */
+	const int first = msb + 32;
+	const int last = lsb + 32;
+
+	return kvmppc_set_field(inst, first, last, value);
+}
+
+bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst)
+{ /* true only if the vcpu has BOOK3S_HFLAG_PAIRED_SINGLE set and inst matches an opcode listed below */
+ if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
+ return false;
+
+ switch (get_op(inst)) { /* dispatch on the primary opcode */
+ case OP_PSQ_L:
+ case OP_PSQ_LU:
+ case OP_PSQ_ST:
+ case OP_PSQ_STU:
+ case OP_LFS:
+ case OP_LFSU:
+ case OP_LFD:
+ case OP_LFDU:
+ case OP_STFS:
+ case OP_STFSU:
+ case OP_STFD:
+ case OP_STFDU:
+ return true;
+ case 4: /* paired-single arithmetic and indexed quantized load/store */
+ /* X form: extended opcode taken from bits 21-30 */
+ switch (inst_get_field(inst, 21, 30)) {
+ case OP_4X_PS_CMPU0:
+ case OP_4X_PSQ_LX:
+ case OP_4X_PS_CMPO0:
+ case OP_4X_PSQ_LUX:
+ case OP_4X_PS_NEG:
+ case OP_4X_PS_CMPU1:
+ case OP_4X_PS_MR:
+ case OP_4X_PS_CMPO1:
+ case OP_4X_PS_NABS:
+ case OP_4X_PS_ABS:
+ case OP_4X_PS_MERGE00:
+ case OP_4X_PS_MERGE01:
+ case OP_4X_PS_MERGE10:
+ case OP_4X_PS_MERGE11:
+ return true;
+ }
+ /* XW form: extended opcode taken from bits 25-30 */
+ switch (inst_get_field(inst, 25, 30)) {
+ case OP_4XW_PSQ_STX:
+ case OP_4XW_PSQ_STUX:
+ return true;
+ }
+ /* A form: extended opcode taken from bits 26-30 */
+ switch (inst_get_field(inst, 26, 30)) {
+ case OP_4A_PS_SUM1:
+ case OP_4A_PS_SUM0:
+ case OP_4A_PS_MULS0:
+ case OP_4A_PS_MULS1:
+ case OP_4A_PS_MADDS0:
+ case OP_4A_PS_MADDS1:
+ case OP_4A_PS_DIV:
+ case OP_4A_PS_SUB:
+ case OP_4A_PS_ADD:
+ case OP_4A_PS_SEL:
+ case OP_4A_PS_RES:
+ case OP_4A_PS_MUL:
+ case OP_4A_PS_RSQRTE:
+ case OP_4A_PS_MSUB:
+ case OP_4A_PS_MADD:
+ case OP_4A_PS_NMSUB:
+ case OP_4A_PS_NMADD:
+ return true;
+ }
+ break;
+ case 59: /* single-precision FP arithmetic */
+ switch (inst_get_field(inst, 21, 30)) { /* 10-bit extended opcode */
+ case OP_59_FADDS:
+ case OP_59_FSUBS:
+ case OP_59_FDIVS:
+ case OP_59_FRES:
+ case OP_59_FRSQRTES:
+ return true;
+ }
+ switch (inst_get_field(inst, 26, 30)) { /* 5-bit extended opcode */
+ case OP_59_FMULS:
+ case OP_59_FMSUBS:
+ case OP_59_FMADDS:
+ case OP_59_FNMSUBS:
+ case OP_59_FNMADDS:
+ return true;
+ }
+ break;
+ case 63: /* double-precision FP arithmetic and FPSCR access */
+ switch (inst_get_field(inst, 21, 30)) { /* 10-bit extended opcode */
+ case OP_63_MTFSB0:
+ case OP_63_MTFSB1:
+ case OP_63_MTFSF:
+ case OP_63_MTFSFI:
+ case OP_63_MCRFS:
+ case OP_63_MFFS:
+ case OP_63_FCMPU:
+ case OP_63_FCMPO:
+ case OP_63_FNEG:
+ case OP_63_FMR:
+ case OP_63_FABS:
+ case OP_63_FRSP:
+ case OP_63_FDIV:
+ case OP_63_FADD:
+ case OP_63_FSUB:
+ case OP_63_FCTIW:
+ case OP_63_FCTIWZ:
+ case OP_63_FRSQRTE:
+ case OP_63_FCPSGN:
+ return true;
+ }
+ switch (inst_get_field(inst, 26, 30)) { /* 5-bit extended opcode */
+ case OP_63_FMUL:
+ case OP_63_FSEL:
+ case OP_63_FMSUB:
+ case OP_63_FMADD:
+ case OP_63_FNMSUB:
+ case OP_63_FNMADD:
+ return true;
+ }
+ break;
+ case 31: /* indexed FP loads and stores */
+ switch (inst_get_field(inst, 21, 30)) {
+ case OP_31_LFSX:
+ case OP_31_LFSUX:
+ case OP_31_LFDX:
+ case OP_31_LFDUX:
+ case OP_31_STFSX:
+ case OP_31_STFSUX:
+ case OP_31_STFX:
+ case OP_31_STFUX:
+ case OP_31_STFIWX:
+ return true;
+ }
+ break;
+ }
+
+ return false; /* no table matched */
+}
+
+/*
+ * Return the displacement of a psq_l/psq_lu/psq_st/psq_stu instruction.
+ *
+ * The displacement is the low 12 bits of the instruction word, a two's
+ * complement value in the range -2048..2047. The whole field (0xfff) is
+ * extracted; masking with 0x8ff would drop bits 8-10. Negative values
+ * are sign-extended rather than read as sign-magnitude, so 0xfff is -1.
+ */
+static int get_d_signext(u32 inst)
+{
+	int d = inst & 0xfff;
+
+	/* flip the sign bit, then subtract its weight: 0x800..0xfff -> -2048..-1 */
+	return (d ^ 0x800) - 0x800;
+}
+
+static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc,
+ int reg_out, int reg_in1, int reg_in2,
+ int reg_in3, int scalar,
+ void (*func)(u64 *fpscr,
+ u32 *dst, u32 *src1,
+ u32 *src2, u32 *src3))
+{
+ u32 *qpr = vcpu->arch.qpr;
+ u64 *fpr = vcpu->arch.fpr;
+ u32 ps0_out;
+ u32 ps0_in1, ps0_in2, ps0_in3;
+ u32 ps1_in1, ps1_in2, ps1_in3;
+
+ /*
+ * Apply the three-operand single-precision operation func to both
+ * halves of a paired single: PS0 lives in the FPRs (as doubles), PS1
+ * in the QPRs. The scalar flags redirect the second operand or
+ * suppress one of the two results. Always returns EMULATE_DONE.
+ *
+ * RC: a set record bit is not handled, only warned about.
+ */
+ WARN_ON(rc);
+
+ /* PS0: convert the three FPR operands down to single precision */
+ kvm_cvt_df(&fpr[reg_in1], &ps0_in1);
+ kvm_cvt_df(&fpr[reg_in2], &ps0_in2);
+ kvm_cvt_df(&fpr[reg_in3], &ps0_in3);
+
+ if (scalar & SCALAR_LOW)
+ ps0_in2 = qpr[reg_in2]; /* second operand comes from the PS1 half instead */
+
+ func(&vcpu->arch.fpscr, &ps0_out, &ps0_in1, &ps0_in2, &ps0_in3);
+
+ dprintk(KERN_INFO "PS3 ps0 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n",
+ ps0_in1, ps0_in2, ps0_in3, ps0_out);
+
+ if (!(scalar & SCALAR_NO_PS0))
+ kvm_cvt_fd(&ps0_out, &fpr[reg_out]); /* func has already run; only the write-back is skipped */
+
+ /* PS1: operands are taken raw from the QPRs */
+ ps1_in1 = qpr[reg_in1];
+ ps1_in2 = qpr[reg_in2];
+ ps1_in3 = qpr[reg_in3];
+
+ if (scalar & SCALAR_HIGH)
+ ps1_in2 = ps0_in2; /* reuse the PS0 second operand (after any SCALAR_LOW substitution) */
+
+ if (!(scalar & SCALAR_NO_PS1))
+ func(&vcpu->arch.fpscr, &qpr[reg_out], &ps1_in1, &ps1_in2, &ps1_in3);
+
+ dprintk(KERN_INFO "PS3 ps1 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n",
+ ps1_in1, ps1_in2, ps1_in3, qpr[reg_out]);
+
+ return EMULATE_DONE;
+}
+
+static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc,
+ int reg_out, int reg_in1, int reg_in2,
+ int scalar,
+ void (*func)(u64 *fpscr,
+ u32 *dst, u32 *src1,
+ u32 *src2))
+{
+ u32 *qpr = vcpu->arch.qpr;
+ u64 *fpr = vcpu->arch.fpr;
+ u32 ps0_out;
+ u32 ps0_in1, ps0_in2;
+ u32 ps1_out;
+ u32 ps1_in1, ps1_in2;
+
+ /*
+ * Apply the two-operand single-precision operation func to both
+ * halves of a paired single: PS0 lives in the FPRs (as doubles), PS1
+ * in the QPRs. The scalar flags redirect the second operand or
+ * suppress one of the two results. Always returns EMULATE_DONE.
+ *
+ * RC: a set record bit is not handled, only warned about.
+ */
+ WARN_ON(rc);
+
+ /* PS0: first operand is the FPR converted down to single precision */
+ kvm_cvt_df(&fpr[reg_in1], &ps0_in1);
+
+ if (scalar & SCALAR_LOW)
+ ps0_in2 = qpr[reg_in2]; /* second operand comes from the PS1 half instead */
+ else
+ kvm_cvt_df(&fpr[reg_in2], &ps0_in2);
+
+ func(&vcpu->arch.fpscr, &ps0_out, &ps0_in1, &ps0_in2);
+
+ if (!(scalar & SCALAR_NO_PS0)) {
+ dprintk(KERN_INFO "PS2 ps0 -> f(0x%x, 0x%x) = 0x%x\n",
+ ps0_in1, ps0_in2, ps0_out);
+
+ kvm_cvt_fd(&ps0_out, &fpr[reg_out]);
+ }
+
+ /* PS1: operands are taken raw from the QPRs */
+ ps1_in1 = qpr[reg_in1];
+ ps1_in2 = qpr[reg_in2];
+
+ if (scalar & SCALAR_HIGH)
+ ps1_in2 = ps0_in2; /* reuse the PS0 second operand */
+
+ func(&vcpu->arch.fpscr, &ps1_out, &ps1_in1, &ps1_in2); /* NOTE(review): runs (and may touch fpscr) even when SCALAR_NO_PS1 is set; only the store below is skipped */
+
+ if (!(scalar & SCALAR_NO_PS1)) {
+ qpr[reg_out] = ps1_out;
+
+ dprintk(KERN_INFO "PS2 ps1 -> f(0x%x, 0x%x) = 0x%x\n",
+ ps1_in1, ps1_in2, qpr[reg_out]);
+ }
+
+ return EMULATE_DONE;
+}
+
+static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc,
+ int reg_out, int reg_in,
+ void (*func)(u64 *t,
+ u32 *dst, u32 *src1))
+{
+ u32 *qpr = vcpu->arch.qpr;
+ u64 *fpr = vcpu->arch.fpr;
+ u32 ps0_out, ps0_in;
+ u32 ps1_in;
+
+ /*
+ * Apply the one-operand single-precision operation func to both
+ * halves of a paired single: PS0 lives in the FPRs (as doubles), PS1
+ * in the QPRs. Always returns EMULATE_DONE.
+ *
+ * RC: a set record bit is not handled, only warned about.
+ */
+ WARN_ON(rc);
+
+ /* PS0: convert down, operate, convert the result back up into the FPR */
+ kvm_cvt_df(&fpr[reg_in], &ps0_in);
+ func(&vcpu->arch.fpscr, &ps0_out, &ps0_in);
+
+ dprintk(KERN_INFO "PS1 ps0 -> f(0x%x) = 0x%x\n",
+ ps0_in, ps0_out);
+
+ kvm_cvt_fd(&ps0_out, &fpr[reg_out]);
+
+ /* PS1: input is copied first, so reg_out may equal reg_in */
+ ps1_in = qpr[reg_in];
+ func(&vcpu->arch.fpscr, &qpr[reg_out], &ps1_in);
+
+ dprintk(KERN_INFO "PS1 ps1 -> f(0x%x) = 0x%x\n",
+ ps1_in, qpr[reg_out]);
+
+ return EMULATE_DONE;
+}
+
+int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+ u32 inst = kvmppc_get_last_inst(vcpu);
+ enum emulation_result emulated = EMULATE_DONE;
+
+ int ax_rd = inst_get_field(inst, 6, 10);
+ int ax_ra = inst_get_field(inst, 11, 15);
+ int ax_rb = inst_get_field(inst, 16, 20);
+ int ax_rc = inst_get_field(inst, 21, 25);
+ short full_d = inst_get_field(inst, 16, 31);
+
+ u64 *fpr_d = &vcpu->arch.fpr[ax_rd];
+ u64 *fpr_a = &vcpu->arch.fpr[ax_ra];
+ u64 *fpr_b = &vcpu->arch.fpr[ax_rb];
+ u64 *fpr_c = &vcpu->arch.fpr[ax_rc];
+
+ bool rcomp = (inst & 1) ? true : false;
+ u32 cr = kvmppc_get_cr(vcpu);
+#ifdef DEBUG
+ int i;
+#endif
+
+ if (!kvmppc_inst_is_paired_single(vcpu, inst))
+ return EMULATE_FAIL;
+
+ if (!(vcpu->arch.shared->msr & MSR_FP)) {
+ kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL);
+ return EMULATE_AGAIN;
+ }
+
+ kvmppc_giveup_ext(vcpu, MSR_FP);
+ preempt_disable();
+ enable_kernel_fp();
+ /* Do we need to clear FE0 / FE1 here? Don't think so. */
+
+#ifdef DEBUG
+ for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) {
+ u32 f;
+ kvm_cvt_df(&vcpu->arch.fpr[i], &f);
+ dprintk(KERN_INFO "FPR[%d] = 0x%x / 0x%llx QPR[%d] = 0x%x\n",
+ i, f, vcpu->arch.fpr[i], i, vcpu->arch.qpr[i]);
+ }
+#endif
+
+ switch (get_op(inst)) {
+ case OP_PSQ_L:
+ {
+ ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0;
+ bool w = inst_get_field(inst, 16, 16) ? true : false;
+ int i = inst_get_field(inst, 17, 19);
+
+ addr += get_d_signext(inst);
+ emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i);
+ break;
+ }
+ case OP_PSQ_LU:
+ {
+ ulong addr = kvmppc_get_gpr(vcpu, ax_ra);
+ bool w = inst_get_field(inst, 16, 16) ? true : false;
+ int i = inst_get_field(inst, 17, 19);
+
+ addr += get_d_signext(inst);
+ emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i);
+
+ if (emulated == EMULATE_DONE)
+ kvmppc_set_gpr(vcpu, ax_ra, addr);
+ break;
+ }
+ case OP_PSQ_ST:
+ {
+ ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0;
+ bool w = inst_get_field(inst, 16, 16) ? true : false;
+ int i = inst_get_field(inst, 17, 19);
+
+ addr += get_d_signext(inst);
+ emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i);
+ break;
+ }
+ case OP_PSQ_STU:
+ {
+ ulong addr = kvmppc_get_gpr(vcpu, ax_ra);
+ bool w = inst_get_field(inst, 16, 16) ? true : false;
+ int i = inst_get_field(inst, 17, 19);
+
+ addr += get_d_signext(inst);
+ emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i);
+
+ if (emulated == EMULATE_DONE)
+ kvmppc_set_gpr(vcpu, ax_ra, addr);
+ break;
+ }
+ case 4:
+ /* X form */
+ switch (inst_get_field(inst, 21, 30)) {
+ case OP_4X_PS_CMPU0:
+ /* XXX */
+ emulated = EMULATE_FAIL;
+ break;
+ case OP_4X_PSQ_LX:
+ {
+ ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0;
+ bool w = inst_get_field(inst, 21, 21) ? true : false;
+ int i = inst_get_field(inst, 22, 24);
+
+ addr += kvmppc_get_gpr(vcpu, ax_rb);
+ emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i);
+ break;
+ }
+ case OP_4X_PS_CMPO0:
+ /* XXX */
+ emulated = EMULATE_FAIL;
+ break;
+ case OP_4X_PSQ_LUX:
+ {
+ ulong addr = kvmppc_get_gpr(vcpu, ax_ra);
+ bool w = inst_get_field(inst, 21, 21) ? true : false;
+ int i = inst_get_field(inst, 22, 24);
+
+ addr += kvmppc_get_gpr(vcpu, ax_rb);
+ emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i);
+
+ if (emulated == EMULATE_DONE)
+ kvmppc_set_gpr(vcpu, ax_ra, addr);
+ break;
+ }
+ case OP_4X_PS_NEG:
+ vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb];
+ vcpu->arch.fpr[ax_rd] ^= 0x8000000000000000ULL;
+ vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
+ vcpu->arch.qpr[ax_rd] ^= 0x80000000;
+ break;
+ case OP_4X_PS_CMPU1:
+ /* XXX */
+ emulated = EMULATE_FAIL;
+ break;
+ case OP_4X_PS_MR:
+ WARN_ON(rcomp);
+ vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb];
+ vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
+ break;
+ case OP_4X_PS_CMPO1:
+ /* XXX */
+ emulated = EMULATE_FAIL;
+ break;
+ case OP_4X_PS_NABS:
+ WARN_ON(rcomp);
+ vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb];
+ vcpu->arch.fpr[ax_rd] |= 0x8000000000000000ULL;
+ vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
+ vcpu->arch.qpr[ax_rd] |= 0x80000000;
+ break;
+ case OP_4X_PS_ABS:
+ WARN_ON(rcomp);
+ vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb];
+ vcpu->arch.fpr[ax_rd] &= ~0x8000000000000000ULL;
+ vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
+ vcpu->arch.qpr[ax_rd] &= ~0x80000000;
+ break;
+ case OP_4X_PS_MERGE00:
+ WARN_ON(rcomp);
+ vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra];
+ /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */
+ kvm_cvt_df(&vcpu->arch.fpr[ax_rb],
+ &vcpu->arch.qpr[ax_rd]);
+ break;
+ case OP_4X_PS_MERGE01:
+ WARN_ON(rcomp);
+ vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra];
+ vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
+ break;
+ case OP_4X_PS_MERGE10:
+ WARN_ON(rcomp);
+ /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */
+ kvm_cvt_fd(&vcpu->arch.qpr[ax_ra],
+ &vcpu->arch.fpr[ax_rd]);
+ /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */
+ kvm_cvt_df(&vcpu->arch.fpr[ax_rb],
+ &vcpu->arch.qpr[ax_rd]);
+ break;
+ case OP_4X_PS_MERGE11:
+ WARN_ON(rcomp);
+ /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */
+ kvm_cvt_fd(&vcpu->arch.qpr[ax_ra],
+ &vcpu->arch.fpr[ax_rd]);
+ vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
+ break;
+ }
+ /* XW form */
+ switch (inst_get_field(inst, 25, 30)) {
+ case OP_4XW_PSQ_STX:
+ {
+ ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0;
+ bool w = inst_get_field(inst, 21, 21) ? true : false;
+ int i = inst_get_field(inst, 22, 24);
+
+ addr += kvmppc_get_gpr(vcpu, ax_rb);
+ emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i);
+ break;
+ }
+ case OP_4XW_PSQ_STUX:
+ {
+ ulong addr = kvmppc_get_gpr(vcpu, ax_ra);
+ bool w = inst_get_field(inst, 21, 21) ? true : false;
+ int i = inst_get_field(inst, 22, 24);
+
+ addr += kvmppc_get_gpr(vcpu, ax_rb);
+ emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i);
+
+ if (emulated == EMULATE_DONE)
+ kvmppc_set_gpr(vcpu, ax_ra, addr);
+ break;
+ }
+ }
+ /* A form */
+ switch (inst_get_field(inst, 26, 30)) {
+ case OP_4A_PS_SUM1:
+ emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd,
+ ax_rb, ax_ra, SCALAR_NO_PS0 | SCALAR_HIGH, fps_fadds);
+ vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rc];
+ break;
+ case OP_4A_PS_SUM0:
+ emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd,
+ ax_ra, ax_rb, SCALAR_NO_PS1 | SCALAR_LOW, fps_fadds);
+ vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rc];
+ break;
+ case OP_4A_PS_MULS0:
+ emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd,
+ ax_ra, ax_rc, SCALAR_HIGH, fps_fmuls);
+ break;
+ case OP_4A_PS_MULS1:
+ emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd,
+ ax_ra, ax_rc, SCALAR_LOW, fps_fmuls);
+ break;
+ case OP_4A_PS_MADDS0:
+ emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd,
+ ax_ra, ax_rc, ax_rb, SCALAR_HIGH, fps_fmadds);
+ break;
+ case OP_4A_PS_MADDS1:
+ emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd,
+ ax_ra, ax_rc, ax_rb, SCALAR_LOW, fps_fmadds);
+ break;
+ case OP_4A_PS_DIV:
+ emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd,
+ ax_ra, ax_rb, SCALAR_NONE, fps_fdivs);
+ break;
+ case OP_4A_PS_SUB:
+ emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd,
+ ax_ra, ax_rb, SCALAR_NONE, fps_fsubs);
+ break;
+ case OP_4A_PS_ADD:
+ emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd,
+ ax_ra, ax_rb, SCALAR_NONE, fps_fadds);
+ break;
+ case OP_4A_PS_SEL:
+ emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd,
+ ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fsel);
+ break;
+ case OP_4A_PS_RES:
+ emulated = kvmppc_ps_one_in(vcpu, rcomp, ax_rd,
+ ax_rb, fps_fres);
+ break;
+ case OP_4A_PS_MUL:
+ emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd,
+ ax_ra, ax_rc, SCALAR_NONE, fps_fmuls);
+ break;
+ case OP_4A_PS_RSQRTE:
+ emulated = kvmppc_ps_one_in(vcpu, rcomp, ax_rd,
+ ax_rb, fps_frsqrte);
+ break;
+ case OP_4A_PS_MSUB:
+ emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd,
+ ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fmsubs);
+ break;
+ case OP_4A_PS_MADD:
+ emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd,
+ ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fmadds);
+ break;
+ case OP_4A_PS_NMSUB:
+ emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd,
+ ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fnmsubs);
+ break;
+ case OP_4A_PS_NMADD:
+ emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd,
+ ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fnmadds);
+ break;
+ }
+ break;
+
+ /* Real FPU operations */
+
+ case OP_LFS:
+ {
+ ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d;
+
+ emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr,
+ FPU_LS_SINGLE);
+ break;
+ }
+ case OP_LFSU:
+ {
+ ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d;
+
+ emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr,
+ FPU_LS_SINGLE);
+
+ if (emulated == EMULATE_DONE)
+ kvmppc_set_gpr(vcpu, ax_ra, addr);
+ break;
+ }
+ case OP_LFD:
+ {
+ ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d;
+
+ emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr,
+ FPU_LS_DOUBLE);
+ break;
+ }
+ case OP_LFDU:
+ {
+ ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d;
+
+ emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr,
+ FPU_LS_DOUBLE);
+
+ if (emulated == EMULATE_DONE)
+ kvmppc_set_gpr(vcpu, ax_ra, addr);
+ break;
+ }
+ case OP_STFS:
+ {
+ ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d;
+
+ emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr,
+ FPU_LS_SINGLE);
+ break;
+ }
+ case OP_STFSU:
+ {
+ ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d;
+
+ emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr,
+ FPU_LS_SINGLE);
+
+ if (emulated == EMULATE_DONE)
+ kvmppc_set_gpr(vcpu, ax_ra, addr);
+ break;
+ }
+ case OP_STFD:
+ {
+ ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d;
+
+ emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr,
+ FPU_LS_DOUBLE);
+ break;
+ }
+ case OP_STFDU:
+ {
+ ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d;
+
+ emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr,
+ FPU_LS_DOUBLE);
+
+ if (emulated == EMULATE_DONE)
+ kvmppc_set_gpr(vcpu, ax_ra, addr);
+ break;
+ }
+ case 31:
+ switch (inst_get_field(inst, 21, 30)) {
+ case OP_31_LFSX:
+ {
+ ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0;
+
+ addr += kvmppc_get_gpr(vcpu, ax_rb);
+ emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd,
+ addr, FPU_LS_SINGLE);
+ break;
+ }
+ case OP_31_LFSUX:
+ {
+ ulong addr = kvmppc_get_gpr(vcpu, ax_ra) +
+ kvmppc_get_gpr(vcpu, ax_rb);
+
+ emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd,
+ addr, FPU_LS_SINGLE);
+
+ if (emulated == EMULATE_DONE)
+ kvmppc_set_gpr(vcpu, ax_ra, addr);
+ break;
+ }
+ case OP_31_LFDX:
+ {
+ ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) +
+ kvmppc_get_gpr(vcpu, ax_rb);
+
+ emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd,
+ addr, FPU_LS_DOUBLE);
+ break;
+ }
+ case OP_31_LFDUX:
+ {
+ ulong addr = kvmppc_get_gpr(vcpu, ax_ra) +
+ kvmppc_get_gpr(vcpu, ax_rb);
+
+ emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd,
+ addr, FPU_LS_DOUBLE);
+
+ if (emulated == EMULATE_DONE)
+ kvmppc_set_gpr(vcpu, ax_ra, addr);
+ break;
+ }
+ case OP_31_STFSX:
+ {
+ ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) +
+ kvmppc_get_gpr(vcpu, ax_rb);
+
+ emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd,
+ addr, FPU_LS_SINGLE);
+ break;
+ }
+ case OP_31_STFSUX:
+ {
+ ulong addr = kvmppc_get_gpr(vcpu, ax_ra) +
+ kvmppc_get_gpr(vcpu, ax_rb);
+
+ emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd,
+ addr, FPU_LS_SINGLE);
+
+ if (emulated == EMULATE_DONE)
+ kvmppc_set_gpr(vcpu, ax_ra, addr);
+ break;
+ }
+ case OP_31_STFX:
+ {
+ ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) +
+ kvmppc_get_gpr(vcpu, ax_rb);
+
+ emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd,
+ addr, FPU_LS_DOUBLE);
+ break;
+ }
+ case OP_31_STFUX:
+ {
+ ulong addr = kvmppc_get_gpr(vcpu, ax_ra) +
+ kvmppc_get_gpr(vcpu, ax_rb);
+
+ emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd,
+ addr, FPU_LS_DOUBLE);
+
+ if (emulated == EMULATE_DONE)
+ kvmppc_set_gpr(vcpu, ax_ra, addr);
+ break;
+ }
+ case OP_31_STFIWX:
+ {
+ ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) +
+ kvmppc_get_gpr(vcpu, ax_rb);
+
+ emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd,
+ addr,
+ FPU_LS_SINGLE_LOW);
+ break;
+ }
+ break;
+ }
+ break;
+ case 59:
+ switch (inst_get_field(inst, 21, 30)) {
+ case OP_59_FADDS:
+ fpd_fadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+ kvmppc_sync_qpr(vcpu, ax_rd);
+ break;
+ case OP_59_FSUBS:
+ fpd_fsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+ kvmppc_sync_qpr(vcpu, ax_rd);
+ break;
+ case OP_59_FDIVS:
+ fpd_fdivs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+ kvmppc_sync_qpr(vcpu, ax_rd);
+ break;
+ case OP_59_FRES:
+ fpd_fres(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+ kvmppc_sync_qpr(vcpu, ax_rd);
+ break;
+ case OP_59_FRSQRTES:
+ fpd_frsqrtes(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+ kvmppc_sync_qpr(vcpu, ax_rd);
+ break;
+ }
+ switch (inst_get_field(inst, 26, 30)) {
+ case OP_59_FMULS:
+ fpd_fmuls(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c);
+ kvmppc_sync_qpr(vcpu, ax_rd);
+ break;
+ case OP_59_FMSUBS:
+ fpd_fmsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+ kvmppc_sync_qpr(vcpu, ax_rd);
+ break;
+ case OP_59_FMADDS:
+ fpd_fmadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+ kvmppc_sync_qpr(vcpu, ax_rd);
+ break;
+ case OP_59_FNMSUBS:
+ fpd_fnmsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+ kvmppc_sync_qpr(vcpu, ax_rd);
+ break;
+ case OP_59_FNMADDS:
+ fpd_fnmadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+ kvmppc_sync_qpr(vcpu, ax_rd);
+ break;
+ }
+ break;
+ case 63:
+ switch (inst_get_field(inst, 21, 30)) {
+ case OP_63_MTFSB0:
+ case OP_63_MTFSB1:
+ case OP_63_MCRFS:
+ case OP_63_MTFSFI:
+ /* XXX need to implement */
+ break;
+ case OP_63_MFFS:
+ /* XXX missing CR */
+ *fpr_d = vcpu->arch.fpscr;
+ break;
+ case OP_63_MTFSF:
+ /* XXX missing fm bits */
+ /* XXX missing CR */
+ vcpu->arch.fpscr = *fpr_b;
+ break;
+ case OP_63_FCMPU:
+ {
+ u32 tmp_cr;
+ u32 cr0_mask = 0xf0000000;
+ u32 cr_shift = inst_get_field(inst, 6, 8) * 4;
+
+ fpd_fcmpu(&vcpu->arch.fpscr, &tmp_cr, fpr_a, fpr_b);
+ cr &= ~(cr0_mask >> cr_shift);
+ cr |= (cr & cr0_mask) >> cr_shift;
+ break;
+ }
+ case OP_63_FCMPO:
+ {
+ u32 tmp_cr;
+ u32 cr0_mask = 0xf0000000;
+ u32 cr_shift = inst_get_field(inst, 6, 8) * 4;
+
+ fpd_fcmpo(&vcpu->arch.fpscr, &tmp_cr, fpr_a, fpr_b);
+ cr &= ~(cr0_mask >> cr_shift);
+ cr |= (cr & cr0_mask) >> cr_shift;
+ break;
+ }
+ case OP_63_FNEG:
+ fpd_fneg(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+ break;
+ case OP_63_FMR:
+ *fpr_d = *fpr_b;
+ break;
+ case OP_63_FABS:
+ fpd_fabs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+ break;
+ case OP_63_FCPSGN:
+ fpd_fcpsgn(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+ break;
+ case OP_63_FDIV:
+ fpd_fdiv(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+ break;
+ case OP_63_FADD:
+ fpd_fadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+ break;
+ case OP_63_FSUB:
+ fpd_fsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+ break;
+ case OP_63_FCTIW:
+ fpd_fctiw(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+ break;
+ case OP_63_FCTIWZ:
+ fpd_fctiwz(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+ break;
+ case OP_63_FRSP:
+ fpd_frsp(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+ kvmppc_sync_qpr(vcpu, ax_rd);
+ break;
+ case OP_63_FRSQRTE:
+ {
+ double one = 1.0f;
+
+ /* fD = sqrt(fB) */
+ fpd_fsqrt(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+ /* fD = 1.0f / fD */
+ fpd_fdiv(&vcpu->arch.fpscr, &cr, fpr_d, (u64*)&one, fpr_d);
+ break;
+ }
+ }
+ switch (inst_get_field(inst, 26, 30)) {
+ case OP_63_FMUL:
+ fpd_fmul(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c);
+ break;
+ case OP_63_FSEL:
+ fpd_fsel(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+ break;
+ case OP_63_FMSUB:
+ fpd_fmsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+ break;
+ case OP_63_FMADD:
+ fpd_fmadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+ break;
+ case OP_63_FNMSUB:
+ fpd_fnmsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+ break;
+ case OP_63_FNMADD:
+ fpd_fnmadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+ break;
+ }
+ break;
+ }
+
+#ifdef DEBUG
+ for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) {
+ u32 f;
+ kvm_cvt_df(&vcpu->arch.fpr[i], &f);
+ dprintk(KERN_INFO "FPR[%d] = 0x%x\n", i, f);
+ }
+#endif
+
+ if (rcomp)
+ kvmppc_set_cr(vcpu, cr);
+
+ preempt_enable();
+
+ return emulated;
+}
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
new file mode 100644
index 00000000..7759053d
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -0,0 +1,1176 @@
+/*
+ * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
+ *
+ * Authors:
+ * Alexander Graf <agraf@suse.de>
+ * Kevin Wolf <mail@kevin-wolf.de>
+ * Paul Mackerras <paulus@samba.org>
+ *
+ * Description:
+ * Functions relating to running KVM on Book 3S processors where
+ * we don't have access to hypervisor mode, and we run the guest
+ * in problem state (user mode).
+ *
+ * This file is derived from arch/powerpc/kvm/44x.c,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/export.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+
+#include <asm/reg.h>
+#include <asm/cputable.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu_context.h>
+#include <asm/switch_to.h>
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <linux/vmalloc.h>
+#include <linux/highmem.h>
+
+#include "trace.h"
+
+/* #define EXIT_DEBUG */
+/* #define DEBUG_EXT */
+
+static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
+ ulong msr);
+
+/*
+ * Some compatibility defines: on CONFIG_PPC_BOOK3S_32 builds, map names used
+ * in this file onto what that configuration provides.  MSR_USER32 and
+ * MSR_USER64 both become MSR_USER, HW_PAGE_SIZE becomes PAGE_SIZE, and the
+ * __hard_irq_disable / __hard_irq_enable names become local_irq_disable /
+ * local_irq_enable.
+ */
+#ifdef CONFIG_PPC_BOOK3S_32
+#define MSR_USER32 MSR_USER
+#define MSR_USER64 MSR_USER
+#define HW_PAGE_SIZE PAGE_SIZE
+#define __hard_irq_disable local_irq_disable
+#define __hard_irq_enable local_irq_enable
+#endif
+/*
+ * Install the vcpu's saved shadow state where the shadow vcpu accessors and
+ * the PACA expose it.
+ * NOTE(review): presumably invoked when the vcpu is scheduled onto a CPU -
+ * confirm against the callers.
+ *
+ * CONFIG_PPC_BOOK3S_64: slb_shadow, shadow_vcpu and slb_shadow_max are copied
+ * from the book3s vcpu into the shadow vcpu and get_paca()->shadow_vcpu.
+ * CONFIG_PPC_BOOK3S_32: current->thread.kvm_shadow_vcpu is pointed at the
+ * book3s vcpu's shadow_vcpu.
+ *
+ * @vcpu: vcpu whose shadow state is installed
+ * @cpu: not used by this function
+ */
+void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ memcpy(svcpu->slb, to_book3s(vcpu)->slb_shadow, sizeof(svcpu->slb));
+ memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu,
+ sizeof(get_paca()->shadow_vcpu));
+ svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max;
+ svcpu_put(svcpu);
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_32
+ current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu;
+#endif
+}
+/*
+ * Counterpart of kvmppc_core_vcpu_load(): save the live shadow state back
+ * into the book3s vcpu and hand back the FP, Altivec and VSX facilities.
+ *
+ * CONFIG_PPC_BOOK3S_64: the shadow vcpu's slb and slb_max and the contents of
+ * get_paca()->shadow_vcpu are copied into slb_shadow, slb_shadow_max and
+ * shadow_vcpu of the book3s vcpu.
+ *
+ * @vcpu: vcpu whose shadow state is saved
+ */
+void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb));
+ memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
+ sizeof(get_paca()->shadow_vcpu));
+ to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max;
+ svcpu_put(svcpu);
+#endif
+ /* kvmppc_giveup_ext() returns early for facilities the guest does not own */
+ kvmppc_giveup_ext(vcpu, MSR_FP);
+ kvmppc_giveup_ext(vcpu, MSR_VEC);
+ kvmppc_giveup_ext(vcpu, MSR_VSX);
+}
+
+/*
+ * Recompute vcpu->arch.shadow_msr from the guest-visible MSR kept in the
+ * shared area and from the set of facilities the guest currently owns.
+ */
+static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
+{
+	const ulong guest_msr = vcpu->arch.shared->msr;
+	/* Guest MSR values that are taken over unchanged */
+	const ulong passthrough = MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE |
+				  MSR_BE | MSR_DE;
+	/* Process MSR values that are always set */
+	ulong forced = MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* 64-bit Process MSR values */
+	forced |= MSR_ISF | MSR_HV;
+#endif
+
+	/* External providers the guest reserved follow the guest MSR too */
+	vcpu->arch.shadow_msr = (guest_msr & passthrough) | forced |
+				(guest_msr & vcpu->arch.guest_owned_ext);
+}
+/*
+ * Set the guest-visible MSR and update everything derived from it.
+ *
+ * The new value is masked with the vcpu's msr_mask, stored in the shared
+ * area, and the shadow MSR is recomputed.  If MSR_POW is set and no
+ * exceptions are pending, kvm_vcpu_block() is called and MSR_POW is cleared
+ * again afterwards.  If any of MSR_PR, MSR_IR or MSR_DR differs from the
+ * previous MSR, the segments are flushed and the segment of the current PC is
+ * mapped again.  Finally, if MSR_FP is set, kvmppc_handle_ext() is called for
+ * the FPU.
+ *
+ * @vcpu: vcpu to update
+ * @msr: requested MSR value (before masking)
+ */
+void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
+{
+ ulong old_msr = vcpu->arch.shared->msr;
+
+#ifdef EXIT_DEBUG
+ printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
+#endif
+ /* Keep only the bits allowed by msr_mask, publish them, derive shadow MSR */
+ msr &= to_book3s(vcpu)->msr_mask;
+ vcpu->arch.shared->msr = msr;
+ kvmppc_recalc_shadow_msr(vcpu);
+ /* MSR_POW with nothing pending: wait in kvm_vcpu_block() */
+ if (msr & MSR_POW) {
+ if (!vcpu->arch.pending_exceptions) {
+ kvm_vcpu_block(vcpu);
+ vcpu->stat.halt_wakeup++;
+
+ /* Unset POW bit after we woke up */
+ msr &= ~MSR_POW;
+ vcpu->arch.shared->msr = msr;
+ }
+ }
+ /* A change of PR, IR or DR triggers a segment flush and re-map of the PC */
+ if ((vcpu->arch.shared->msr & (MSR_PR|MSR_IR|MSR_DR)) !=
+ (old_msr & (MSR_PR|MSR_IR|MSR_DR))) {
+ kvmppc_mmu_flush_segments(vcpu);
+ kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
+
+ /* Preload magic page segment when in kernel mode; the effective
+ * address is used with MSR_DR set, the physical one otherwise */
+ if (!(msr & MSR_PR) && vcpu->arch.magic_page_pa) {
+ struct kvm_vcpu_arch *a = &vcpu->arch;
+
+ if (msr & MSR_DR)
+ kvmppc_mmu_map_segment(vcpu, a->magic_page_ea);
+ else
+ kvmppc_mmu_map_segment(vcpu, a->magic_page_pa);
+ }
+ }
+
+ /* Preload FPU if it's enabled */
+ if (vcpu->arch.shared->msr & MSR_FP)
+ kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
+}
+/*
+ * Record the guest's processor version register and configure the vcpu to
+ * match it.
+ *
+ * Selects the 64-bit or 32-bit Book3S MMU implementation, the default HIOR
+ * (unless hior_explicit is set), the MSR mask and the cpu_type, then derives
+ * the BOOK3S_HFLAG_DCBZ32 and BOOK3S_HFLAG_NATIVE_PS flags from the host the
+ * code is running on.
+ *
+ * @vcpu: vcpu to configure
+ * @pvr: guest PVR value
+ */
+void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
+{
+ u32 host_pvr;
+ /* BOOK3S_HFLAG_SLB is cleared before an MMU implementation is chosen */
+ vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB;
+ vcpu->arch.pvr = pvr;
+#ifdef CONFIG_PPC_BOOK3S_64
+ if ((pvr >= 0x330000) && (pvr < 0x70330000)) { /* PVRs in this range get the 64-bit MMU */
+ kvmppc_mmu_book3s_64_init(vcpu);
+ if (!to_book3s(vcpu)->hior_explicit)
+ to_book3s(vcpu)->hior = 0xfff00000;
+ to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL;
+ vcpu->arch.cpu_type = KVM_CPU_3S_64;
+ } else
+#endif
+ { /* every other PVR (and every PVR without CONFIG_PPC_BOOK3S_64) is 32-bit */
+ kvmppc_mmu_book3s_32_init(vcpu);
+ if (!to_book3s(vcpu)->hior_explicit)
+ to_book3s(vcpu)->hior = 0;
+ to_book3s(vcpu)->msr_mask = 0xffffffffULL;
+ vcpu->arch.cpu_type = KVM_CPU_3S_32;
+ }
+
+ kvmppc_sanity_check(vcpu);
+
+ /* If we are in hypervisor level on 970, we can tell the CPU to
+ * treat DCBZ as 32 bytes store */
+ vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32;
+ if (vcpu->arch.mmu.is_dcbz32(vcpu) && (mfmsr() & MSR_HV) &&
+ !strcmp(cur_cpu_spec->platform, "ppc970"))
+ vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
+
+ /* Cell performs badly if MSR_FEx are set. So let's hope nobody
+ really needs them in a VM on Cell and force disable them. */
+ if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be"))
+ to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1);
+
+#ifdef CONFIG_PPC_BOOK3S_32
+ /* 32 bit Book3S always has 32 byte dcbz */
+ vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
+#endif
+
+ /* On some CPUs we can execute paired single operations natively.
+ * The host PVR is read directly; PVRs not listed below leave the
+ * flags and HID2 untouched (the switch has no default). */
+ asm ( "mfpvr %0" : "=r"(host_pvr));
+ switch (host_pvr) {
+ case 0x00080200: /* lonestar 2.0 */
+ case 0x00088202: /* lonestar 2.2 */
+ case 0x70000100: /* gekko 1.0 */
+ case 0x00080100: /* gekko 2.0 */
+ case 0x00083203: /* gekko 2.3a */
+ case 0x00083213: /* gekko 2.3b */
+ case 0x00083204: /* gekko 2.4 */
+ case 0x00083214: /* gekko 2.4e (8SE) - retail HW2 */
+ case 0x00087200: /* broadway */
+ vcpu->arch.hflags |= BOOK3S_HFLAG_NATIVE_PS;
+ /* Enable HID2.PSE - in case we need it later */
+ mtspr(SPRN_HID2_GEKKO, mfspr(SPRN_HID2_GEKKO) | (1 << 29));
+ }
+}
+
+/* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To
+ * make Book3s_32 Linux work on Book3s_64, we have to make sure we trap dcbz to
+ * emulate 32 bytes dcbz length.
+ *
+ * The Book3s_64 inventors also realized this case and implemented a special bit
+ * in the HID5 register, which is a hypervisor resource. Thus we can't use it.
+ *
+ * My approach here is to patch the dcbz instruction on executing pages.
+ *
+ * kvmppc_patch_dcbz() looks up the host page backing pte->raddr, maps it and
+ * scans HW_PAGE_SIZE bytes worth of 32-bit words, starting at the 4k-aligned
+ * offset of raddr inside that page.  Every word that equals INS_DCBZ under
+ * the mask 0xff0007ff has bit 0x8 cleared in place.  Nothing is patched when
+ * the lookup yields an error page.
+ *
+ * @vcpu: vcpu whose guest memory is patched
+ * @pte: translation whose raddr identifies the page to scan
+ */
+static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
+{
+ struct page *hpage;
+ u64 hpage_offset;
+ u32 *page;
+ int i;
+
+ hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
+ if (is_error_page(hpage)) {
+ kvm_release_page_clean(hpage);
+ return;
+ }
+ /* Offset of raddr inside the host page, rounded down to 4k, as a u32 index */
+ hpage_offset = pte->raddr & ~PAGE_MASK;
+ hpage_offset &= ~0xFFFULL;
+ hpage_offset /= 4;
+
+ get_page(hpage);
+ page = kmap_atomic(hpage);
+
+ /* patch dcbz into reserved instruction, so we trap */
+ for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++)
+ if ((page[i] & 0xff0007ff) == INS_DCBZ)
+ page[i] &= 0xfffffff7;
+
+ kunmap_atomic(page);
+ put_page(hpage);
+}
+
+/*
+ * Tell whether a guest frame number is backed by something the guest may
+ * touch: either the frame of the magic page (when one is registered) or a
+ * frame that kvm_is_visible_gfn() accepts.
+ *
+ * Returns 1 for the magic page frame, otherwise whatever
+ * kvm_is_visible_gfn() returns.
+ */
+static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	const ulong magic_pa = vcpu->arch.magic_page_pa;
+
+	if (unlikely(magic_pa)) {
+		const gfn_t magic_gfn = (magic_pa & KVM_PAM) >> PAGE_SHIFT;
+
+		if (unlikely(magic_gfn == gfn))
+			return 1;
+	}
+
+	return kvm_is_visible_gfn(vcpu->kvm, gfn);
+}
+/*
+ * Handle a guest instruction or data storage fault at eaddr.
+ *
+ * The address is translated through the guest MMU when the relevant MSR
+ * relocation bit (MSR_DR for data faults, MSR_IR otherwise) is set; otherwise
+ * a read/write/execute PTE for eaddr & KVM_PAM is synthesised.  Depending on
+ * the outcome the fault is reflected into the guest as a queued interrupt,
+ * resolved by mapping the page on the host, or forwarded to MMIO emulation.
+ *
+ * @run: passed through to kvmppc_emulate_mmio()
+ * @vcpu: faulting vcpu
+ * @eaddr: faulting effective address
+ * @vec: interrupt vector; BOOK3S_INTERRUPT_DATA_STORAGE marks a data fault
+ *
+ * Returns RESUME_GUEST unless MMIO emulation is attempted, in which case the
+ * result of kvmppc_emulate_mmio() is returned with RESUME_HOST_NV replaced by
+ * RESUME_HOST.
+ */
+int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ ulong eaddr, int vec)
+{
+ bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE);
+ int r = RESUME_GUEST;
+ int relocated;
+ int page_found = 0;
+ struct kvmppc_pte pte;
+ bool is_mmio = false;
+ bool dr = (vcpu->arch.shared->msr & MSR_DR) ? true : false;
+ bool ir = (vcpu->arch.shared->msr & MSR_IR) ? true : false;
+ u64 vsid;
+ /* Data faults follow MSR_DR, everything else follows MSR_IR */
+ relocated = data ? dr : ir;
+
+ /* Resolve real address if translation turned on */
+ if (relocated) {
+ page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data);
+ } else {
+ pte.may_execute = true;
+ pte.may_read = true;
+ pte.may_write = true;
+ pte.raddr = eaddr & KVM_PAM;
+ pte.eaddr = eaddr;
+ pte.vpage = eaddr >> 12;
+ }
+ /* Tag vpage with a VSID unless both DR and IR are set (no case matches then) */
+ switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
+ case 0:
+ pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12));
+ break;
+ case MSR_DR:
+ case MSR_IR:
+ vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);
+
+ if ((vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) == MSR_DR)
+ pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12));
+ else
+ pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12));
+ pte.vpage |= vsid;
+
+ if (vsid == -1) /* -1 converts to the all-ones u64 value */
+ page_found = -EINVAL;
+ break;
+ }
+
+ if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
+ (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
+ /*
+ * If we do the dcbz hack, we have to NX on every execution,
+ * so we can patch the executing code. This renders our guest
+ * NX-less.
+ */
+ pte.may_execute = !data;
+ }
+
+ if (page_found == -ENOENT) {
+ /* Page not found in guest PTE entries */
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
+ vcpu->arch.shared->dsisr = svcpu->fault_dsisr;
+ vcpu->arch.shared->msr |=
+ (svcpu->shadow_srr1 & 0x00000000f8000000ULL);
+ svcpu_put(svcpu);
+ kvmppc_book3s_queue_irqprio(vcpu, vec);
+ } else if (page_found == -EPERM) {
+ /* Storage protection */
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
+ vcpu->arch.shared->dsisr = svcpu->fault_dsisr & ~DSISR_NOHPTE;
+ vcpu->arch.shared->dsisr |= DSISR_PROTFAULT;
+ vcpu->arch.shared->msr |=
+ svcpu->shadow_srr1 & 0x00000000f8000000ULL;
+ svcpu_put(svcpu);
+ kvmppc_book3s_queue_irqprio(vcpu, vec);
+ } else if (page_found == -EINVAL) {
+ /* Page not found in guest SLB; the vector queued is vec + 0x80 */
+ vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
+ kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
+ } else if (!is_mmio && /* is_mmio is never set above: only the gfn check decides */
+ kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) {
+ /* The guest's PTE is not mapped yet. Map on the host */
+ kvmppc_mmu_map_page(vcpu, &pte);
+ if (data)
+ vcpu->stat.sp_storage++;
+ else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
+ (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32)))
+ kvmppc_patch_dcbz(vcpu, &pte);
+ } else {
+ /* MMIO */
+ vcpu->stat.mmio_exits++;
+ vcpu->arch.paddr_accessed = pte.raddr;
+ r = kvmppc_emulate_mmio(run, vcpu);
+ if ( r == RESUME_HOST_NV )
+ r = RESUME_HOST;
+ }
+
+ return r;
+}
+
+/*
+ * Translate FPR number i into its index in the thread's FPR array when that
+ * array is viewed as u64 slots: every second slot with CONFIG_VSX, a plain
+ * one-to-one mapping otherwise.
+ */
+static inline int get_fpr_index(int i)
+{
+#ifdef CONFIG_VSX
+	return i * 2;
+#else
+	return i;
+#endif
+}
+
+/* Give up external provider (FPU, Altivec, VSX)
+ *
+ * Copies the register state of the facility selected by msr from
+ * current->thread back into the vcpu, then removes the facility from
+ * guest_owned_ext and from the thread's MSR and recomputes the shadow MSR.
+ * Returns immediately if the guest does not own the facility.
+ *
+ * @vcpu: vcpu that receives the saved state
+ * @msr: exactly one of MSR_FP, MSR_VEC or MSR_VSX; any other value that
+ * passes the ownership check hits BUG()
+ */
+void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
+{
+ struct thread_struct *t = &current->thread;
+ u64 *vcpu_fpr = vcpu->arch.fpr;
+#ifdef CONFIG_VSX
+ u64 *vcpu_vsx = vcpu->arch.vsr;
+#endif
+ u64 *thread_fpr = (u64*)t->fpr;
+ int i;
+ /* Nothing to save if the guest does not own this facility */
+ if (!(vcpu->arch.guest_owned_ext & msr))
+ return;
+
+#ifdef DEBUG_EXT
+ printk(KERN_INFO "Giving up ext 0x%lx\n", msr);
+#endif
+
+ switch (msr) {
+ case MSR_FP:
+ giveup_fpu(current);
+ for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
+ vcpu_fpr[i] = thread_fpr[get_fpr_index(i)]; /* stride 2 with CONFIG_VSX */
+
+ vcpu->arch.fpscr = t->fpscr.val;
+ break;
+ case MSR_VEC:
+#ifdef CONFIG_ALTIVEC
+ giveup_altivec(current);
+ memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
+ vcpu->arch.vscr = t->vscr;
+#endif
+ break;
+ case MSR_VSX:
+#ifdef CONFIG_VSX
+ __giveup_vsx(current);
+ for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
+ vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1]; /* slot after each FPR */
+#endif
+ break;
+ default:
+ BUG();
+ }
+
+ vcpu->arch.guest_owned_ext &= ~msr;
+ current->thread.regs->msr &= ~msr;
+ kvmppc_recalc_shadow_msr(vcpu);
+}
+
+/*
+ * Try to load the instruction at the guest's current PC.
+ *
+ * When the load reports -ENOENT, MSR fields 33, 34-36 and 42-47 of the guest
+ * MSR are rewritten (to 1, 0 and 0), an instruction storage interrupt is
+ * queued and EMULATE_AGAIN is returned.  Every other load result yields
+ * EMULATE_DONE.
+ */
+static int kvmppc_read_inst(struct kvm_vcpu *vcpu)
+{
+	ulong pc = kvmppc_get_pc(vcpu);
+	u32 inst = kvmppc_get_last_inst(vcpu);
+	ulong guest_msr;
+
+	if (kvmppc_ld(vcpu, &pc, sizeof(u32), &inst, false) != -ENOENT)
+		return EMULATE_DONE;
+
+	guest_msr = vcpu->arch.shared->msr;
+	guest_msr = kvmppc_set_field(guest_msr, 33, 33, 1);
+	guest_msr = kvmppc_set_field(guest_msr, 34, 36, 0);
+	vcpu->arch.shared->msr = kvmppc_set_field(guest_msr, 42, 47, 0);
+
+	kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
+
+	return EMULATE_AGAIN;
+}
+
+/*
+ * Decide whether an exit needs paired-single emulation.
+ *
+ * Returns EMULATE_DONE when BOOK3S_HFLAG_PAIRED_SINGLE is not set,
+ * EMULATE_FAIL when the flag is set and the instruction could be read (the
+ * instruction needs to be emulated), and EMULATE_AGAIN when reading the
+ * instruction did not complete.  exit_nr is not used here.
+ */
+static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr)
+{
+	if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE) {
+		/* Read out the instruction */
+		if (kvmppc_read_inst(vcpu) != EMULATE_DONE)
+			return EMULATE_AGAIN;
+
+		/* Need to emulate */
+		return EMULATE_FAIL;
+	}
+
+	/* No paired single emulation requested */
+	return EMULATE_DONE;
+}
+
+/* Handle external providers (FPU, Altivec, VSX)
+ *
+ * Copies the vcpu's saved state for the facility selected by msr into
+ * current->thread, calls the matching kvmppc_load_up_*() helper and records
+ * the facility in guest_owned_ext.  Nothing is loaded when paired-single
+ * emulation is active (BOOK3S_HFLAG_PAIRED_SINGLE), when the guest MSR does
+ * not have the facility enabled (exit_nr is queued for the guest instead),
+ * or when the guest already owns the facility.
+ *
+ * @vcpu: vcpu whose state is loaded
+ * @exit_nr: interrupt to queue if the guest has the facility disabled
+ * @msr: exactly one of MSR_FP, MSR_VEC or MSR_VSX, otherwise BUG()
+ *
+ * Always returns RESUME_GUEST.
+ */
+static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
+ ulong msr)
+{
+ struct thread_struct *t = &current->thread;
+ u64 *vcpu_fpr = vcpu->arch.fpr;
+#ifdef CONFIG_VSX
+ u64 *vcpu_vsx = vcpu->arch.vsr;
+#endif
+ u64 *thread_fpr = (u64*)t->fpr;
+ int i;
+
+ /* When we have paired singles, we emulate in software */
+ if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)
+ return RESUME_GUEST;
+ /* Facility disabled in the guest MSR: let the guest handle the interrupt */
+ if (!(vcpu->arch.shared->msr & msr)) {
+ kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+ return RESUME_GUEST;
+ }
+
+ /* We already own the ext */
+ if (vcpu->arch.guest_owned_ext & msr) {
+ return RESUME_GUEST;
+ }
+
+#ifdef DEBUG_EXT
+ printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
+#endif
+
+ current->thread.regs->msr |= msr;
+
+ switch (msr) {
+ case MSR_FP:
+ for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
+ thread_fpr[get_fpr_index(i)] = vcpu_fpr[i]; /* stride 2 with CONFIG_VSX */
+
+ t->fpscr.val = vcpu->arch.fpscr;
+ t->fpexc_mode = 0;
+ kvmppc_load_up_fpu();
+ break;
+ case MSR_VEC:
+#ifdef CONFIG_ALTIVEC
+ memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr));
+ t->vscr = vcpu->arch.vscr;
+ t->vrsave = -1;
+ kvmppc_load_up_altivec();
+#endif
+ break;
+ case MSR_VSX:
+#ifdef CONFIG_VSX
+ for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
+ thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i]; /* slot after each FPR */
+ kvmppc_load_up_vsx();
+#endif
+ break;
+ default:
+ BUG();
+ }
+
+ vcpu->arch.guest_owned_ext |= msr;
+
+ kvmppc_recalc_shadow_msr(vcpu);
+
+ return RESUME_GUEST;
+}
+
+int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int exit_nr)
+{
+ int r = RESUME_HOST;
+
+ vcpu->stat.sum_exits++;
+
+ run->exit_reason = KVM_EXIT_UNKNOWN;
+ run->ready_for_interrupt_injection = 1;
+
+ trace_kvm_book3s_exit(exit_nr, vcpu);
+ preempt_enable();
+ kvm_resched(vcpu);
+ switch (exit_nr) {
+ case BOOK3S_INTERRUPT_INST_STORAGE:
+ {
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ ulong shadow_srr1 = svcpu->shadow_srr1;
+ vcpu->stat.pf_instruc++;
+
+#ifdef CONFIG_PPC_BOOK3S_32
+ /* We set segments as unused segments when invalidating them. So
+ * treat the respective fault as segment fault. */
+ if (svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT] == SR_INVALID) {
+ kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
+ r = RESUME_GUEST;
+ svcpu_put(svcpu);
+ break;
+ }
+#endif
+ svcpu_put(svcpu);
+
+ /* only care about PTEG not found errors, but leave NX alone */
+ if (shadow_srr1 & 0x40000000) {
+ r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr);
+ vcpu->stat.sp_instruc++;
+ } else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
+ (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
+ /*
+ * XXX If we do the dcbz hack we use the NX bit to flush&patch the page,
+ * so we can't use the NX bit inside the guest. Let's cross our fingers,
+ * that no guest that needs the dcbz hack does NX.
+ */
+ kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
+ r = RESUME_GUEST;
+ } else {
+ vcpu->arch.shared->msr |= shadow_srr1 & 0x58000000;
+ kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+ r = RESUME_GUEST;
+ }
+ break;
+ }
+ case BOOK3S_INTERRUPT_DATA_STORAGE:
+ {
+ ulong dar = kvmppc_get_fault_dar(vcpu);
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ u32 fault_dsisr = svcpu->fault_dsisr;
+ vcpu->stat.pf_storage++;
+
+#ifdef CONFIG_PPC_BOOK3S_32
+ /* We set segments as unused segments when invalidating them. So
+ * treat the respective fault as segment fault. */
+ if ((svcpu->sr[dar >> SID_SHIFT]) == SR_INVALID) {
+ kvmppc_mmu_map_segment(vcpu, dar);
+ r = RESUME_GUEST;
+ svcpu_put(svcpu);
+ break;
+ }
+#endif
+ svcpu_put(svcpu);
+
+ /* The only case we need to handle is missing shadow PTEs */
+ if (fault_dsisr & DSISR_NOHPTE) {
+ r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
+ } else {
+ vcpu->arch.shared->dar = dar;
+ vcpu->arch.shared->dsisr = fault_dsisr;
+ kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+ r = RESUME_GUEST;
+ }
+ break;
+ }
+ case BOOK3S_INTERRUPT_DATA_SEGMENT:
+ if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) {
+ vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
+ kvmppc_book3s_queue_irqprio(vcpu,
+ BOOK3S_INTERRUPT_DATA_SEGMENT);
+ }
+ r = RESUME_GUEST;
+ break;
+ case BOOK3S_INTERRUPT_INST_SEGMENT:
+ if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)) < 0) {
+ kvmppc_book3s_queue_irqprio(vcpu,
+ BOOK3S_INTERRUPT_INST_SEGMENT);
+ }
+ r = RESUME_GUEST;
+ break;
+ /* We're good on these - the host merely wanted to get our attention */
+ case BOOK3S_INTERRUPT_DECREMENTER:
+ vcpu->stat.dec_exits++;
+ r = RESUME_GUEST;
+ break;
+ case BOOK3S_INTERRUPT_EXTERNAL:
+ vcpu->stat.ext_intr_exits++;
+ r = RESUME_GUEST;
+ break;
+ case BOOK3S_INTERRUPT_PERFMON:
+ r = RESUME_GUEST;
+ break;
+ case BOOK3S_INTERRUPT_PROGRAM:
+ {
+ enum emulation_result er;
+ struct kvmppc_book3s_shadow_vcpu *svcpu;
+ ulong flags;
+
+program_interrupt:
+ svcpu = svcpu_get(vcpu);
+ flags = svcpu->shadow_srr1 & 0x1f0000ull;
+ svcpu_put(svcpu);
+
+ if (vcpu->arch.shared->msr & MSR_PR) {
+#ifdef EXIT_DEBUG
+ printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
+#endif
+ if ((kvmppc_get_last_inst(vcpu) & 0xff0007ff) !=
+ (INS_DCBZ & 0xfffffff7)) {
+ kvmppc_core_queue_program(vcpu, flags);
+ r = RESUME_GUEST;
+ break;
+ }
+ }
+
+ vcpu->stat.emulated_inst_exits++;
+ er = kvmppc_emulate_instruction(run, vcpu);
+ switch (er) {
+ case EMULATE_DONE:
+ r = RESUME_GUEST_NV;
+ break;
+ case EMULATE_AGAIN:
+ r = RESUME_GUEST;
+ break;
+ case EMULATE_FAIL:
+ printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
+ __func__, kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
+ kvmppc_core_queue_program(vcpu, flags);
+ r = RESUME_GUEST;
+ break;
+ case EMULATE_DO_MMIO:
+ run->exit_reason = KVM_EXIT_MMIO;
+ r = RESUME_HOST_NV;
+ break;
+ default:
+ BUG();
+ }
+ break;
+ }
+ case BOOK3S_INTERRUPT_SYSCALL:
+ if (vcpu->arch.papr_enabled &&
+ (kvmppc_get_last_inst(vcpu) == 0x44000022) &&
+ !(vcpu->arch.shared->msr & MSR_PR)) {
+ /* SC 1 papr hypercalls */
+ ulong cmd = kvmppc_get_gpr(vcpu, 3);
+ int i;
+
+#ifdef CONFIG_KVM_BOOK3S_64_PR
+ if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) {
+ r = RESUME_GUEST;
+ break;
+ }
+#endif
+
+ run->papr_hcall.nr = cmd;
+ for (i = 0; i < 9; ++i) {
+ ulong gpr = kvmppc_get_gpr(vcpu, 4 + i);
+ run->papr_hcall.args[i] = gpr;
+ }
+ run->exit_reason = KVM_EXIT_PAPR_HCALL;
+ vcpu->arch.hcall_needed = 1;
+ r = RESUME_HOST;
+ } else if (vcpu->arch.osi_enabled &&
+ (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) &&
+ (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) {
+ /* MOL hypercalls */
+ u64 *gprs = run->osi.gprs;
+ int i;
+
+ run->exit_reason = KVM_EXIT_OSI;
+ for (i = 0; i < 32; i++)
+ gprs[i] = kvmppc_get_gpr(vcpu, i);
+ vcpu->arch.osi_needed = 1;
+ r = RESUME_HOST_NV;
+ } else if (!(vcpu->arch.shared->msr & MSR_PR) &&
+ (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) {
+ /* KVM PV hypercalls */
+ kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu));
+ r = RESUME_GUEST;
+ } else {
+ /* Guest syscalls */
+ vcpu->stat.syscall_exits++;
+ kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+ r = RESUME_GUEST;
+ }
+ break;
+ case BOOK3S_INTERRUPT_FP_UNAVAIL:
+ case BOOK3S_INTERRUPT_ALTIVEC:
+ case BOOK3S_INTERRUPT_VSX:
+ {
+ int ext_msr = 0;
+
+ switch (exit_nr) {
+ case BOOK3S_INTERRUPT_FP_UNAVAIL: ext_msr = MSR_FP; break;
+ case BOOK3S_INTERRUPT_ALTIVEC: ext_msr = MSR_VEC; break;
+ case BOOK3S_INTERRUPT_VSX: ext_msr = MSR_VSX; break;
+ }
+
+ switch (kvmppc_check_ext(vcpu, exit_nr)) {
+ case EMULATE_DONE:
+ /* everything ok - let's enable the ext */
+ r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr);
+ break;
+ case EMULATE_FAIL:
+ /* we need to emulate this instruction */
+ goto program_interrupt;
+ break;
+ default:
+ /* nothing to worry about - go again */
+ break;
+ }
+ break;
+ }
+ case BOOK3S_INTERRUPT_ALIGNMENT:
+ if (kvmppc_read_inst(vcpu) == EMULATE_DONE) {
+ vcpu->arch.shared->dsisr = kvmppc_alignment_dsisr(vcpu,
+ kvmppc_get_last_inst(vcpu));
+ vcpu->arch.shared->dar = kvmppc_alignment_dar(vcpu,
+ kvmppc_get_last_inst(vcpu));
+ kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+ }
+ r = RESUME_GUEST;
+ break;
+ case BOOK3S_INTERRUPT_MACHINE_CHECK:
+ case BOOK3S_INTERRUPT_TRACE:
+ kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+ r = RESUME_GUEST;
+ break;
+ default:
+ {
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ ulong shadow_srr1 = svcpu->shadow_srr1;
+ svcpu_put(svcpu);
+ /* Ugh - bork here! What did we get? */
+ printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
+ exit_nr, kvmppc_get_pc(vcpu), shadow_srr1);
+ r = RESUME_HOST;
+ BUG();
+ break;
+ }
+ }
+
+ preempt_disable();
+ if (!(r & RESUME_HOST)) {
+ /* To avoid clobbering exit_reason, only check for signals if
+ * we aren't already exiting to userspace for some other
+ * reason. */
+
+ /*
+ * Interrupts could be timers for the guest which we have to
+ * inject again, so let's postpone them until we're in the guest
+ * and if we really did time things so badly, then we just exit
+ * again due to a host external interrupt.
+ */
+ __hard_irq_disable();
+ if (signal_pending(current)) {
+ __hard_irq_enable();
+#ifdef EXIT_DEBUG
+ printk(KERN_EMERG "KVM: Going back to host\n");
+#endif
+ vcpu->stat.signal_exits++;
+ run->exit_reason = KVM_EXIT_INTR;
+ r = -EINTR;
+ } else {
+ /* In case an interrupt came in that was triggered
+ * from userspace (like DEC), we need to check what
+ * to inject now! */
+ kvmppc_core_prepare_to_enter(vcpu);
+ }
+ }
+
+ trace_kvm_book3s_reenter(r, vcpu);
+
+ return r;
+}
+
+/*
+ * Report the vcpu's special registers to userspace.
+ *
+ * Fills @sregs with the PVR and SDR1, followed by either the 64 SLB entries
+ * (when BOOK3S_HFLAG_SLB is set in the vcpu's hflags) or the 16 segment
+ * registers plus the 8 instruction/data BAT pairs.  Always returns 0.
+ */
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs)
+{
+	struct kvmppc_vcpu_book3s *book3s = to_book3s(vcpu);
+	int n;
+
+	sregs->pvr = vcpu->arch.pvr;
+	sregs->u.s.sdr1 = book3s->sdr1;
+
+	if (!(vcpu->arch.hflags & BOOK3S_HFLAG_SLB)) {
+		/* No SLB: export segment registers and BATs */
+		for (n = 0; n < 16; n++)
+			sregs->u.s.ppc32.sr[n] = vcpu->arch.shared->sr[n];
+
+		for (n = 0; n < 8; n++) {
+			sregs->u.s.ppc32.ibat[n] = book3s->ibat[n].raw;
+			sregs->u.s.ppc32.dbat[n] = book3s->dbat[n].raw;
+		}
+
+		return 0;
+	}
+
+	/* SLB: the entry index is OR-ed into the reported slbe value */
+	for (n = 0; n < 64; n++) {
+		sregs->u.s.ppc64.slb[n].slbe = vcpu->arch.slb[n].orige | n;
+		sregs->u.s.ppc64.slb[n].slbv = vcpu->arch.slb[n].origv;
+	}
+
+	return 0;
+}
+
+/*
+ * Load the vcpu's special registers from userspace-provided @sregs.
+ *
+ * Applies the PVR and SDR1, then either replays the 64 SLB entries through
+ * the MMU's slbmte hook (BOOK3S_HFLAG_SLB set) or programs the 16 segment
+ * registers via mtsrin and the 8 IBAT/DBAT pairs via kvmppc_set_bat().
+ * All shadow PTEs are flushed afterwards.  Always returns 0.
+ */
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs)
+{
+	struct kvmppc_vcpu_book3s *book3s = to_book3s(vcpu);
+	int n;
+
+	kvmppc_set_pvr(vcpu, sregs->pvr);
+	book3s->sdr1 = sregs->u.s.sdr1;
+
+	if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
+		for (n = 0; n < 64; n++)
+			vcpu->arch.mmu.slbmte(vcpu,
+					      sregs->u.s.ppc64.slb[n].slbv,
+					      sregs->u.s.ppc64.slb[n].slbe);
+	} else {
+		for (n = 0; n < 16; n++)
+			vcpu->arch.mmu.mtsrin(vcpu, n, sregs->u.s.ppc32.sr[n]);
+
+		for (n = 0; n < 8; n++) {
+			struct kvmppc_bat *ibat = &book3s->ibat[n];
+			struct kvmppc_bat *dbat = &book3s->dbat[n];
+
+			/* Each 64-bit value carries two 32-bit BAT halves:
+			 * low word first (upper == false), then high word. */
+			kvmppc_set_bat(vcpu, ibat, false,
+				       (u32)sregs->u.s.ppc32.ibat[n]);
+			kvmppc_set_bat(vcpu, ibat, true,
+				       (u32)(sregs->u.s.ppc32.ibat[n] >> 32));
+			kvmppc_set_bat(vcpu, dbat, false,
+				       (u32)sregs->u.s.ppc32.dbat[n]);
+			kvmppc_set_bat(vcpu, dbat, true,
+				       (u32)(sregs->u.s.ppc32.dbat[n] >> 32));
+		}
+	}
+
+	/* Flush the MMU after messing with the segments */
+	kvmppc_mmu_pte_flush(vcpu, 0, 0);
+
+	return 0;
+}
+
+/*
+ * Read one register, selected by reg->id, into the user buffer at reg->addr.
+ *
+ * Only KVM_REG_PPC_HIOR is handled here.
+ *
+ * Returns 0 on success, -EFAULT if the user buffer cannot be written, or
+ * -EINVAL for an unknown register id.
+ */
+int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+	int r = -EINVAL;
+
+	switch (reg->id) {
+	case KVM_REG_PPC_HIOR:
+		/*
+		 * copy_to_user() returns the number of bytes it could NOT
+		 * copy, not an errno; translate any shortfall to -EFAULT
+		 * instead of leaking a positive count to the ioctl caller.
+		 */
+		if (copy_to_user((u64 __user *)(long)reg->addr,
+				 &to_book3s(vcpu)->hior, sizeof(u64)))
+			r = -EFAULT;
+		else
+			r = 0;
+		break;
+	default:
+		break;
+	}
+
+	return r;
+}
+
+/*
+ * Write one register, selected by reg->id, from the user buffer at reg->addr.
+ *
+ * Only KVM_REG_PPC_HIOR is handled here; a successful write also sets
+ * hior_explicit on the book3s vcpu.
+ *
+ * Returns 0 on success, -EFAULT if the user buffer cannot be read, or
+ * -EINVAL for an unknown register id.
+ */
+int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+	int r = -EINVAL;
+
+	switch (reg->id) {
+	case KVM_REG_PPC_HIOR:
+		/*
+		 * copy_from_user() returns the number of bytes it could NOT
+		 * copy, not an errno; translate any shortfall to -EFAULT.
+		 */
+		if (copy_from_user(&to_book3s(vcpu)->hior,
+				   (u64 __user *)(long)reg->addr,
+				   sizeof(u64))) {
+			r = -EFAULT;
+			break;
+		}
+		to_book3s(vcpu)->hior_explicit = true;
+		r = 0;
+		break;
+	default:
+		break;
+	}
+
+	return r;
+}
+
+/* Processor compatibility hook: this implementation unconditionally returns 0. */
+int kvmppc_core_check_processor_compat(void)
+{
+ return 0;
+}
+
+/*
+ * Allocate and initialise a book3s vcpu for @kvm with vcpu id @id.
+ *
+ * Allocates the kvmppc_vcpu_book3s container, its shadow vcpu and one zeroed
+ * page for the shared area, picks a default PVR and initialises the MMU.
+ *
+ * Returns the embedded struct kvm_vcpu on success or an ERR_PTR() on
+ * failure; everything allocated so far is released on the error paths.
+ */
+struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+	struct kvmppc_vcpu_book3s *vcpu_book3s;
+	struct kvm_vcpu *vcpu;
+	int err = -ENOMEM;
+	unsigned long p;
+
+	vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s));
+	if (!vcpu_book3s)
+		goto out;
+
+	vcpu_book3s->shadow_vcpu =
+		kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL);
+	if (!vcpu_book3s->shadow_vcpu)
+		goto free_vcpu;
+
+	vcpu = &vcpu_book3s->vcpu;
+	err = kvm_vcpu_init(vcpu, kvm, id);
+	if (err)
+		goto free_shadow_vcpu;
+
+	/*
+	 * kvm_vcpu_init() left err == 0; re-arm it so that a failed page
+	 * allocation is reported as -ENOMEM rather than ERR_PTR(0) (NULL).
+	 */
+	err = -ENOMEM;
+	p = __get_free_page(GFP_KERNEL|__GFP_ZERO);
+	if (!p)
+		goto uninit_vcpu;
+	/* the real shared page fills the last 4k of our page */
+	vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* default to book3s_64 (970fx) */
+	vcpu->arch.pvr = 0x3C0301;
+#else
+	/* default to book3s_32 (750) */
+	vcpu->arch.pvr = 0x84202;
+#endif
+	kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
+	vcpu->arch.slb_nr = 64;
+
+	vcpu->arch.shadow_msr = MSR_USER64;
+
+	err = kvmppc_mmu_init(vcpu);
+	if (err < 0)
+		goto free_shared_page;
+
+	return vcpu;
+
+free_shared_page:
+	/* p is the page-aligned address returned by __get_free_page() */
+	free_page(p);
+uninit_vcpu:
+	kvm_vcpu_uninit(vcpu);
+free_shadow_vcpu:
+	kfree(vcpu_book3s->shadow_vcpu);
+free_vcpu:
+	vfree(vcpu_book3s);
+out:
+	return ERR_PTR(err);
+}
+
+/*
+ * Release the resources held by @vcpu: the page backing arch.shared, the
+ * generic vcpu state (kvm_vcpu_uninit), the shadow vcpu, and finally the
+ * kvmppc_vcpu_book3s container that embeds @vcpu.
+ */
+void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+
+ /* arch.shared may point into the middle of its page; mask down to the page start */
+ free_page((unsigned long)vcpu->arch.shared & PAGE_MASK);
+ kvm_vcpu_uninit(vcpu);
+ kfree(vcpu_book3s->shadow_vcpu);
+ /* the container was vzalloc'ed by the create path, hence vfree */
+ vfree(vcpu_book3s);
+}
+
+/*
+ * Enter the guest for @vcpu and run it until __kvmppc_vcpu_run() returns.
+ *
+ * With preemption disabled, this saves the calling thread's FP state (and,
+ * when configured and in use, Altivec/VSX state) into on-stack buffers,
+ * optionally preloads the guest FPU, runs the guest, then gives up the
+ * guest's extension state and restores the thread's saved state.
+ *
+ * Returns the value of __kvmppc_vcpu_run(), or -EINVAL (exit_reason
+ * KVM_EXIT_INTERNAL_ERROR) when vcpu->arch.sane is not set, or -EINTR
+ * (exit_reason KVM_EXIT_INTR) when a signal is already pending.
+ */
+int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+{
+ int ret;
+ /* On-stack copies of current's extension state, restored after the guest ran */
+ double fpr[32][TS_FPRWIDTH];
+ unsigned int fpscr;
+ int fpexc_mode;
+#ifdef CONFIG_ALTIVEC
+ vector128 vr[32];
+ vector128 vscr;
+ unsigned long uninitialized_var(vrsave);
+ int used_vr;
+#endif
+#ifdef CONFIG_VSX
+ int used_vsr;
+#endif
+ ulong ext_msr;
+
+ preempt_disable();
+
+ /* Check if we can run the vcpu at all */
+ if (!vcpu->arch.sane) {
+ kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ ret = -EINVAL;
+ goto out;
+ }
+
+ kvmppc_core_prepare_to_enter(vcpu);
+
+ /*
+ * Interrupts could be timers for the guest which we have to inject
+ * again, so let's postpone them until we're in the guest and if we
+ * really did time things so badly, then we just exit again due to
+ * a host external interrupt.
+ */
+ __hard_irq_disable();
+
+ /* No need to go into the guest when all we do is going out */
+ if (signal_pending(current)) {
+ __hard_irq_enable();
+ kvm_run->exit_reason = KVM_EXIT_INTR;
+ ret = -EINTR;
+ goto out;
+ }
+
+ /* Save FPU state in stack */
+ /* giveup_fpu() is only called when MSR_FP is set in the thread's saved MSR */
+ if (current->thread.regs->msr & MSR_FP)
+ giveup_fpu(current);
+ memcpy(fpr, current->thread.fpr, sizeof(current->thread.fpr));
+ fpscr = current->thread.fpscr.val;
+ fpexc_mode = current->thread.fpexc_mode;
+
+#ifdef CONFIG_ALTIVEC
+ /* Save Altivec state in stack */
+ /* vr/vscr/vrsave are only captured when the thread has used Altivec */
+ used_vr = current->thread.used_vr;
+ if (used_vr) {
+ if (current->thread.regs->msr & MSR_VEC)
+ giveup_altivec(current);
+ memcpy(vr, current->thread.vr, sizeof(current->thread.vr));
+ vscr = current->thread.vscr;
+ vrsave = current->thread.vrsave;
+ }
+#endif
+
+#ifdef CONFIG_VSX
+ /* Save VSX state in stack */
+ used_vsr = current->thread.used_vsr;
+ if (used_vsr && (current->thread.regs->msr & MSR_VSX))
+ __giveup_vsx(current);
+#endif
+
+ /* Remember the MSR with disabled extensions */
+ ext_msr = current->thread.regs->msr;
+
+ /* Preload FPU if it's enabled */
+ if (vcpu->arch.shared->msr & MSR_FP)
+ kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
+
+ kvm_guest_enter();
+
+ ret = __kvmppc_vcpu_run(kvm_run, vcpu);
+
+ kvm_guest_exit();
+
+ /* Put back the MSR value recorded before the guest ran */
+ current->thread.regs->msr = ext_msr;
+
+ /* Make sure we save the guest FPU/Altivec/VSX state */
+ kvmppc_giveup_ext(vcpu, MSR_FP);
+ kvmppc_giveup_ext(vcpu, MSR_VEC);
+ kvmppc_giveup_ext(vcpu, MSR_VSX);
+
+ /* Restore FPU state from stack */
+ memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr));
+ current->thread.fpscr.val = fpscr;
+ current->thread.fpexc_mode = fpexc_mode;
+
+#ifdef CONFIG_ALTIVEC
+ /* Restore Altivec state from stack */
+ /* Only copy back when Altivec was in use both before and after the run */
+ if (used_vr && current->thread.used_vr) {
+ memcpy(current->thread.vr, vr, sizeof(current->thread.vr));
+ current->thread.vscr = vscr;
+ current->thread.vrsave = vrsave;
+ }
+ current->thread.used_vr = used_vr;
+#endif
+
+#ifdef CONFIG_VSX
+ current->thread.used_vsr = used_vsr;
+#endif
+
+out:
+ /* Common exit: pairs with the preempt_disable() at the top */
+ preempt_enable();
+ return ret;
+}
+
+/*
+ * Fetch the dirty bitmap of the memory slot named by log->slot and reset it.
+ *
+ * Runs under kvm->slots_lock.  When kvm_get_dirty_log() reports that at
+ * least one page is dirty, the shadow PTEs covering the slot's guest
+ * address range are flushed on every vcpu and the slot's dirty bitmap is
+ * cleared.
+ *
+ * Returns 0 on success or the error returned by kvm_get_dirty_log().
+ */
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
+			       struct kvm_dirty_log *log)
+{
+	struct kvm_memory_slot *slot;
+	struct kvm_vcpu *vcpu;
+	ulong first, last;
+	unsigned long idx, bitmap_bytes;
+	int dirty = 0;
+	int rc;
+
+	mutex_lock(&kvm->slots_lock);
+
+	rc = kvm_get_dirty_log(kvm, log, &dirty);
+
+	/*
+	 * Nothing more to do on error, and if nothing is dirty we don't
+	 * bother messing with page tables (rc is already 0 in that case).
+	 */
+	if (rc || !dirty)
+		goto unlock;
+
+	slot = id_to_memslot(kvm->memslots, log->slot);
+
+	first = slot->base_gfn << PAGE_SHIFT;
+	last = first + (slot->npages << PAGE_SHIFT);
+
+	kvm_for_each_vcpu(idx, vcpu, kvm)
+		kvmppc_mmu_pte_pflush(vcpu, first, last);
+
+	bitmap_bytes = kvm_dirty_bitmap_bytes(slot);
+	memset(slot->dirty_bitmap, 0, bitmap_bytes);
+
+unlock:
+	mutex_unlock(&kvm->slots_lock);
+	return rc;
+}
+
+/* Memory-region prepare hook: nothing to do here, always returns 0. */
+int kvmppc_core_prepare_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem)
+{
+ return 0;
+}
+
+/* Memory-region commit hook: intentionally empty in this implementation. */
+void kvmppc_core_commit_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem)
+{
+}
+
+/* Per-VM init hook: no VM-wide state is set up here, always returns 0. */
+int kvmppc_core_init_vm(struct kvm *kvm)
+{
+ return 0;
+}
+
+/* Per-VM teardown hook: intentionally empty in this implementation. */
+void kvmppc_core_destroy_vm(struct kvm *kvm)
+{
+}
+
+/*
+ * Module init: register with the generic KVM core, then run
+ * kvmppc_mmu_hpte_sysinit().
+ *
+ * Returns 0 on success or the first error encountered.  If the second step
+ * fails, the kvm_init() registration is undone, because the module exit
+ * handler is not run when module init returns an error.
+ */
+static int kvmppc_book3s_init(void)
+{
+	int r;
+
+	r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0,
+		     THIS_MODULE);
+	if (r)
+		return r;
+
+	r = kvmppc_mmu_hpte_sysinit();
+	if (r)
+		kvm_exit();
+
+	return r;
+}
+
+/*
+ * Module exit: undo kvmppc_book3s_init() in reverse order -
+ * kvmppc_mmu_hpte_sysexit() first, then kvm_exit().
+ */
+static void kvmppc_book3s_exit(void)
+{
+ kvmppc_mmu_hpte_sysexit();
+ kvm_exit();
+}
+
+module_init(kvmppc_book3s_init);
+module_exit(kvmppc_book3s_exit);
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
new file mode 100644
index 00000000..b9589324
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2011. Freescale Inc. All rights reserved.
+ *
+ * Authors:
+ * Alexander Graf <agraf@suse.de>
+ * Paul Mackerras <paulus@samba.org>
+ *
+ * Description:
+ *
+ * Hypercall handling for running PAPR guests in PR KVM on Book 3S
+ * processors.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/uaccess.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+
+/*
+ * Compute the address of the hashed page table entry @pte_index from the
+ * vcpu's SDR1 value.
+ *
+ * Each entry is 16 bytes, so the index is first scaled by 16.  The low five
+ * bits of SDR1 select how many bits of the scaled index are kept; the
+ * result is OR-ed into the table base taken from the upper bits of SDR1.
+ */
+static unsigned long get_pteg_addr(struct kvm_vcpu *vcpu, long pte_index)
+{
+	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+	unsigned long pteg_addr;
+
+	pte_index <<= 4;
+	/*
+	 * The shift count can reach 0x1f + 11 = 42, so the constant must be
+	 * unsigned long: shifting a plain int by 31 or more bits is
+	 * undefined behaviour.
+	 */
+	pte_index &= ((1UL << ((vcpu_book3s->sdr1 & 0x1f) + 11)) - 1) << 7 | 0x70;
+	pteg_addr = vcpu_book3s->sdr1 & 0xfffffffffffc0000ULL;
+	pteg_addr |= pte_index;
+
+	return pteg_addr;
+}
+
+/*
+ * Emulate the H_ENTER hypercall: insert a new HPTE into the guest's hashed
+ * page table.
+ *
+ * Guest register inputs: r4 = flags, r5 = PTE index, r6/r7 = the two
+ * doublewords of the new entry.  Without H_EXACT the first invalid slot of
+ * the 8-entry group is used; with H_EXACT the slot named by the low three
+ * index bits is used and must currently be invalid.
+ *
+ * The hypercall status is always delivered to the guest in r3 (H_SUCCESS,
+ * H_PTEG_FULL, or H_FUNCTION when the guest's table cannot be accessed);
+ * on success r4 receives the index of the slot that was filled.
+ *
+ * Returns EMULATE_DONE: the call has been fully handled in the kernel.
+ */
+static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu)
+{
+	long flags = kvmppc_get_gpr(vcpu, 4);
+	long pte_index = kvmppc_get_gpr(vcpu, 5);
+	unsigned long pteg[2 * 8];
+	unsigned long pteg_addr, i, *hpte;
+	long ret;
+
+	/* Split the index into the group base and the slot within the group */
+	i = pte_index & 7UL;
+	pte_index &= ~7UL;
+	pteg_addr = get_pteg_addr(vcpu, pte_index);
+
+	/* Never operate on an uninitialised group if the read faults */
+	ret = H_FUNCTION;
+	if (copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg)))
+		goto done;
+	hpte = pteg;
+
+	ret = H_PTEG_FULL;
+	if (likely((flags & H_EXACT) == 0)) {
+		for (i = 0; ; ++i) {
+			if (i == 8)
+				goto done;
+			if ((*hpte & HPTE_V_VALID) == 0)
+				break;
+			hpte += 2;
+		}
+	} else {
+		/* The requested slot must not already hold a valid entry */
+		hpte += i * 2;
+		if (*hpte & HPTE_V_VALID)
+			goto done;
+	}
+
+	hpte[0] = kvmppc_get_gpr(vcpu, 6);
+	hpte[1] = kvmppc_get_gpr(vcpu, 7);
+
+	ret = H_FUNCTION;
+	if (copy_to_user((void __user *)pteg_addr, pteg, sizeof(pteg)))
+		goto done;
+
+	kvmppc_set_gpr(vcpu, 4, pte_index | i);
+	ret = H_SUCCESS;
+
+done:
+	/*
+	 * Hypercall status goes to the guest in r3; it must not be returned
+	 * as this function's EMULATE_* result.
+	 */
+	kvmppc_set_gpr(vcpu, 3, ret);
+
+	return EMULATE_DONE;
+}
+
+/*
+ * Emulate the H_REMOVE hypercall: invalidate one HPTE in the guest's hashed
+ * page table.
+ *
+ * Guest register inputs: r4 = flags, r5 = PTE index, r6 = AVPN value used
+ * by the H_AVPN / H_ANDCOND match conditions.
+ *
+ * The hypercall status is delivered to the guest in r3 (H_SUCCESS,
+ * H_NOT_FOUND, or H_FUNCTION when the guest's table cannot be accessed);
+ * on success r4/r5 receive the two doublewords of the removed entry.
+ *
+ * Returns EMULATE_DONE: the call has been fully handled in the kernel.
+ */
+static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu)
+{
+	unsigned long flags = kvmppc_get_gpr(vcpu, 4);
+	unsigned long pte_index = kvmppc_get_gpr(vcpu, 5);
+	unsigned long avpn = kvmppc_get_gpr(vcpu, 6);
+	unsigned long v = 0, pteg, rb;
+	unsigned long pte[2];
+	long ret;
+
+	pteg = get_pteg_addr(vcpu, pte_index);
+
+	/* Never evaluate an uninitialised pte[] if the read faults */
+	ret = H_FUNCTION;
+	if (copy_from_user(pte, (void __user *)pteg, sizeof(pte)))
+		goto done;
+
+	ret = H_NOT_FOUND;
+	if ((pte[0] & HPTE_V_VALID) == 0 ||
+	    ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn) ||
+	    ((flags & H_ANDCOND) && (pte[0] & avpn) != 0))
+		goto done;
+
+	/* Zero the first doubleword, which clears the valid bit */
+	ret = H_FUNCTION;
+	if (copy_to_user((void __user *)pteg, &v, sizeof(v)))
+		goto done;
+
+	rb = compute_tlbie_rb(pte[0], pte[1], pte_index);
+	vcpu->arch.mmu.tlbie(vcpu, rb, (rb & 1) != 0);
+
+	kvmppc_set_gpr(vcpu, 4, pte[0]);
+	kvmppc_set_gpr(vcpu, 5, pte[1]);
+	ret = H_SUCCESS;
+
+done:
+	kvmppc_set_gpr(vcpu, 3, ret);
+
+	return EMULATE_DONE;
+}
+
+/*
+ * Emulate the H_PROTECT hypercall: replace the protection and key bits of
+ * one HPTE in the guest's hashed page table.
+ *
+ * Guest register inputs: r4 = flags (which also carry the new PP/N/key
+ * bits), r5 = PTE index, r6 = AVPN value checked when H_AVPN is set.
+ *
+ * The hypercall status is delivered to the guest in r3 (H_SUCCESS,
+ * H_NOT_FOUND, or H_FUNCTION when the guest's table cannot be accessed).
+ *
+ * Returns EMULATE_DONE: the call has been fully handled in the kernel.
+ */
+static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
+{
+	unsigned long flags = kvmppc_get_gpr(vcpu, 4);
+	unsigned long pte_index = kvmppc_get_gpr(vcpu, 5);
+	unsigned long avpn = kvmppc_get_gpr(vcpu, 6);
+	unsigned long rb, pteg, r, v;
+	unsigned long pte[2];
+	long ret;
+
+	pteg = get_pteg_addr(vcpu, pte_index);
+
+	/* Never evaluate an uninitialised pte[] if the read faults */
+	ret = H_FUNCTION;
+	if (copy_from_user(pte, (void __user *)pteg, sizeof(pte)))
+		goto done;
+
+	ret = H_NOT_FOUND;
+	if ((pte[0] & HPTE_V_VALID) == 0 ||
+	    ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn))
+		goto done;
+
+	v = pte[0];
+	r = pte[1];
+	/* Drop the old protection/key bits, then merge the ones from flags */
+	r &= ~(HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_HI |
+	       HPTE_R_KEY_LO);
+	r |= (flags << 55) & HPTE_R_PP0;
+	r |= (flags << 48) & HPTE_R_KEY_HI;
+	r |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
+
+	pte[1] = r;
+
+	rb = compute_tlbie_rb(v, r, pte_index);
+	vcpu->arch.mmu.tlbie(vcpu, rb, (rb & 1) != 0);
+
+	ret = H_FUNCTION;
+	if (copy_to_user((void __user *)pteg, pte, sizeof(pte)))
+		goto done;
+
+	ret = H_SUCCESS;
+
+done:
+	kvmppc_set_gpr(vcpu, 3, ret);
+
+	return EMULATE_DONE;
+}
+
+/*
+ * Dispatch PAPR hypercall number @cmd for @vcpu to an in-kernel handler.
+ *
+ * Returns the handler's result for H_ENTER, H_REMOVE and H_PROTECT,
+ * EMULATE_DONE for H_CEDE, and EMULATE_FAIL for everything else (including
+ * H_BULK_REMOVE, which only flushes the shadow PTEs here).
+ */
+int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
+{
+	int rc = EMULATE_FAIL;
+
+	switch (cmd) {
+	case H_ENTER:
+		rc = kvmppc_h_pr_enter(vcpu);
+		break;
+	case H_REMOVE:
+		rc = kvmppc_h_pr_remove(vcpu);
+		break;
+	case H_PROTECT:
+		rc = kvmppc_h_pr_protect(vcpu);
+		break;
+	case H_BULK_REMOVE:
+		/* We just flush all PTEs, so user space can
+		   handle the HPT modifications */
+		kvmppc_mmu_pte_flush(vcpu, 0, 0);
+		break;
+	case H_CEDE:
+		kvm_vcpu_block(vcpu);
+		vcpu->stat.halt_wakeup++;
+		rc = EMULATE_DONE;
+		break;
+	default:
+		break;
+	}
+
+	return rc;
+}
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
new file mode 100644
index 00000000..34187585
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -0,0 +1,242 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/kvm_asm.h>
+#include <asm/reg.h>
+#include <asm/mmu.h>
+#include <asm/page.h>
+#include <asm/asm-offsets.h>
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#include <asm/exception-64s.h>
+#endif
+
+/*****************************************************************************
+ * *
+ * Real Mode handlers that need to be in low physical memory *
+ * *
+ ****************************************************************************/
+
+#if defined(CONFIG_PPC_BOOK3S_64)
+
+#define FUNC(name) GLUE(.,name)
+#define MTMSR_EERI(reg) mtmsrd (reg),1
+
+ /*
+ * kvmppc_skip_interrupt: return from the interrupt with SRR0 advanced
+ * by 4, i.e. resume at the instruction following the one SRR0 pointed to.
+ */
+ .globl kvmppc_skip_interrupt
+kvmppc_skip_interrupt:
+ /*
+ * Here all GPRs are unchanged from when the interrupt happened
+ * except for r13, which is saved in SPRG_SCRATCH0.
+ */
+ mfspr r13, SPRN_SRR0
+ addi r13, r13, 4
+ mtspr SPRN_SRR0, r13
+ /* r13 was only a scratch register above; reload it from the scratch SPRG */
+ GET_SCRATCH0(r13)
+ rfid
+ /* not expected to be reached: rfid does not fall through */
+ b .
+
+ /*
+ * kvmppc_skip_Hinterrupt: same as kvmppc_skip_interrupt, but operating
+ * on HSRR0 and returning with hrfid.
+ */
+ .globl kvmppc_skip_Hinterrupt
+kvmppc_skip_Hinterrupt:
+ /*
+ * Here all GPRs are unchanged from when the interrupt happened
+ * except for r13, which is saved in SPRG_SCRATCH0.
+ */
+ mfspr r13, SPRN_HSRR0
+ addi r13, r13, 4
+ mtspr SPRN_HSRR0, r13
+ /* r13 was only a scratch register above; reload it from the scratch SPRG */
+ GET_SCRATCH0(r13)
+ hrfid
+ /* not expected to be reached: hrfid does not fall through */
+ b .
+
+#elif defined(CONFIG_PPC_BOOK3S_32)
+
+#define FUNC(name) name
+#define MTMSR_EERI(reg) mtmsr (reg)
+
+/*
+ * INTERRUPT_TRAMPOLINE intno
+ *
+ * Emits the global entry point kvmppc_trampoline_<intno>.  The generated
+ * code decides whether the interrupt was taken while a KVM guest was
+ * running:
+ *  - no shadow vcpu pointer, or HSTATE_IN_GUEST == KVM_GUEST_MODE_NONE:
+ *    restore CR, r12 and r13 and branch to kvmppc_resume_<intno>;
+ *  - HSTATE_IN_GUEST == KVM_GUEST_MODE_SKIP: branch to
+ *    kvmppc_handler_skip_ins;
+ *  - otherwise: load the interrupt number into r12 and branch to
+ *    kvmppc_handler_trampoline_exit.
+ */
+.macro INTERRUPT_TRAMPOLINE intno
+
+.global kvmppc_trampoline_\intno
+kvmppc_trampoline_\intno:
+
+ mtspr SPRN_SPRG_SCRATCH0, r13 /* Save r13 */
+
+ /*
+ * First thing to do is to find out if we're coming
+ * from a KVM guest or a Linux process.
+ *
+ * To distinguish, we check a magic byte in the PACA/current
+ */
+ mfspr r13, SPRN_SPRG_THREAD
+ lwz r13, THREAD_KVM_SVCPU(r13)
+ /* PPC32 can have a NULL pointer - let's check for that */
+ mtspr SPRN_SPRG_SCRATCH1, r12 /* Save r12 */
+ /* r12 carries the interrupted CR so the compare below may clobber CR */
+ mfcr r12
+ cmpwi r13, 0
+ bne 1f
+ /* 2: common "not ours" path - expects the saved CR value in r12 */
+2: mtcr r12
+ mfspr r12, SPRN_SPRG_SCRATCH1
+ mfspr r13, SPRN_SPRG_SCRATCH0 /* r13 = original r13 */
+ b kvmppc_resume_\intno /* Get back original handler */
+
+ /* 1: shadow vcpu present - stash CR in SCRATCH1 and r12 in SCRATCH0 */
+1: tophys(r13, r13)
+ stw r12, HSTATE_SCRATCH1(r13)
+ mfspr r12, SPRN_SPRG_SCRATCH1
+ stw r12, HSTATE_SCRATCH0(r13)
+ lbz r12, HSTATE_IN_GUEST(r13)
+ cmpwi r12, KVM_GUEST_MODE_NONE
+ bne ..kvmppc_handler_hasmagic_\intno
+ /* No KVM guest? Then jump back to the Linux handler! */
+ /* reload the saved CR into r12, as the code at 2: expects */
+ lwz r12, HSTATE_SCRATCH1(r13)
+ b 2b
+
+ /* Now we know we're handling a KVM guest */
+..kvmppc_handler_hasmagic_\intno:
+
+ /* Should we just skip the faulting instruction? */
+ cmpwi r12, KVM_GUEST_MODE_SKIP
+ beq kvmppc_handler_skip_ins
+
+ /* Let's store which interrupt we're handling */
+ li r12, \intno
+
+ /* Jump into the SLB exit code that goes to the highmem handler */
+ b kvmppc_handler_trampoline_exit
+
+.endm
+
+INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_SYSTEM_RESET
+INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_MACHINE_CHECK
+INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_DATA_STORAGE
+INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_INST_STORAGE
+INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_EXTERNAL
+INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALIGNMENT
+INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_PROGRAM
+INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_FP_UNAVAIL
+INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_DECREMENTER
+INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_SYSCALL
+INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_TRACE
+INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_PERFMON
+INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALTIVEC
+
+/*
+ * Bring us back to the faulting code, but skip the
+ * faulting instruction.
+ *
+ * This is a generic exit path from the interrupt
+ * trampolines above.
+ *
+ * Input Registers:
+ *
+ * R12 = free
+ * R13 = Shadow VCPU (PACA)
+ * HSTATE.SCRATCH0 = guest R12
+ * HSTATE.SCRATCH1 = guest CR
+ * SPRG_SCRATCH0 = guest R13
+ *
+ */
+kvmppc_handler_skip_ins:
+
+ /* Patch the IP to the next instruction */
+ /* SRR0 += 4: step over one 4-byte instruction */
+ mfsrr0 r12
+ addi r12, r12, 4
+ mtsrr0 r12
+
+ /* Clean up all state */
+ /* HSTATE_SCRATCH1 holds the saved CR, HSTATE_SCRATCH0 the saved r12 */
+ lwz r12, HSTATE_SCRATCH1(r13)
+ mtcr r12
+ PPC_LL r12, HSTATE_SCRATCH0(r13)
+ /* r13 is restored last because it was the base register for the loads above */
+ GET_SCRATCH0(r13)
+
+ /* And get back into the code */
+ RFI
+#endif
+
+/*
+ * Call kvmppc_handler_trampoline_enter in real mode
+ *
+ * On entry, r4 contains the guest shadow MSR
+ */
+_GLOBAL(kvmppc_entry_trampoline)
+ /* r5 = MSR as it is on entry to this routine */
+ mfmsr r5
+ /* r7 = address of kvmppc_handler_trampoline_enter, converted by toreal() */
+ LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter)
+ toreal(r7)
+
+ /* r9 = r5 with EE and RI cleared: the MSR used while SRR0/1 are loaded */
+ li r9, MSR_RI
+ ori r9, r9, MSR_EE
+ andc r9, r5, r9 /* Clear EE and RI in MSR value */
+ /* r6 = r5 with EE, IR and DR cleared: the MSR the RFI below switches to */
+ li r6, MSR_IR | MSR_DR
+ ori r6, r6, MSR_EE
+ andc r6, r5, r6 /* Clear EE, DR and IR in MSR value */
+ MTMSR_EERI(r9) /* Clear EE and RI in MSR */
+ mtsrr0 r7 /* before we set srr0/1 */
+ mtsrr1 r6
+ /* Continue at the address in SRR0 with the MSR value from SRR1 */
+ RFI
+
+/*
+ * Helpers for define_load_up:
+ *   STACK_LR       - offset from r1 at which the caller's LR is saved
+ *   MSR_EXT_START  - emitted before the call to load_up_<what>
+ *   MSR_EXT_END    - emitted after the call to load_up_<what>
+ */
+#if defined(CONFIG_PPC_BOOK3S_32)
+#define STACK_LR INT_FRAME_SIZE+4
+
+/* load_up_xxx have to run with MSR_DR=0 on Book3S_32 */
+/* START parks r20 in the _NIP stack slot and keeps the old MSR in r20 */
+#define MSR_EXT_START \
+ PPC_STL r20, _NIP(r1); \
+ mfmsr r20; \
+ LOAD_REG_IMMEDIATE(r3, MSR_DR|MSR_EE); \
+ andc r3,r20,r3; /* Disable DR,EE */ \
+ mtmsr r3; \
+ sync
+
+/* END writes the MSR kept in r20 back, then reloads r20 from the _NIP slot */
+#define MSR_EXT_END \
+ mtmsr r20; /* Enable DR,EE */ \
+ sync; \
+ PPC_LL r20, _NIP(r1)
+
+#elif defined(CONFIG_PPC_BOOK3S_64)
+/* On Book3S_64 both MSR_EXT_* macros expand to nothing */
+#define STACK_LR _LINK
+#define MSR_EXT_START
+#define MSR_EXT_END
+#endif
+
+/*
+ * Activate current's external feature (FPU/Altivec/VSX)
+ *
+ * define_load_up(what) emits the global function kvmppc_load_up_<what>.
+ * The generated function opens a stack frame of INT_FRAME_SIZE bytes, saves
+ * LR at STACK_LR(r1), calls load_up_<what> bracketed by MSR_EXT_START and
+ * MSR_EXT_END, then restores LR, pops the frame and returns.
+ * r3 is used as a scratch register for the LR save/restore.
+ */
+#define define_load_up(what) \
+ \
+_GLOBAL(kvmppc_load_up_ ## what); \
+ PPC_STLU r1, -INT_FRAME_SIZE(r1); \
+ mflr r3; \
+ PPC_STL r3, STACK_LR(r1); \
+ MSR_EXT_START; \
+ \
+ bl FUNC(load_up_ ## what); \
+ \
+ MSR_EXT_END; \
+ PPC_LL r3, STACK_LR(r1); \
+ mtlr r3; \
+ addi r1, r1, INT_FRAME_SIZE; \
+ blr
+
+define_load_up(fpu)
+#ifdef CONFIG_ALTIVEC
+define_load_up(altivec)
+#endif
+#ifdef CONFIG_VSX
+define_load_up(vsx)
+#endif
+
+#include "book3s_segment.S"
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
new file mode 100644
index 00000000..6e6e9cef
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -0,0 +1,367 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright SUSE Linux Products GmbH 2010
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+/* Real mode helpers */
+
+#if defined(CONFIG_PPC_BOOK3S_64)
+
+#define GET_SHADOW_VCPU(reg) \
+ mr reg, r13
+#define MTMSR_EERI(reg) mtmsrd (reg),1
+
+#elif defined(CONFIG_PPC_BOOK3S_32)
+
+#define GET_SHADOW_VCPU(reg) \
+ tophys(reg, r2); \
+ lwz reg, (THREAD + THREAD_KVM_SVCPU)(reg); \
+ tophys(reg, reg)
+#define MTMSR_EERI(reg) mtmsr (reg)
+
+#endif
+
+/* Disable for nested KVM */
+#define USE_QUICK_LAST_INST
+
+
+/* Get helper functions for subarch specific functionality */
+
+#if defined(CONFIG_PPC_BOOK3S_64)
+#include "book3s_64_slb.S"
+#elif defined(CONFIG_PPC_BOOK3S_32)
+#include "book3s_32_sr.S"
+#endif
+
+/******************************************************************************
+ * *
+ * Entry code *
+ * *
+ *****************************************************************************/
+
+/*
+ * kvmppc_handler_trampoline_enter: last step of guest entry.
+ *
+ * Records what is needed to come back to the host (exit handler address
+ * taken from LR, host MSR, host R1/R2) in the HSTATE area, flags the thread
+ * as being in guest mode, switches to the guest segments, loads the guest
+ * CTR/LR/CR/XER and GPRs from the shadow vcpu and finally RFIs to the guest
+ * PC with the guest shadow MSR.
+ */
+.global kvmppc_handler_trampoline_enter
+kvmppc_handler_trampoline_enter:
+
+ /* Required state:
+ *
+ * MSR = ~IR|DR
+ * R1 = host R1
+ * R2 = host R2
+ * R4 = guest shadow MSR
+ * R5 = normal host MSR
+ * R6 = current host MSR (EE, IR, DR off)
+ * LR = highmem guest exit code
+ * all other volatile GPRS = free
+ * SVCPU[CR] = guest CR
+ * SVCPU[XER] = guest XER
+ * SVCPU[CTR] = guest CTR
+ * SVCPU[LR] = guest LR
+ */
+
+ /* r3 = shadow vcpu */
+ GET_SHADOW_VCPU(r3)
+
+ /* Save guest exit handler address and MSR */
+ mflr r0
+ PPC_STL r0, HSTATE_VMHANDLER(r3)
+ PPC_STL r5, HSTATE_HOST_MSR(r3)
+
+ /* Save R1/R2 in the PACA (64-bit) or shadow_vcpu (32-bit) */
+ PPC_STL r1, HSTATE_HOST_R1(r3)
+ PPC_STL r2, HSTATE_HOST_R2(r3)
+
+ /* Activate guest mode, so faults get handled by KVM */
+ li r11, KVM_GUEST_MODE_GUEST
+ stb r11, HSTATE_IN_GUEST(r3)
+
+ /* Switch to guest segment. This is subarch specific. */
+ LOAD_GUEST_SEGMENTS
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* Some guests may need to have dcbz set to 32 byte length.
+ *
+ * Usually we ensure that by patching the guest's instructions
+ * to trap on dcbz and emulate it in the hypervisor.
+ *
+ * If we can, we should tell the CPU to use 32 byte dcbz though,
+ * because that's a lot faster.
+ */
+ /* Only touch HID5 when the HSTATE_RESTORE_HID5 byte is non-zero */
+ lbz r0, HSTATE_RESTORE_HID5(r3)
+ cmpwi r0, 0
+ beq no_dcbz32_on
+
+ mfspr r0,SPRN_HID5
+ ori r0, r0, 0x80 /* XXX HID5_dcbz32 = 0x80 */
+ mtspr SPRN_HID5,r0
+no_dcbz32_on:
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+ /* Enter guest */
+
+ /* CTR/LR/CR/XER are loaded first, while r8-r11 are still free as scratch */
+ PPC_LL r8, SVCPU_CTR(r3)
+ PPC_LL r9, SVCPU_LR(r3)
+ lwz r10, SVCPU_CR(r3)
+ lwz r11, SVCPU_XER(r3)
+
+ mtctr r8
+ mtlr r9
+ mtcr r10
+ mtxer r11
+
+ /* Move SRR0 and SRR1 into the respective regs */
+ PPC_LL r9, SVCPU_PC(r3)
+ /* First clear RI in our current MSR value */
+ li r0, MSR_RI
+ andc r6, r6, r0
+ MTMSR_EERI(r6)
+ mtsrr0 r9
+ mtsrr1 r4
+
+ /* Load the guest GPRs; r3 goes last because it is the base register */
+ PPC_LL r0, SVCPU_R0(r3)
+ PPC_LL r1, SVCPU_R1(r3)
+ PPC_LL r2, SVCPU_R2(r3)
+ PPC_LL r4, SVCPU_R4(r3)
+ PPC_LL r5, SVCPU_R5(r3)
+ PPC_LL r6, SVCPU_R6(r3)
+ PPC_LL r7, SVCPU_R7(r3)
+ PPC_LL r8, SVCPU_R8(r3)
+ PPC_LL r9, SVCPU_R9(r3)
+ PPC_LL r10, SVCPU_R10(r3)
+ PPC_LL r11, SVCPU_R11(r3)
+ PPC_LL r12, SVCPU_R12(r3)
+ PPC_LL r13, SVCPU_R13(r3)
+
+ PPC_LL r3, (SVCPU_R3)(r3)
+
+ /* Jump to the guest PC (SRR0) with the guest shadow MSR (SRR1) */
+ RFI
+kvmppc_handler_trampoline_enter_end:
+
+
+
+/******************************************************************************
+ * *
+ * Exit code *
+ * *
+ *****************************************************************************/
+
+/*
+ * kvmppc_handler_trampoline_exit / kvmppc_interrupt: guest exit path.
+ *
+ * Saves the guest GPRs, PC, MSR, CR, XER, CTR, LR and the fault DAR/DSISR
+ * into the shadow vcpu, optionally fetches the instruction at the guest PC,
+ * marks the thread as no longer in guest mode, switches back to the host
+ * segments and then either RFIs to the handler stored in HSTATE_VMHANDLER
+ * or branches to the host's external / decrementer / perfmon vector.
+ */
+.global kvmppc_handler_trampoline_exit
+kvmppc_handler_trampoline_exit:
+
+.global kvmppc_interrupt
+kvmppc_interrupt:
+
+ /* Register usage at this point:
+ *
+ * SPRG_SCRATCH0 = guest R13
+ * R12 = exit handler id
+ * R13 = shadow vcpu (32-bit) or PACA (64-bit)
+ * HSTATE.SCRATCH0 = guest R12
+ * HSTATE.SCRATCH1 = guest CR
+ *
+ */
+
+ /* Save registers */
+
+ PPC_STL r0, SVCPU_R0(r13)
+ PPC_STL r1, SVCPU_R1(r13)
+ PPC_STL r2, SVCPU_R2(r13)
+ PPC_STL r3, SVCPU_R3(r13)
+ PPC_STL r4, SVCPU_R4(r13)
+ PPC_STL r5, SVCPU_R5(r13)
+ PPC_STL r6, SVCPU_R6(r13)
+ PPC_STL r7, SVCPU_R7(r13)
+ PPC_STL r8, SVCPU_R8(r13)
+ PPC_STL r9, SVCPU_R9(r13)
+ PPC_STL r10, SVCPU_R10(r13)
+ PPC_STL r11, SVCPU_R11(r13)
+
+ /* Restore R1/R2 so we can handle faults */
+ PPC_LL r1, HSTATE_HOST_R1(r13)
+ PPC_LL r2, HSTATE_HOST_R2(r13)
+
+ /* Save guest PC and MSR */
+#ifdef CONFIG_PPC64
+BEGIN_FTR_SECTION
+ /*
+ * Bit 0x2 of the exit id selects HSRR0/1 instead of SRR0/1 as the
+ * source of the guest PC/MSR.  The test result is kept in cr1 for the
+ * return path below, and the bit is cleared from r12 afterwards.
+ */
+ andi. r0, r12, 0x2
+ cmpwi cr1, r0, 0
+ beq 1f
+ mfspr r3,SPRN_HSRR0
+ mfspr r4,SPRN_HSRR1
+ andi. r12,r12,0x3ffd
+ b 2f
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+#endif
+1: mfsrr0 r3
+ mfsrr1 r4
+2:
+ PPC_STL r3, SVCPU_PC(r13)
+ PPC_STL r4, SVCPU_SHADOW_SRR1(r13)
+
+ /* Get scratch'ed off registers */
+ GET_SCRATCH0(r9)
+ PPC_LL r8, HSTATE_SCRATCH0(r13)
+ lwz r7, HSTATE_SCRATCH1(r13)
+
+ PPC_STL r9, SVCPU_R13(r13)
+ PPC_STL r8, SVCPU_R12(r13)
+ stw r7, SVCPU_CR(r13)
+
+ /* Save more register state */
+
+ mfxer r5
+ mfdar r6
+ mfdsisr r7
+ mfctr r8
+ mflr r9
+
+ stw r5, SVCPU_XER(r13)
+ PPC_STL r6, SVCPU_FAULT_DAR(r13)
+ stw r7, SVCPU_FAULT_DSISR(r13)
+ PPC_STL r8, SVCPU_CTR(r13)
+ PPC_STL r9, SVCPU_LR(r13)
+
+ /*
+ * In order for us to easily get the last instruction,
+ * we got the #vmexit at, we exploit the fact that the
+ * virtual layout is still the same here, so we can just
+ * ld from the guest's PC address
+ */
+
+ /* We only load the last instruction when it's safe */
+ cmpwi r12, BOOK3S_INTERRUPT_DATA_STORAGE
+ beq ld_last_inst
+ cmpwi r12, BOOK3S_INTERRUPT_PROGRAM
+ beq ld_last_inst
+ cmpwi r12, BOOK3S_INTERRUPT_SYSCALL
+ beq ld_last_prev_inst
+ cmpwi r12, BOOK3S_INTERRUPT_ALIGNMENT
+ beq- ld_last_inst
+#ifdef CONFIG_PPC64
+BEGIN_FTR_SECTION
+ cmpwi r12, BOOK3S_INTERRUPT_H_EMUL_ASSIST
+ beq- ld_last_inst
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+#endif
+
+ b no_ld_last_inst
+
+ld_last_prev_inst:
+ /* Syscall exits fetch from PC - 4, the instruction before the saved PC */
+ addi r3, r3, -4
+
+ld_last_inst:
+ /* Save off the guest instruction we're at */
+
+ /* In case lwz faults */
+ li r0, KVM_INST_FETCH_FAILED
+
+#ifdef USE_QUICK_LAST_INST
+
+ /* Set guest mode to 'jump over instruction' so if lwz faults
+ * we'll just continue at the next IP. */
+ li r9, KVM_GUEST_MODE_SKIP
+ stb r9, HSTATE_IN_GUEST(r13)
+
+ /* 1) enable paging for data */
+ mfmsr r9
+ ori r11, r9, MSR_DR /* Enable paging for data */
+ mtmsr r11
+ sync
+ /* 2) fetch the instruction */
+ lwz r0, 0(r3)
+ /* 3) disable paging again */
+ mtmsr r9
+ sync
+
+#endif
+ /* r0 is the fetched instruction, or KVM_INST_FETCH_FAILED if the load was skipped */
+ stw r0, SVCPU_LAST_INST(r13)
+
+no_ld_last_inst:
+
+ /* Unset guest mode */
+ li r9, KVM_GUEST_MODE_NONE
+ stb r9, HSTATE_IN_GUEST(r13)
+
+ /* Switch back to host MMU */
+ LOAD_HOST_SEGMENTS
+
+#ifdef CONFIG_PPC_BOOK3S_64
+
+ /* Only touch HID5 when the HSTATE_RESTORE_HID5 byte is non-zero */
+ lbz r5, HSTATE_RESTORE_HID5(r13)
+ cmpwi r5, 0
+ beq no_dcbz32_off
+
+ /* Insert zero bits from r4 into HID5 - presumably undoing the 0x80 dcbz32 bit set on entry; confirm the rldimi mask */
+ li r4, 0
+ mfspr r5,SPRN_HID5
+ rldimi r5,r4,6,56
+ mtspr SPRN_HID5,r5
+
+no_dcbz32_off:
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+ /*
+ * For some interrupts, we need to call the real Linux
+ * handler, so it can do work for us. This has to happen
+ * as if the interrupt arrived from the kernel though,
+ * so let's fake it here where most state is restored.
+ *
+ * Having set up SRR0/1 with the address where we want
+ * to continue with relocation on (potentially in module
+ * space), we either just go straight there with rfi[d],
+ * or we jump to an interrupt handler if there is an
+ * interrupt to be handled first. In the latter case,
+ * the rfi[d] at the end of the interrupt handler will
+ * get us back to where we want to continue.
+ */
+
+ /* Register usage at this point:
+ *
+ * R1 = host R1
+ * R2 = host R2
+ * R10 = raw exit handler id
+ * R12 = exit handler id
+ * R13 = shadow vcpu (32-bit) or PACA (64-bit)
+ * SVCPU.* = guest *
+ *
+ */
+ /*
+ * NOTE(review): no instruction visible in this routine writes r10 after
+ * it was saved to SVCPU_R10 (LOAD_HOST_SEGMENTS is defined elsewhere),
+ * so the "R10" entry above may be stale - confirm.
+ */
+
+ PPC_LL r6, HSTATE_HOST_MSR(r13)
+ PPC_LL r8, HSTATE_VMHANDLER(r13)
+
+#ifdef CONFIG_PPC64
+BEGIN_FTR_SECTION
+ /* cr1 still holds the bit-0x2 test from above; HSRR0/1 are only set if it was set */
+ beq cr1, 1f
+ mtspr SPRN_HSRR1, r6
+ mtspr SPRN_HSRR0, r8
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+#endif
+1: /* Restore host msr -> SRR1 */
+ mtsrr1 r6
+ /* Load highmem handler address */
+ mtsrr0 r8
+
+ /* RFI into the highmem handler, or jump to interrupt handler */
+ /* beqa branches to the absolute address equal to the interrupt number */
+ cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
+ beqa BOOK3S_INTERRUPT_EXTERNAL
+ cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER
+ beqa BOOK3S_INTERRUPT_DECREMENTER
+ cmpwi r12, BOOK3S_INTERRUPT_PERFMON
+ beqa BOOK3S_INTERRUPT_PERFMON
+
+ RFI
+kvmppc_handler_trampoline_exit_end:
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
new file mode 100644
index 00000000..ee9e1ee9
--- /dev/null
+++ b/arch/powerpc/kvm/booke.c
@@ -0,0 +1,1023 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2007
+ * Copyright 2010-2011 Freescale Semiconductor, Inc.
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/gfp.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+
+#include <asm/cputable.h>
+#include <asm/uaccess.h>
+#include <asm/kvm_ppc.h>
+#include "timing.h"
+#include <asm/cacheflush.h>
+
+#include "booke.h"
+
+/* Base address of the pages that kvmppc_booke_init() allocates and fills
+ * with copies of KVM's interrupt handlers. */
+unsigned long kvmppc_booke_handlers;
+
+/* Expand to the "offset, kind" initializer pair of a stats entry for a
+ * counter named x in struct kvm (VM_STAT) or struct kvm_vcpu (VCPU_STAT). */
+#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
+#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
+
+/* Named per-vcpu exit counters; the list is terminated by a NULL name. */
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+ { "mmio", VCPU_STAT(mmio_exits) },
+ { "dcr", VCPU_STAT(dcr_exits) },
+ { "sig", VCPU_STAT(signal_exits) },
+ { "itlb_r", VCPU_STAT(itlb_real_miss_exits) },
+ { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) },
+ { "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) },
+ { "dtlb_v", VCPU_STAT(dtlb_virt_miss_exits) },
+ { "sysc", VCPU_STAT(syscall_exits) },
+ { "isi", VCPU_STAT(isi_exits) },
+ { "dsi", VCPU_STAT(dsi_exits) },
+ { "inst_emu", VCPU_STAT(emulated_inst_exits) },
+ { "dec", VCPU_STAT(dec_exits) },
+ { "ext_intr", VCPU_STAT(ext_intr_exits) },
+ { "halt_wakeup", VCPU_STAT(halt_wakeup) },
+ { NULL }
+};
+
+/* TODO: use vcpu_printf() */
+/*
+ * Print the vcpu's PC, MSR, LR, CTR, SRR0/SRR1, pending-exception bitmap and
+ * all 32 GPRs (four registers per line) to the kernel log.
+ */
+void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
+{
+	int base = 0;
+
+	printk("pc: %08lx msr: %08llx\n", vcpu->arch.pc, vcpu->arch.shared->msr);
+	printk("lr: %08lx ctr: %08lx\n", vcpu->arch.lr, vcpu->arch.ctr);
+	printk("srr0: %08llx srr1: %08llx\n", vcpu->arch.shared->srr0,
+	       vcpu->arch.shared->srr1);
+
+	printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions);
+
+	/* One log line per group of four consecutive GPRs. */
+	while (base < 32) {
+		printk("gpr%02d: %08lx %08lx %08lx %08lx\n", base,
+		       kvmppc_get_gpr(vcpu, base),
+		       kvmppc_get_gpr(vcpu, base + 1),
+		       kvmppc_get_gpr(vcpu, base + 2),
+		       kvmppc_get_gpr(vcpu, base + 3));
+		base += 4;
+	}
+}
+
+#ifdef CONFIG_SPE
+/*
+ * Save the guest's SPE state via kvmppc_save_guest_spe() and clear MSR_SPE
+ * in the shadow MSR. The whole sequence runs with preemption disabled.
+ */
+void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu)
+{
+ preempt_disable();
+ enable_kernel_spe();
+ kvmppc_save_guest_spe(vcpu);
+ vcpu->arch.shadow_msr &= ~MSR_SPE;
+ preempt_enable();
+}
+
+/*
+ * Load the guest's SPE state via kvmppc_load_guest_spe() and set MSR_SPE in
+ * the shadow MSR. The whole sequence runs with preemption disabled.
+ */
+static void kvmppc_vcpu_enable_spe(struct kvm_vcpu *vcpu)
+{
+ preempt_disable();
+ enable_kernel_spe();
+ kvmppc_load_guest_spe(vcpu);
+ vcpu->arch.shadow_msr |= MSR_SPE;
+ preempt_enable();
+}
+
+/*
+ * Bring the shadow MSR's SPE bit in line with the guest-visible MSR:
+ * enable SPE if the guest has it set but the shadow MSR does not, disable
+ * it in the opposite case, and do nothing when both already agree.
+ */
+static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu)
+{
+	bool guest_spe = (vcpu->arch.shared->msr & MSR_SPE) != 0;
+	bool shadow_spe = (vcpu->arch.shadow_msr & MSR_SPE) != 0;
+
+	if (guest_spe == shadow_spe)
+		return;
+
+	if (guest_spe)
+		kvmppc_vcpu_enable_spe(vcpu);
+	else
+		kvmppc_vcpu_disable_spe(vcpu);
+}
+#else
+/* Built when CONFIG_SPE is not set: there is no SPE state to synchronize. */
+static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu)
+{
+}
+#endif
+
+/*
+ * Helper function for "full" MSR writes. No need to call this if only
+ * EE/CE/ME/DE/RI are changing.
+ *
+ * Stores new_msr as the guest MSR, then passes the previous value to
+ * kvmppc_mmu_msr_notify() and resynchronizes the SPE state.
+ */
+void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
+{
+ /* Capture the old value before it is overwritten; the MMU hook needs it. */
+ u32 old_msr = vcpu->arch.shared->msr;
+
+ vcpu->arch.shared->msr = new_msr;
+
+ kvmppc_mmu_msr_notify(vcpu, old_msr);
+ kvmppc_vcpu_sync_spe(vcpu);
+}
+
+/* Mark the interrupt with the given BOOKE_IRQPRIO_* number as pending by
+ * setting its bit in vcpu->arch.pending_exceptions. */
+static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
+ unsigned int priority)
+{
+ set_bit(priority, &vcpu->arch.pending_exceptions);
+}
+
+/* Stash the DEAR and ESR values to report, then queue a DTLB miss. The
+ * stashed values are copied into the guest registers at delivery time. */
+static void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu,
+ ulong dear_flags, ulong esr_flags)
+{
+ vcpu->arch.queued_dear = dear_flags;
+ vcpu->arch.queued_esr = esr_flags;
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
+}
+
+/* Stash the DEAR and ESR values to report, then queue a data storage
+ * interrupt. */
+static void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu,
+ ulong dear_flags, ulong esr_flags)
+{
+ vcpu->arch.queued_dear = dear_flags;
+ vcpu->arch.queued_esr = esr_flags;
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
+}
+
+/* Stash the ESR value to report, then queue an instruction storage
+ * interrupt. */
+static void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu,
+ ulong esr_flags)
+{
+ vcpu->arch.queued_esr = esr_flags;
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
+}
+
+/* Stash the ESR value to report, then queue a program interrupt. */
+void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong esr_flags)
+{
+ vcpu->arch.queued_esr = esr_flags;
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
+}
+
+/* Mark a decrementer interrupt as pending. */
+void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
+{
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER);
+}
+
+/* Return non-zero if a decrementer interrupt is currently pending. */
+int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
+{
+ return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
+}
+
+/* Withdraw a pending decrementer interrupt, if any. */
+void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
+{
+ clear_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
+}
+
+/*
+ * Mark an external interrupt as pending. A KVM_INTERRUPT_SET_LEVEL request
+ * is queued under the level-triggered pseudo-priority; any other request is
+ * queued as a plain external interrupt.
+ */
+void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
+				struct kvm_interrupt *irq)
+{
+	bool level = (irq->irq == KVM_INTERRUPT_SET_LEVEL);
+
+	kvmppc_booke_queue_irqprio(vcpu, level ? BOOKE_IRQPRIO_EXTERNAL_LEVEL
+					       : BOOKE_IRQPRIO_EXTERNAL);
+}
+
+/* Withdraw any pending external interrupt, both the plain and the
+ * level-triggered variant. The irq argument is not examined. */
+void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
+ struct kvm_interrupt *irq)
+{
+ clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
+ clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
+}
+
+/*
+ * Deliver the interrupt of the corresponding priority, if possible.
+ *
+ * Whether delivery is allowed depends on the interrupt class and the guest
+ * MSR. On delivery the guest PC/MSR are saved to SRR0/SRR1, the PC is
+ * redirected to IVPR | IVOR[priority], ESR/DEAR are updated from the queued
+ * values where applicable, the guest MSR is masked with the class-specific
+ * mask, and the pending bit is cleared unless the interrupt must stay
+ * pending.
+ *
+ * Returns non-zero if the interrupt was delivered, 0 otherwise.
+ */
+static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
+ unsigned int priority)
+{
+ int allowed = 0;
+ ulong uninitialized_var(msr_mask);
+ bool update_esr = false, update_dear = false;
+ ulong crit_raw = vcpu->arch.shared->critical;
+ ulong crit_r1 = kvmppc_get_gpr(vcpu, 1);
+ bool crit;
+ bool keep_irq = false;
+
+ /* Truncate crit indicators in 32 bit mode */
+ if (!(vcpu->arch.shared->msr & MSR_SF)) {
+ crit_raw &= 0xffffffff;
+ crit_r1 &= 0xffffffff;
+ }
+
+ /* Critical section when crit == r1 */
+ crit = (crit_raw == crit_r1);
+ /* ... and we're in supervisor mode */
+ crit = crit && !(vcpu->arch.shared->msr & MSR_PR);
+
+ /* Level-triggered externals use the EXTERNAL vector and enable rules,
+ * but their pending bit is left set after delivery. */
+ if (priority == BOOKE_IRQPRIO_EXTERNAL_LEVEL) {
+ priority = BOOKE_IRQPRIO_EXTERNAL;
+ keep_irq = true;
+ }
+
+ switch (priority) {
+ case BOOKE_IRQPRIO_DTLB_MISS:
+ case BOOKE_IRQPRIO_DATA_STORAGE:
+ update_dear = true;
+ /* fall through */
+ case BOOKE_IRQPRIO_INST_STORAGE:
+ case BOOKE_IRQPRIO_PROGRAM:
+ update_esr = true;
+ /* fall through */
+ case BOOKE_IRQPRIO_ITLB_MISS:
+ case BOOKE_IRQPRIO_SYSCALL:
+ case BOOKE_IRQPRIO_FP_UNAVAIL:
+ case BOOKE_IRQPRIO_SPE_UNAVAIL:
+ case BOOKE_IRQPRIO_SPE_FP_DATA:
+ case BOOKE_IRQPRIO_SPE_FP_ROUND:
+ case BOOKE_IRQPRIO_AP_UNAVAIL:
+ case BOOKE_IRQPRIO_ALIGNMENT:
+ /* These are always deliverable, regardless of the guest MSR. */
+ allowed = 1;
+ msr_mask = MSR_CE|MSR_ME|MSR_DE;
+ break;
+ case BOOKE_IRQPRIO_CRITICAL:
+ case BOOKE_IRQPRIO_WATCHDOG:
+ allowed = vcpu->arch.shared->msr & MSR_CE;
+ msr_mask = MSR_ME;
+ break;
+ case BOOKE_IRQPRIO_MACHINE_CHECK:
+ allowed = vcpu->arch.shared->msr & MSR_ME;
+ msr_mask = 0;
+ break;
+ case BOOKE_IRQPRIO_DECREMENTER:
+ case BOOKE_IRQPRIO_FIT:
+ /* Left pending after delivery; not cleared below. */
+ keep_irq = true;
+ /* fall through */
+ case BOOKE_IRQPRIO_EXTERNAL:
+ /* Needs MSR[EE] set and the guest outside a critical section. */
+ allowed = vcpu->arch.shared->msr & MSR_EE;
+ allowed = allowed && !crit;
+ msr_mask = MSR_CE|MSR_ME|MSR_DE;
+ break;
+ case BOOKE_IRQPRIO_DEBUG:
+ allowed = vcpu->arch.shared->msr & MSR_DE;
+ msr_mask = MSR_ME;
+ break;
+ }
+
+ if (allowed) {
+ /* Save the interrupted context, then vector to the guest handler. */
+ vcpu->arch.shared->srr0 = vcpu->arch.pc;
+ vcpu->arch.shared->srr1 = vcpu->arch.shared->msr;
+ vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
+ if (update_esr == true)
+ vcpu->arch.shared->esr = vcpu->arch.queued_esr;
+ if (update_dear == true)
+ vcpu->arch.shared->dar = vcpu->arch.queued_dear;
+ /* Only the bits in msr_mask survive in the new guest MSR. */
+ kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask);
+
+ if (!keep_irq)
+ clear_bit(priority, &vcpu->arch.pending_exceptions);
+ }
+
+ return allowed;
+}
+
+/*
+ * Recompute the pending state of the decrementer interrupt: it is pending
+ * exactly when TCR[DIE] and TSR[DIS] are both set.
+ */
+static void update_timer_ints(struct kvm_vcpu *vcpu)
+{
+	if (!(vcpu->arch.tcr & TCR_DIE) || !(vcpu->arch.tsr & TSR_DIS)) {
+		kvmppc_core_dequeue_dec(vcpu);
+		return;
+	}
+
+	kvmppc_core_queue_dec(vcpu);
+}
+
+/*
+ * Handle a pending timer request, then walk the pending-exception bitmap
+ * from the lowest bit upward and deliver the first interrupt the guest
+ * currently accepts. Finally tell the guest, through shared->int_pending,
+ * whether anything is still pending.
+ */
+static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
+{
+	unsigned long *pending = &vcpu->arch.pending_exceptions;
+	const unsigned long nbits = BITS_PER_BYTE * sizeof(*pending);
+	unsigned int priority;
+
+	if (vcpu->requests) {
+		if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) {
+			/* NOTE(review): presumably pairs with the smp_wmb()
+			 * in kvmppc_set_tsr_bits() so that the TSR update is
+			 * visible here - confirm. */
+			smp_mb();
+			update_timer_ints(vcpu);
+		}
+	}
+
+	/*
+	 * The bitmap may be empty. find_first_bit() then returns nbits, which
+	 * is larger than BOOKE_IRQPRIO_MAX and skips the loop. __ffs() is not
+	 * usable here because its result is undefined for a zero argument.
+	 */
+	priority = find_first_bit(pending, nbits);
+	while (priority <= BOOKE_IRQPRIO_MAX) {
+		if (kvmppc_booke_irqprio_deliver(vcpu, priority))
+			break;
+
+		priority = find_next_bit(pending, nbits, priority + 1);
+	}
+
+	/* Tell the guest about our interrupt status */
+	vcpu->arch.shared->int_pending = !!*pending;
+}
+
+/*
+ * Check pending exceptions and deliver one, if possible.
+ *
+ * Expects interrupts to be disabled (warns once otherwise). If the guest has
+ * MSR[WE] set, the vcpu is blocked with interrupts temporarily enabled and
+ * the pending exceptions are checked a second time afterwards.
+ */
+void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
+{
+	WARN_ON_ONCE(!irqs_disabled());
+
+	kvmppc_core_check_exceptions(vcpu);
+
+	if (!(vcpu->arch.shared->msr & MSR_WE))
+		return;
+
+	local_irq_enable();
+	kvm_vcpu_block(vcpu);
+	local_irq_disable();
+
+	kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
+	kvmppc_core_check_exceptions(vcpu);
+}
+
+/*
+ * Enter the guest once.
+ *
+ * Returns -EINVAL (exit_reason KVM_EXIT_INTERNAL_ERROR) if the vcpu is not
+ * marked sane, -EINTR (exit_reason KVM_EXIT_INTR) if a signal is pending
+ * before entry, and otherwise whatever __kvmppc_vcpu_run() returns.
+ * Interrupts are disabled around the entry and re-enabled before returning.
+ */
+int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+{
+	int ret;
+
+	if (!vcpu->arch.sane) {
+		kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		return -EINVAL;
+	}
+
+	local_irq_disable();
+
+	kvmppc_core_prepare_to_enter(vcpu);
+
+	if (signal_pending(current)) {
+		/* Let userspace handle the signal instead of entering. */
+		kvm_run->exit_reason = KVM_EXIT_INTR;
+		ret = -EINTR;
+	} else {
+		kvm_guest_enter();
+		ret = __kvmppc_vcpu_run(kvm_run, vcpu);
+		kvm_guest_exit();
+	}
+
+	local_irq_enable();
+	return ret;
+}
+
+/**
+ * kvmppc_handle_exit - handle a guest exit
+ * @run: kvm_run structure used to report the exit to userspace
+ * @vcpu: vcpu that exited
+ * @exit_nr: BOOKE_INTERRUPT_* number identifying the exit cause
+ *
+ * Runs with interrupts enabled while the exit is handled, then disables them
+ * again, delivers pending guest exceptions and checks for signals.
+ *
+ * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV)
+ */
+int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
+		       unsigned int exit_nr)
+{
+	enum emulation_result er;
+	int r = RESUME_HOST;
+
+	/* update before a new last_exit_type is rewritten */
+	kvmppc_update_timing_stats(vcpu);
+
+	local_irq_enable();
+
+	run->exit_reason = KVM_EXIT_UNKNOWN;
+	run->ready_for_interrupt_injection = 1;
+
+	switch (exit_nr) {
+	case BOOKE_INTERRUPT_MACHINE_CHECK:
+		printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR));
+		kvmppc_dump_vcpu(vcpu);
+		r = RESUME_HOST;
+		break;
+
+	case BOOKE_INTERRUPT_EXTERNAL:
+		kvmppc_account_exit(vcpu, EXT_INTR_EXITS);
+		if (need_resched())
+			cond_resched();
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_DECREMENTER:
+		/* Since we switched IVPR back to the host's value, the host
+		 * handled this interrupt the moment we enabled interrupts.
+		 * Now we just offer it a chance to reschedule the guest. */
+		kvmppc_account_exit(vcpu, DEC_EXITS);
+		if (need_resched())
+			cond_resched();
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_PROGRAM:
+		if (vcpu->arch.shared->msr & MSR_PR) {
+			/* Program traps generated by user-level software must be handled
+			 * by the guest kernel. */
+			kvmppc_core_queue_program(vcpu, vcpu->arch.fault_esr);
+			r = RESUME_GUEST;
+			kvmppc_account_exit(vcpu, USR_PR_INST);
+			break;
+		}
+
+		er = kvmppc_emulate_instruction(run, vcpu);
+		switch (er) {
+		case EMULATE_DONE:
+			/* don't overwrite subtypes, just account kvm_stats */
+			kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS);
+			/* Future optimization: only reload non-volatiles if
+			 * they were actually modified by emulation. */
+			r = RESUME_GUEST_NV;
+			break;
+		case EMULATE_DO_DCR:
+			run->exit_reason = KVM_EXIT_DCR;
+			r = RESUME_HOST;
+			break;
+		case EMULATE_FAIL:
+			/* XXX Deliver Program interrupt to guest. */
+			printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
+			       __func__, vcpu->arch.pc, vcpu->arch.last_inst);
+			/* For debugging, encode the failing instruction and
+			 * report it to userspace. */
+			run->hw.hardware_exit_reason = ~0ULL << 32;
+			run->hw.hardware_exit_reason |= vcpu->arch.last_inst;
+			r = RESUME_HOST;
+			break;
+		default:
+			BUG();
+		}
+		break;
+
+	case BOOKE_INTERRUPT_FP_UNAVAIL:
+		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL);
+		kvmppc_account_exit(vcpu, FP_UNAVAIL);
+		r = RESUME_GUEST;
+		break;
+
+#ifdef CONFIG_SPE
+	case BOOKE_INTERRUPT_SPE_UNAVAIL: {
+		/* If the guest MSR has SPE enabled, only the shadow state
+		 * was missing; otherwise reflect the interrupt to the guest. */
+		if (vcpu->arch.shared->msr & MSR_SPE)
+			kvmppc_vcpu_enable_spe(vcpu);
+		else
+			kvmppc_booke_queue_irqprio(vcpu,
+						   BOOKE_IRQPRIO_SPE_UNAVAIL);
+		r = RESUME_GUEST;
+		break;
+	}
+
+	case BOOKE_INTERRUPT_SPE_FP_DATA:
+		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_DATA);
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_SPE_FP_ROUND:
+		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND);
+		r = RESUME_GUEST;
+		break;
+#else
+	case BOOKE_INTERRUPT_SPE_UNAVAIL:
+		/*
+		 * Guest wants SPE, but host kernel doesn't support it. Send
+		 * an "unimplemented operation" program check to the guest.
+		 */
+		kvmppc_core_queue_program(vcpu, ESR_PUO | ESR_SPV);
+		r = RESUME_GUEST;
+		break;
+
+	/*
+	 * These really should never happen without CONFIG_SPE,
+	 * as we should never enable the real MSR[SPE] in the guest.
+	 */
+	case BOOKE_INTERRUPT_SPE_FP_DATA:
+	case BOOKE_INTERRUPT_SPE_FP_ROUND:
+		printk(KERN_CRIT "%s: unexpected SPE interrupt %u at %08lx\n",
+		       __func__, exit_nr, vcpu->arch.pc);
+		run->hw.hardware_exit_reason = exit_nr;
+		r = RESUME_HOST;
+		break;
+#endif
+
+	case BOOKE_INTERRUPT_DATA_STORAGE:
+		kvmppc_core_queue_data_storage(vcpu, vcpu->arch.fault_dear,
+					       vcpu->arch.fault_esr);
+		kvmppc_account_exit(vcpu, DSI_EXITS);
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_INST_STORAGE:
+		kvmppc_core_queue_inst_storage(vcpu, vcpu->arch.fault_esr);
+		kvmppc_account_exit(vcpu, ISI_EXITS);
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_SYSCALL:
+		if (!(vcpu->arch.shared->msr & MSR_PR) &&
+		    (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) {
+			/* KVM PV hypercalls */
+			kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu));
+		} else {
+			/* Guest syscalls */
+			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL);
+		}
+		kvmppc_account_exit(vcpu, SYSCALL_EXITS);
+		/* Both paths resume the guest. */
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_DTLB_MISS: {
+		unsigned long eaddr = vcpu->arch.fault_dear;
+		int gtlb_index;
+		gpa_t gpaddr;
+		gfn_t gfn;
+
+#ifdef CONFIG_KVM_E500
+		/* A supervisor-mode access to the magic page is mapped
+		 * directly, without consulting the guest TLB. */
+		if (!(vcpu->arch.shared->msr & MSR_PR) &&
+		    (eaddr & PAGE_MASK) == vcpu->arch.magic_page_ea) {
+			kvmppc_map_magic(vcpu);
+			kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS);
+			r = RESUME_GUEST;
+
+			break;
+		}
+#endif
+
+		/* Check the guest TLB. */
+		gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr);
+		if (gtlb_index < 0) {
+			/* The guest didn't have a mapping for it. */
+			kvmppc_core_queue_dtlb_miss(vcpu,
+						    vcpu->arch.fault_dear,
+						    vcpu->arch.fault_esr);
+			kvmppc_mmu_dtlb_miss(vcpu);
+			kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS);
+			r = RESUME_GUEST;
+			break;
+		}
+
+		gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr);
+		gfn = gpaddr >> PAGE_SHIFT;
+
+		if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+			/* The guest TLB had a mapping, but the shadow TLB
+			 * didn't, and it is RAM. This could be because:
+			 * a) the entry is mapping the host kernel, or
+			 * b) the guest used a large mapping which we're faking
+			 * Either way, we need to satisfy the fault without
+			 * invoking the guest. */
+			kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index);
+			kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS);
+			r = RESUME_GUEST;
+		} else {
+			/* Guest has mapped and accessed a page which is not
+			 * actually RAM. */
+			vcpu->arch.paddr_accessed = gpaddr;
+			r = kvmppc_emulate_mmio(run, vcpu);
+			kvmppc_account_exit(vcpu, MMIO_EXITS);
+		}
+
+		break;
+	}
+
+	case BOOKE_INTERRUPT_ITLB_MISS: {
+		unsigned long eaddr = vcpu->arch.pc;
+		gpa_t gpaddr;
+		gfn_t gfn;
+		int gtlb_index;
+
+		r = RESUME_GUEST;
+
+		/* Check the guest TLB. */
+		gtlb_index = kvmppc_mmu_itlb_index(vcpu, eaddr);
+		if (gtlb_index < 0) {
+			/* The guest didn't have a mapping for it. */
+			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS);
+			kvmppc_mmu_itlb_miss(vcpu);
+			kvmppc_account_exit(vcpu, ITLB_REAL_MISS_EXITS);
+			break;
+		}
+
+		kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS);
+
+		gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr);
+		gfn = gpaddr >> PAGE_SHIFT;
+
+		if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+			/* The guest TLB had a mapping, but the shadow TLB
+			 * didn't. This could be because:
+			 * a) the entry is mapping the host kernel, or
+			 * b) the guest used a large mapping which we're faking
+			 * Either way, we need to satisfy the fault without
+			 * invoking the guest. */
+			kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index);
+		} else {
+			/* Guest mapped and leaped at non-RAM! */
+			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK);
+		}
+
+		break;
+	}
+
+	case BOOKE_INTERRUPT_DEBUG: {
+		u32 dbsr;
+
+		vcpu->arch.pc = mfspr(SPRN_CSRR0);
+
+		/* clear IAC events in DBSR register */
+		dbsr = mfspr(SPRN_DBSR);
+		dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
+		mtspr(SPRN_DBSR, dbsr);
+
+		run->exit_reason = KVM_EXIT_DEBUG;
+		kvmppc_account_exit(vcpu, DEBUG_EXITS);
+		r = RESUME_HOST;
+		break;
+	}
+
+	default:
+		/* exit_nr is unsigned, so print it with %u. */
+		printk(KERN_EMERG "exit_nr %u\n", exit_nr);
+		BUG();
+	}
+
+	local_irq_disable();
+
+	kvmppc_core_prepare_to_enter(vcpu);
+
+	if (!(r & RESUME_HOST)) {
+		/* To avoid clobbering exit_reason, only check for signals if
+		 * we aren't already exiting to userspace for some other
+		 * reason. */
+		if (signal_pending(current)) {
+			run->exit_reason = KVM_EXIT_INTR;
+			r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
+			kvmppc_account_exit(vcpu, SIGNAL_EXITS);
+		}
+	}
+
+	return r;
+}
+
+/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB
+ *
+ * Returns the result of kvmppc_core_vcpu_setup(). */
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+ int i;
+ int r;
+
+ vcpu->arch.pc = 0;
+ vcpu->arch.shared->msr = 0;
+ vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
+ vcpu->arch.shared->pir = vcpu->vcpu_id;
+ kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */
+
+ vcpu->arch.shadow_pid = 1;
+
+ /* Eye-catching numbers so we know if the guest takes an interrupt
+ * before it's programmed its own IVPR/IVORs. */
+ vcpu->arch.ivpr = 0x55550000;
+ for (i = 0; i < BOOKE_IRQPRIO_MAX; i++)
+ vcpu->arch.ivor[i] = 0x7700 | i * 4;
+
+ kvmppc_init_timing_stats(vcpu);
+
+ r = kvmppc_core_vcpu_setup(vcpu);
+ /* The sanity check runs even if the core setup reported an error. */
+ kvmppc_sanity_check(vcpu);
+ return r;
+}
+
+/* Copy the vcpu's general register state (PC, CR, CTR, LR, XER, MSR,
+ * SRR0/1, PID, SPRG0-7 and all GPRs) into *regs. Always returns 0. */
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ int i;
+
+ regs->pc = vcpu->arch.pc;
+ regs->cr = kvmppc_get_cr(vcpu);
+ regs->ctr = vcpu->arch.ctr;
+ regs->lr = vcpu->arch.lr;
+ regs->xer = kvmppc_get_xer(vcpu);
+ regs->msr = vcpu->arch.shared->msr;
+ regs->srr0 = vcpu->arch.shared->srr0;
+ regs->srr1 = vcpu->arch.shared->srr1;
+ regs->pid = vcpu->arch.pid;
+ regs->sprg0 = vcpu->arch.shared->sprg0;
+ regs->sprg1 = vcpu->arch.shared->sprg1;
+ regs->sprg2 = vcpu->arch.shared->sprg2;
+ regs->sprg3 = vcpu->arch.shared->sprg3;
+ regs->sprg4 = vcpu->arch.shared->sprg4;
+ regs->sprg5 = vcpu->arch.shared->sprg5;
+ regs->sprg6 = vcpu->arch.shared->sprg6;
+ regs->sprg7 = vcpu->arch.shared->sprg7;
+
+ for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
+ regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
+
+ return 0;
+}
+
+/* Load the vcpu's general register state from *regs. MSR and PID go through
+ * kvmppc_set_msr() / kvmppc_set_pid() rather than being stored directly.
+ * Always returns 0. */
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ int i;
+
+ vcpu->arch.pc = regs->pc;
+ kvmppc_set_cr(vcpu, regs->cr);
+ vcpu->arch.ctr = regs->ctr;
+ vcpu->arch.lr = regs->lr;
+ kvmppc_set_xer(vcpu, regs->xer);
+ kvmppc_set_msr(vcpu, regs->msr);
+ vcpu->arch.shared->srr0 = regs->srr0;
+ vcpu->arch.shared->srr1 = regs->srr1;
+ kvmppc_set_pid(vcpu, regs->pid);
+ vcpu->arch.shared->sprg0 = regs->sprg0;
+ vcpu->arch.shared->sprg1 = regs->sprg1;
+ vcpu->arch.shared->sprg2 = regs->sprg2;
+ vcpu->arch.shared->sprg3 = regs->sprg3;
+ vcpu->arch.shared->sprg4 = regs->sprg4;
+ vcpu->arch.shared->sprg5 = regs->sprg5;
+ vcpu->arch.shared->sprg6 = regs->sprg6;
+ vcpu->arch.shared->sprg7 = regs->sprg7;
+
+ for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
+ kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
+
+ return 0;
+}
+
+/* Fill in the KVM_SREGS_E_BASE part of *sregs and set its feature flag. */
+static void get_sregs_base(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ /* One timebase sample is used for both the DEC and TB values below. */
+ u64 tb = get_tb();
+
+ sregs->u.e.features |= KVM_SREGS_E_BASE;
+
+ sregs->u.e.csrr0 = vcpu->arch.csrr0;
+ sregs->u.e.csrr1 = vcpu->arch.csrr1;
+ sregs->u.e.mcsr = vcpu->arch.mcsr;
+ sregs->u.e.esr = vcpu->arch.shared->esr;
+ sregs->u.e.dear = vcpu->arch.shared->dar;
+ sregs->u.e.tsr = vcpu->arch.tsr;
+ sregs->u.e.tcr = vcpu->arch.tcr;
+ sregs->u.e.dec = kvmppc_get_dec(vcpu, tb);
+ sregs->u.e.tb = tb;
+ sregs->u.e.vrsave = vcpu->arch.vrsave;
+}
+
+/* Apply the KVM_SREGS_E_BASE part of *sregs to the vcpu. Does nothing if the
+ * feature flag is not set. DEC and TSR are only written when the matching
+ * update_special bit requests it. Always returns 0. */
+static int set_sregs_base(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ if (!(sregs->u.e.features & KVM_SREGS_E_BASE))
+ return 0;
+
+ vcpu->arch.csrr0 = sregs->u.e.csrr0;
+ vcpu->arch.csrr1 = sregs->u.e.csrr1;
+ vcpu->arch.mcsr = sregs->u.e.mcsr;
+ vcpu->arch.shared->esr = sregs->u.e.esr;
+ vcpu->arch.shared->dar = sregs->u.e.dear;
+ vcpu->arch.vrsave = sregs->u.e.vrsave;
+ /* Goes through the helper so the decrementer pending state follows. */
+ kvmppc_set_tcr(vcpu, sregs->u.e.tcr);
+
+ if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) {
+ vcpu->arch.dec = sregs->u.e.dec;
+ kvmppc_emulate_dec(vcpu);
+ }
+
+ if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
+ vcpu->arch.tsr = sregs->u.e.tsr;
+ update_timer_ints(vcpu);
+ }
+
+ return 0;
+}
+
+/* Fill in the KVM_SREGS_E_ARCH206 part of *sregs and set its feature flag.
+ * PIR is reported as the vcpu id. */
+static void get_sregs_arch206(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ sregs->u.e.features |= KVM_SREGS_E_ARCH206;
+
+ sregs->u.e.pir = vcpu->vcpu_id;
+ sregs->u.e.mcsrr0 = vcpu->arch.mcsrr0;
+ sregs->u.e.mcsrr1 = vcpu->arch.mcsrr1;
+ sregs->u.e.decar = vcpu->arch.decar;
+ sregs->u.e.ivpr = vcpu->arch.ivpr;
+}
+
+/* Apply the KVM_SREGS_E_ARCH206 part of *sregs to the vcpu. Returns 0 on
+ * success or when the feature flag is not set, and -EINVAL if the supplied
+ * PIR differs from the vcpu id. */
+static int set_sregs_arch206(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ if (!(sregs->u.e.features & KVM_SREGS_E_ARCH206))
+ return 0;
+
+ /* PIR cannot be changed; it must match the vcpu id. */
+ if (sregs->u.e.pir != vcpu->vcpu_id)
+ return -EINVAL;
+
+ vcpu->arch.mcsrr0 = sregs->u.e.mcsrr0;
+ vcpu->arch.mcsrr1 = sregs->u.e.mcsrr1;
+ vcpu->arch.decar = sregs->u.e.decar;
+ vcpu->arch.ivpr = sregs->u.e.ivpr;
+
+ return 0;
+}
+
+/*
+ * Export the 16 base IVOR values into sregs->u.e.ivor_low[] and set the
+ * KVM_SREGS_E_IVOR feature flag. Entry n of the table below names the
+ * internal priority slot whose vector is reported as ivor_low[n].
+ */
+void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+	static const unsigned int ivor_low_prio[] = {
+		BOOKE_IRQPRIO_CRITICAL,
+		BOOKE_IRQPRIO_MACHINE_CHECK,
+		BOOKE_IRQPRIO_DATA_STORAGE,
+		BOOKE_IRQPRIO_INST_STORAGE,
+		BOOKE_IRQPRIO_EXTERNAL,
+		BOOKE_IRQPRIO_ALIGNMENT,
+		BOOKE_IRQPRIO_PROGRAM,
+		BOOKE_IRQPRIO_FP_UNAVAIL,
+		BOOKE_IRQPRIO_SYSCALL,
+		BOOKE_IRQPRIO_AP_UNAVAIL,
+		BOOKE_IRQPRIO_DECREMENTER,
+		BOOKE_IRQPRIO_FIT,
+		BOOKE_IRQPRIO_WATCHDOG,
+		BOOKE_IRQPRIO_DTLB_MISS,
+		BOOKE_IRQPRIO_ITLB_MISS,
+		BOOKE_IRQPRIO_DEBUG,
+	};
+	unsigned int n;
+
+	sregs->u.e.features |= KVM_SREGS_E_IVOR;
+
+	for (n = 0; n < ARRAY_SIZE(ivor_low_prio); n++)
+		sregs->u.e.ivor_low[n] = vcpu->arch.ivor[ivor_low_prio[n]];
+}
+
+/*
+ * Import the 16 base IVOR values from sregs->u.e.ivor_low[] when the
+ * KVM_SREGS_E_IVOR feature flag is set; otherwise leave the vcpu untouched.
+ * Entry n of the table below names the internal priority slot that receives
+ * ivor_low[n]. Always returns 0.
+ */
+int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+	static const unsigned int ivor_low_prio[] = {
+		BOOKE_IRQPRIO_CRITICAL,
+		BOOKE_IRQPRIO_MACHINE_CHECK,
+		BOOKE_IRQPRIO_DATA_STORAGE,
+		BOOKE_IRQPRIO_INST_STORAGE,
+		BOOKE_IRQPRIO_EXTERNAL,
+		BOOKE_IRQPRIO_ALIGNMENT,
+		BOOKE_IRQPRIO_PROGRAM,
+		BOOKE_IRQPRIO_FP_UNAVAIL,
+		BOOKE_IRQPRIO_SYSCALL,
+		BOOKE_IRQPRIO_AP_UNAVAIL,
+		BOOKE_IRQPRIO_DECREMENTER,
+		BOOKE_IRQPRIO_FIT,
+		BOOKE_IRQPRIO_WATCHDOG,
+		BOOKE_IRQPRIO_DTLB_MISS,
+		BOOKE_IRQPRIO_ITLB_MISS,
+		BOOKE_IRQPRIO_DEBUG,
+	};
+	unsigned int n;
+
+	if (!(sregs->u.e.features & KVM_SREGS_E_IVOR))
+		return 0;
+
+	for (n = 0; n < ARRAY_SIZE(ivor_low_prio); n++)
+		vcpu->arch.ivor[ivor_low_prio[n]] = sregs->u.e.ivor_low[n];
+
+	return 0;
+}
+
+/* Report the PVR plus the base, arch206 and core-specific special registers
+ * in *sregs. Always returns 0. */
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ sregs->pvr = vcpu->arch.pvr;
+
+ get_sregs_base(vcpu, sregs);
+ get_sregs_arch206(vcpu, sregs);
+ kvmppc_core_get_sregs(vcpu, sregs);
+ return 0;
+}
+
+/*
+ * Apply the special registers in *sregs to the vcpu: base set first, then
+ * arch206, then the core-specific set. Stops at the first stage that
+ * reports a negative error and returns it. Returns -EINVAL if the supplied
+ * PVR does not match the vcpu's.
+ */
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs)
+{
+	int ret;
+
+	if (sregs->pvr != vcpu->arch.pvr)
+		return -EINVAL;
+
+	ret = set_sregs_base(vcpu, sregs);
+	if (ret >= 0)
+		ret = set_sregs_arch206(vcpu, sregs);
+	if (ret >= 0)
+		ret = kvmppc_core_set_sregs(vcpu, sregs);
+
+	return ret;
+}
+
+/* Stub: no single register is readable through this interface here;
+ * always returns -EINVAL. */
+int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+ return -EINVAL;
+}
+
+/* Stub: no single register is writable through this interface here;
+ * always returns -EINVAL. */
+int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+ return -EINVAL;
+}
+
+/* Stub: reading FPU state is not implemented; always returns -ENOTSUPP. */
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ return -ENOTSUPP;
+}
+
+/* Stub: writing FPU state is not implemented; always returns -ENOTSUPP. */
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ return -ENOTSUPP;
+}
+
+/* Forward the translation request to the core-specific implementation and
+ * return its result. */
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+				  struct kvm_translation *tr)
+{
+	return kvmppc_core_vcpu_translate(vcpu, tr);
+}
+
+/* Stub: dirty logging is not implemented; always returns -ENOTSUPP. */
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
+{
+ return -ENOTSUPP;
+}
+
+/* Nothing to prepare for a memory region change; always returns 0. */
+int kvmppc_core_prepare_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem)
+{
+ return 0;
+}
+
+/* Nothing to do when a memory region change is committed. */
+void kvmppc_core_commit_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem)
+{
+}
+
+/* No per-VM state to initialize; always returns 0. */
+int kvmppc_core_init_vm(struct kvm *kvm)
+{
+ return 0;
+}
+
+/* No per-VM state to tear down. */
+void kvmppc_core_destroy_vm(struct kvm *kvm)
+{
+}
+
+/* Store a new TCR value and recompute the decrementer pending state. */
+void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
+{
+ vcpu->arch.tcr = new_tcr;
+ update_timer_ints(vcpu);
+}
+
+/* Set bits in TSR, then raise KVM_REQ_PENDING_TIMER and kick the vcpu. The
+ * pending state itself is recomputed when the vcpu processes the request
+ * in kvmppc_core_check_exceptions(). */
+void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
+{
+ set_bits(tsr_bits, &vcpu->arch.tsr);
+ /* Order the TSR update before the request is made. */
+ smp_wmb();
+ kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+ kvm_vcpu_kick(vcpu);
+}
+
+/* Clear bits in TSR and recompute the decrementer pending state at once. */
+void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
+{
+ clear_bits(tsr_bits, &vcpu->arch.tsr);
+ update_timer_ints(vcpu);
+}
+
+/* Decrementer expiry callback; data carries the struct kvm_vcpu pointer.
+ * Sets TSR[DIS] for that vcpu. */
+void kvmppc_decrementer_func(unsigned long data)
+{
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
+
+ kvmppc_set_tsr_bits(vcpu, TSR_DIS);
+}
+
+/*
+ * Allocate the handler area and copy KVM's interrupt handlers into it at the
+ * offsets given by the host's IVOR0-15, then flush the instruction cache
+ * over the copied range.
+ *
+ * Returns 0 on success or -ENOMEM if the allocation fails.
+ */
+int __init kvmppc_booke_init(void)
+{
+ unsigned long ivor[16];
+ unsigned long max_ivor = 0;
+ int i;
+
+ /* We install our own exception handlers by hijacking IVPR. IVPR must
+ * be aligned to 2^16 bytes, so we need a 64KB allocation. */
+ kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ VCPU_SIZE_ORDER);
+ if (!kvmppc_booke_handlers)
+ return -ENOMEM;
+
+ /* XXX make sure our handlers are smaller than Linux's */
+
+ /* Copy our interrupt handlers to match host IVORs. That way we don't
+ * have to swap the IVORs on every guest/host transition. */
+ ivor[0] = mfspr(SPRN_IVOR0);
+ ivor[1] = mfspr(SPRN_IVOR1);
+ ivor[2] = mfspr(SPRN_IVOR2);
+ ivor[3] = mfspr(SPRN_IVOR3);
+ ivor[4] = mfspr(SPRN_IVOR4);
+ ivor[5] = mfspr(SPRN_IVOR5);
+ ivor[6] = mfspr(SPRN_IVOR6);
+ ivor[7] = mfspr(SPRN_IVOR7);
+ ivor[8] = mfspr(SPRN_IVOR8);
+ ivor[9] = mfspr(SPRN_IVOR9);
+ ivor[10] = mfspr(SPRN_IVOR10);
+ ivor[11] = mfspr(SPRN_IVOR11);
+ ivor[12] = mfspr(SPRN_IVOR12);
+ ivor[13] = mfspr(SPRN_IVOR13);
+ ivor[14] = mfspr(SPRN_IVOR14);
+ ivor[15] = mfspr(SPRN_IVOR15);
+
+ for (i = 0; i < 16; i++) {
+ /* Track the highest offset so the flush below covers every copy. */
+ if (ivor[i] > max_ivor)
+ max_ivor = ivor[i];
+
+ memcpy((void *)kvmppc_booke_handlers + ivor[i],
+ kvmppc_handlers_start + i * kvmppc_handler_len,
+ kvmppc_handler_len);
+ }
+ flush_icache_range(kvmppc_booke_handlers,
+ kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
+
+ return 0;
+}
+
+/* Release the handler pages allocated by kvmppc_booke_init() and call
+ * kvm_exit(). */
+void __exit kvmppc_booke_exit(void)
+{
+ free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER);
+ kvm_exit();
+}
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
new file mode 100644
index 00000000..2fe20270
--- /dev/null
+++ b/arch/powerpc/kvm/booke.h
@@ -0,0 +1,74 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#ifndef __KVM_BOOKE_H__
+#define __KVM_BOOKE_H__
+
+#include <linux/types.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_ppc.h>
+#include "timing.h"
+
+/* interrupt priority ordering: each value is a bit position in
+ * vcpu->arch.pending_exceptions; lower numbers are tried first */
+#define BOOKE_IRQPRIO_DATA_STORAGE 0
+#define BOOKE_IRQPRIO_INST_STORAGE 1
+#define BOOKE_IRQPRIO_ALIGNMENT 2
+#define BOOKE_IRQPRIO_PROGRAM 3
+#define BOOKE_IRQPRIO_FP_UNAVAIL 4
+#define BOOKE_IRQPRIO_SPE_UNAVAIL 5
+#define BOOKE_IRQPRIO_SPE_FP_DATA 6
+#define BOOKE_IRQPRIO_SPE_FP_ROUND 7
+#define BOOKE_IRQPRIO_SYSCALL 8
+#define BOOKE_IRQPRIO_AP_UNAVAIL 9
+#define BOOKE_IRQPRIO_DTLB_MISS 10
+#define BOOKE_IRQPRIO_ITLB_MISS 11
+#define BOOKE_IRQPRIO_MACHINE_CHECK 12
+#define BOOKE_IRQPRIO_DEBUG 13
+#define BOOKE_IRQPRIO_CRITICAL 14
+#define BOOKE_IRQPRIO_WATCHDOG 15
+#define BOOKE_IRQPRIO_EXTERNAL 16
+#define BOOKE_IRQPRIO_FIT 17
+#define BOOKE_IRQPRIO_DECREMENTER 18
+#define BOOKE_IRQPRIO_PERFORMANCE_MONITOR 19
+/* Internal pseudo-irqprio for level triggered externals */
+#define BOOKE_IRQPRIO_EXTERNAL_LEVEL 20
+#define BOOKE_IRQPRIO_MAX 20
+
+extern unsigned long kvmppc_booke_handlers;
+
+void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr);
+void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr);
+
+void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr);
+void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
+void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
+
+int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int inst, int *advance);
+int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt);
+int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs);
+
+/* low-level asm code to transfer guest state */
+void kvmppc_load_guest_spe(struct kvm_vcpu *vcpu);
+void kvmppc_save_guest_spe(struct kvm_vcpu *vcpu);
+
+/* high-level function, manages flags, host state */
+void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu);
+
+#endif /* __KVM_BOOKE_H__ */
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
new file mode 100644
index 00000000..3e652da3
--- /dev/null
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -0,0 +1,272 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ * Copyright 2011 Freescale Semiconductor, Inc.
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/disassemble.h>
+
+#include "booke.h"
+
+#define OP_19_XOP_RFI 50
+
+#define OP_31_XOP_MFMSR 83
+#define OP_31_XOP_WRTEE 131
+#define OP_31_XOP_MTMSR 146
+#define OP_31_XOP_WRTEEI 163
+
+/*
+ * Emulate rfi: the guest continues at the address held in its SRR0 and
+ * its MSR is replaced by the value held in its SRR1.  The PC is written
+ * first; the MSR change then goes through kvmppc_set_msr().
+ */
+static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.pc = vcpu->arch.shared->srr0;
+	kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1);
+}
+
+/*
+ * Emulate the privileged instructions handled for every BookE core:
+ * rfi (primary opcode 19) and mfmsr, mtmsr, wrtee, wrteei (primary
+ * opcode 31).
+ *
+ * Returns EMULATE_DONE when the instruction was handled here and
+ * EMULATE_FAIL for anything else.  *advance is set to 0 only for rfi,
+ * which installs the next guest PC itself; every other path leaves it
+ * untouched.  The run argument is not used.
+ */
+int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+			    unsigned int inst, int *advance)
+{
+	int reg;
+
+	if (get_op(inst) == 19) {
+		if (get_xop(inst) != OP_19_XOP_RFI)
+			return EMULATE_FAIL;
+
+		kvmppc_emul_rfi(vcpu);
+		kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS);
+		*advance = 0;
+		return EMULATE_DONE;
+	}
+
+	if (get_op(inst) != 31)
+		return EMULATE_FAIL;
+
+	switch (get_xop(inst)) {
+	case OP_31_XOP_MFMSR:
+		reg = get_rt(inst);
+		kvmppc_set_gpr(vcpu, reg, vcpu->arch.shared->msr);
+		kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
+		break;
+
+	case OP_31_XOP_MTMSR:
+		/* The exit type is recorded before the MSR is changed. */
+		reg = get_rs(inst);
+		kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
+		kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, reg));
+		break;
+
+	case OP_31_XOP_WRTEE:
+		/* Only the EE bit is copied from the source GPR. */
+		reg = get_rs(inst);
+		vcpu->arch.shared->msr = (vcpu->arch.shared->msr & ~MSR_EE)
+					 | (kvmppc_get_gpr(vcpu, reg) & MSR_EE);
+		kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
+		break;
+
+	case OP_31_XOP_WRTEEI:
+		/* The EE bit comes from the instruction word, masked with MSR_EE. */
+		vcpu->arch.shared->msr = (vcpu->arch.shared->msr & ~MSR_EE)
+					 | (inst & MSR_EE);
+		kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
+		break;
+
+	default:
+		return EMULATE_FAIL;
+	}
+
+	return EMULATE_DONE;
+}
+
+/*
+ * Emulate a guest mtspr to one of the SPRs handled for every BookE core.
+ *
+ * The value is read from guest GPR rs.  Returns EMULATE_DONE when sprn
+ * is handled here and EMULATE_FAIL otherwise.
+ */
+int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+{
+	ulong val = kvmppc_get_gpr(vcpu, rs);
+	int prio;
+
+	/* SPRs with their own storage or a dedicated setter. */
+	switch (sprn) {
+	case SPRN_DEAR:
+		vcpu->arch.shared->dar = val;
+		return EMULATE_DONE;
+	case SPRN_ESR:
+		vcpu->arch.shared->esr = val;
+		return EMULATE_DONE;
+	case SPRN_DBCR0:
+		vcpu->arch.dbcr0 = val;
+		return EMULATE_DONE;
+	case SPRN_DBCR1:
+		vcpu->arch.dbcr1 = val;
+		return EMULATE_DONE;
+	case SPRN_DBSR:
+		/* Bits set in the written value are cleared in DBSR. */
+		vcpu->arch.dbsr &= ~val;
+		return EMULATE_DONE;
+	case SPRN_TSR:
+		kvmppc_clr_tsr_bits(vcpu, val);
+		return EMULATE_DONE;
+	case SPRN_TCR:
+		kvmppc_set_tcr(vcpu, val);
+		return EMULATE_DONE;
+
+	/*
+	 * Note: SPRG4-7 are user-readable.  These values are loaded
+	 * into the real SPRGs when resuming the guest.
+	 */
+	case SPRN_SPRG4:
+		vcpu->arch.shared->sprg4 = val;
+		return EMULATE_DONE;
+	case SPRN_SPRG5:
+		vcpu->arch.shared->sprg5 = val;
+		return EMULATE_DONE;
+	case SPRN_SPRG6:
+		vcpu->arch.shared->sprg6 = val;
+		return EMULATE_DONE;
+	case SPRN_SPRG7:
+		vcpu->arch.shared->sprg7 = val;
+		return EMULATE_DONE;
+
+	case SPRN_IVPR:
+		vcpu->arch.ivpr = val;
+		return EMULATE_DONE;
+	}
+
+	/* Anything left must be IVOR0-15; map it to its ivor[] slot. */
+	switch (sprn) {
+	case SPRN_IVOR0:
+		prio = BOOKE_IRQPRIO_CRITICAL;
+		break;
+	case SPRN_IVOR1:
+		prio = BOOKE_IRQPRIO_MACHINE_CHECK;
+		break;
+	case SPRN_IVOR2:
+		prio = BOOKE_IRQPRIO_DATA_STORAGE;
+		break;
+	case SPRN_IVOR3:
+		prio = BOOKE_IRQPRIO_INST_STORAGE;
+		break;
+	case SPRN_IVOR4:
+		prio = BOOKE_IRQPRIO_EXTERNAL;
+		break;
+	case SPRN_IVOR5:
+		prio = BOOKE_IRQPRIO_ALIGNMENT;
+		break;
+	case SPRN_IVOR6:
+		prio = BOOKE_IRQPRIO_PROGRAM;
+		break;
+	case SPRN_IVOR7:
+		prio = BOOKE_IRQPRIO_FP_UNAVAIL;
+		break;
+	case SPRN_IVOR8:
+		prio = BOOKE_IRQPRIO_SYSCALL;
+		break;
+	case SPRN_IVOR9:
+		prio = BOOKE_IRQPRIO_AP_UNAVAIL;
+		break;
+	case SPRN_IVOR10:
+		prio = BOOKE_IRQPRIO_DECREMENTER;
+		break;
+	case SPRN_IVOR11:
+		prio = BOOKE_IRQPRIO_FIT;
+		break;
+	case SPRN_IVOR12:
+		prio = BOOKE_IRQPRIO_WATCHDOG;
+		break;
+	case SPRN_IVOR13:
+		prio = BOOKE_IRQPRIO_DTLB_MISS;
+		break;
+	case SPRN_IVOR14:
+		prio = BOOKE_IRQPRIO_ITLB_MISS;
+		break;
+	case SPRN_IVOR15:
+		prio = BOOKE_IRQPRIO_DEBUG;
+		break;
+	default:
+		return EMULATE_FAIL;
+	}
+
+	vcpu->arch.ivor[prio] = val;
+	return EMULATE_DONE;
+}
+
+/*
+ * Emulate a guest mfspr from one of the SPRs handled for every BookE
+ * core.
+ *
+ * The result is stored in guest GPR rt.  Returns EMULATE_DONE when sprn
+ * is handled here and EMULATE_FAIL otherwise (rt is then left alone).
+ */
+int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+{
+	int prio;
+
+	/* SPRs backed by their own vcpu field. */
+	switch (sprn) {
+	case SPRN_IVPR:
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivpr);
+		return EMULATE_DONE;
+	case SPRN_DEAR:
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar);
+		return EMULATE_DONE;
+	case SPRN_ESR:
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->esr);
+		return EMULATE_DONE;
+	case SPRN_DBCR0:
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr0);
+		return EMULATE_DONE;
+	case SPRN_DBCR1:
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr1);
+		return EMULATE_DONE;
+	case SPRN_DBSR:
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbsr);
+		return EMULATE_DONE;
+	case SPRN_TSR:
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.tsr);
+		return EMULATE_DONE;
+	case SPRN_TCR:
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.tcr);
+		return EMULATE_DONE;
+	}
+
+	/* Anything left must be IVOR0-15; map it to its ivor[] slot. */
+	switch (sprn) {
+	case SPRN_IVOR0:
+		prio = BOOKE_IRQPRIO_CRITICAL;
+		break;
+	case SPRN_IVOR1:
+		prio = BOOKE_IRQPRIO_MACHINE_CHECK;
+		break;
+	case SPRN_IVOR2:
+		prio = BOOKE_IRQPRIO_DATA_STORAGE;
+		break;
+	case SPRN_IVOR3:
+		prio = BOOKE_IRQPRIO_INST_STORAGE;
+		break;
+	case SPRN_IVOR4:
+		prio = BOOKE_IRQPRIO_EXTERNAL;
+		break;
+	case SPRN_IVOR5:
+		prio = BOOKE_IRQPRIO_ALIGNMENT;
+		break;
+	case SPRN_IVOR6:
+		prio = BOOKE_IRQPRIO_PROGRAM;
+		break;
+	case SPRN_IVOR7:
+		prio = BOOKE_IRQPRIO_FP_UNAVAIL;
+		break;
+	case SPRN_IVOR8:
+		prio = BOOKE_IRQPRIO_SYSCALL;
+		break;
+	case SPRN_IVOR9:
+		prio = BOOKE_IRQPRIO_AP_UNAVAIL;
+		break;
+	case SPRN_IVOR10:
+		prio = BOOKE_IRQPRIO_DECREMENTER;
+		break;
+	case SPRN_IVOR11:
+		prio = BOOKE_IRQPRIO_FIT;
+		break;
+	case SPRN_IVOR12:
+		prio = BOOKE_IRQPRIO_WATCHDOG;
+		break;
+	case SPRN_IVOR13:
+		prio = BOOKE_IRQPRIO_DTLB_MISS;
+		break;
+	case SPRN_IVOR14:
+		prio = BOOKE_IRQPRIO_ITLB_MISS;
+		break;
+	case SPRN_IVOR15:
+		prio = BOOKE_IRQPRIO_DEBUG;
+		break;
+	default:
+		return EMULATE_FAIL;
+	}
+
+	kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[prio]);
+	return EMULATE_DONE;
+}
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
new file mode 100644
index 00000000..c8c4b878
--- /dev/null
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -0,0 +1,487 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2007
+ * Copyright 2011 Freescale Semiconductor, Inc.
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/kvm_asm.h>
+#include <asm/reg.h>
+#include <asm/mmu-44x.h>
+#include <asm/page.h>
+#include <asm/asm-offsets.h>
+
+#define VCPU_GPR(n) (VCPU_GPRS + (n * 4)) /* offset of guest GPR n from the vcpu pointer, 4 bytes per register */
+
+/* The host stack layout: */
+#define HOST_R1 0 /* Implied by stwu. */
+#define HOST_CALLEE_LR 4
+#define HOST_RUN 8 /* kvm_run pointer stored by __kvmppc_vcpu_run */
+/* r2 is special: it holds 'current', and it is made nonvolatile in the
+ * kernel with the -ffixed-r2 gcc option. */
+#define HOST_R2 12
+#define HOST_CR 16
+#define HOST_NV_GPRS 20
+#define HOST_NV_GPR(n) (HOST_NV_GPRS + ((n - 14) * 4)) /* slot for host rN; r14 is the first one stored */
+#define HOST_MIN_STACK_SIZE (HOST_NV_GPR(31) + 4)
+#define HOST_STACK_SIZE (((HOST_MIN_STACK_SIZE + 15) / 16) * 16) /* Align. */
+#define HOST_STACK_LR (HOST_STACK_SIZE + 4) /* In caller stack frame. */
+
+#define NEED_INST_MASK ((1<<BOOKE_INTERRUPT_PROGRAM) | \
+ (1<<BOOKE_INTERRUPT_DTLB_MISS) | \
+ (1<<BOOKE_INTERRUPT_DEBUG)) /* exits for which the instruction at SRR0 is fetched */
+
+#define NEED_DEAR_MASK ((1<<BOOKE_INTERRUPT_DATA_STORAGE) | \
+ (1<<BOOKE_INTERRUPT_DTLB_MISS)) /* exits for which DEAR is saved */
+
+#define NEED_ESR_MASK ((1<<BOOKE_INTERRUPT_DATA_STORAGE) | \
+ (1<<BOOKE_INTERRUPT_INST_STORAGE) | \
+ (1<<BOOKE_INTERRUPT_PROGRAM) | \
+ (1<<BOOKE_INTERRUPT_DTLB_MISS)) /* exits for which ESR is saved */
+
+.macro KVM_HANDLER ivor_nr
+_GLOBAL(kvmppc_handler_\ivor_nr)
+ /* Get pointer to vcpu and record exit number. */
+ mtspr SPRN_SPRG_WSCRATCH0, r4 /* park guest r4 in the scratch SPRG */
+ mfspr r4, SPRN_SPRG_RVCPU /* r4 = vcpu pointer */
+ stw r5, VCPU_GPR(r5)(r4) /* save guest r5 and r6 so they can be used as temporaries */
+ stw r6, VCPU_GPR(r6)(r4)
+ mfctr r5
+ lis r6, kvmppc_resume_host@h
+ stw r5, VCPU_CTR(r4) /* save guest CTR; CTR is reused for the jump below */
+ li r5, \ivor_nr /* r5 = exit number expected by kvmppc_resume_host */
+ ori r6, r6, kvmppc_resume_host@l
+ mtctr r6
+ bctr /* jump to kvmppc_resume_host */
+.endm
+
+_GLOBAL(kvmppc_handlers_start) /* first of the handler stubs, emitted back to back below */
+KVM_HANDLER BOOKE_INTERRUPT_CRITICAL
+KVM_HANDLER BOOKE_INTERRUPT_MACHINE_CHECK
+KVM_HANDLER BOOKE_INTERRUPT_DATA_STORAGE
+KVM_HANDLER BOOKE_INTERRUPT_INST_STORAGE
+KVM_HANDLER BOOKE_INTERRUPT_EXTERNAL
+KVM_HANDLER BOOKE_INTERRUPT_ALIGNMENT
+KVM_HANDLER BOOKE_INTERRUPT_PROGRAM
+KVM_HANDLER BOOKE_INTERRUPT_FP_UNAVAIL
+KVM_HANDLER BOOKE_INTERRUPT_SYSCALL
+KVM_HANDLER BOOKE_INTERRUPT_AP_UNAVAIL
+KVM_HANDLER BOOKE_INTERRUPT_DECREMENTER
+KVM_HANDLER BOOKE_INTERRUPT_FIT
+KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG
+KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS
+KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS
+KVM_HANDLER BOOKE_INTERRUPT_DEBUG
+KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL /* 17th stub, i.e. index 16 from kvmppc_handlers_start */
+KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA
+KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND
+
+_GLOBAL(kvmppc_handler_len)
+ .long kvmppc_handler_1 - kvmppc_handler_0 /* distance between the kvmppc_handler_0 and kvmppc_handler_1 labels, used as the per-stub size */
+
+
+/* Registers:
+ * SPRG_SCRATCH0: guest r4
+ * r4: vcpu pointer
+ * r5: KVM exit number
+ *
+ * Reached from every KVM_HANDLER stub, which has already stored guest
+ * r5, r6 and CTR in the vcpu. This code stores the rest of the guest
+ * state needed by the exit, switches back to the host stack, PID and
+ * IVPR, and calls kvmppc_handle_exit(). Depending on the flags in its
+ * return value it either re-enters the guest at lightweight_exit or
+ * returns to the caller of __kvmppc_vcpu_run through heavyweight_exit.
+ */
+_GLOBAL(kvmppc_resume_host)
+ stw r3, VCPU_GPR(r3)(r4)
+ mfcr r3
+ stw r3, VCPU_CR(r4)
+ stw r7, VCPU_GPR(r7)(r4)
+ stw r8, VCPU_GPR(r8)(r4)
+ stw r9, VCPU_GPR(r9)(r4)
+
+ li r6, 1
+ slw r6, r6, r5 /* r6 = 1 << exit number, tested against the NEED_* masks below */
+
+#ifdef CONFIG_KVM_EXIT_TIMING
+ /* save exit time */
+1:
+ mfspr r7, SPRN_TBRU
+ mfspr r8, SPRN_TBRL
+ mfspr r9, SPRN_TBRU
+ cmpw r9, r7
+ bne 1b /* upper half changed while reading the lower half: read again */
+ stw r8, VCPU_TIMING_EXIT_TBL(r4)
+ stw r9, VCPU_TIMING_EXIT_TBU(r4)
+#endif
+
+ /* Save the faulting instruction and all GPRs for emulation. */
+ andi. r7, r6, NEED_INST_MASK
+ beq ..skip_inst_copy
+ mfspr r9, SPRN_SRR0
+ mfmsr r8
+ ori r7, r8, MSR_DS /* set MSR[DS] only around the load; presumably selects the guest's data address space - confirm */
+ mtmsr r7
+ isync
+ lwz r9, 0(r9) /* fetch the instruction word at the guest PC */
+ mtmsr r8 /* restore the previous MSR */
+ isync
+ stw r9, VCPU_LAST_INST(r4)
+
+ stw r15, VCPU_GPR(r15)(r4)
+ stw r16, VCPU_GPR(r16)(r4)
+ stw r17, VCPU_GPR(r17)(r4)
+ stw r18, VCPU_GPR(r18)(r4)
+ stw r19, VCPU_GPR(r19)(r4)
+ stw r20, VCPU_GPR(r20)(r4)
+ stw r21, VCPU_GPR(r21)(r4)
+ stw r22, VCPU_GPR(r22)(r4)
+ stw r23, VCPU_GPR(r23)(r4)
+ stw r24, VCPU_GPR(r24)(r4)
+ stw r25, VCPU_GPR(r25)(r4)
+ stw r26, VCPU_GPR(r26)(r4)
+ stw r27, VCPU_GPR(r27)(r4)
+ stw r28, VCPU_GPR(r28)(r4)
+ stw r29, VCPU_GPR(r29)(r4)
+ stw r30, VCPU_GPR(r30)(r4)
+ stw r31, VCPU_GPR(r31)(r4)
+..skip_inst_copy:
+
+ /* Also grab DEAR and ESR before the host can clobber them. */
+
+ andi. r7, r6, NEED_DEAR_MASK
+ beq ..skip_dear
+ mfspr r9, SPRN_DEAR
+ stw r9, VCPU_FAULT_DEAR(r4)
+..skip_dear:
+
+ andi. r7, r6, NEED_ESR_MASK
+ beq ..skip_esr
+ mfspr r9, SPRN_ESR
+ stw r9, VCPU_FAULT_ESR(r4)
+..skip_esr:
+
+ /* Save remaining volatile guest register state to vcpu. */
+ stw r0, VCPU_GPR(r0)(r4)
+ stw r1, VCPU_GPR(r1)(r4)
+ stw r2, VCPU_GPR(r2)(r4)
+ stw r10, VCPU_GPR(r10)(r4)
+ stw r11, VCPU_GPR(r11)(r4)
+ stw r12, VCPU_GPR(r12)(r4)
+ stw r13, VCPU_GPR(r13)(r4)
+ stw r14, VCPU_GPR(r14)(r4) /* We need a NV GPR below. */
+ mflr r3
+ stw r3, VCPU_LR(r4)
+ mfxer r3
+ stw r3, VCPU_XER(r4)
+ mfspr r3, SPRN_SPRG_RSCRATCH0 /* guest r4, parked there by the handler stub */
+ stw r3, VCPU_GPR(r4)(r4)
+ mfspr r3, SPRN_SRR0
+ stw r3, VCPU_PC(r4) /* guest PC = SRR0 at the time of the exit */
+
+ /* Restore host stack pointer and PID before IVPR, since the host
+ * exception handlers use them. */
+ lwz r1, VCPU_HOST_STACK(r4)
+ lwz r3, VCPU_HOST_PID(r4)
+ mtspr SPRN_PID, r3
+
+#ifdef CONFIG_FSL_BOOKE
+ /* we cheat and know that Linux doesn't use PID1 which is always 0 */
+ lis r3, 0
+ mtspr SPRN_PID1, r3
+#endif
+
+ /* Restore host IVPR before re-enabling interrupts. We cheat and know
+ * that Linux IVPR is always 0xc0000000. */
+ lis r3, 0xc000
+ mtspr SPRN_IVPR, r3
+
+ /* Switch to kernel stack and jump to handler. */
+ LOAD_REG_ADDR(r3, kvmppc_handle_exit)
+ mtctr r3
+ lwz r3, HOST_RUN(r1) /* r3 = kvm_run pointer stored by __kvmppc_vcpu_run; r4 and r5 still hold vcpu and exit number */
+ lwz r2, HOST_R2(r1)
+ mr r14, r4 /* Save vcpu pointer. */
+
+ bctrl /* kvmppc_handle_exit() */
+
+ /* Restore vcpu pointer and the nonvolatiles we used. */
+ mr r4, r14
+ lwz r14, VCPU_GPR(r14)(r4)
+
+ /* Sometimes instruction emulation must restore complete GPR state. */
+ andi. r5, r3, RESUME_FLAG_NV
+ beq ..skip_nv_load
+ lwz r15, VCPU_GPR(r15)(r4)
+ lwz r16, VCPU_GPR(r16)(r4)
+ lwz r17, VCPU_GPR(r17)(r4)
+ lwz r18, VCPU_GPR(r18)(r4)
+ lwz r19, VCPU_GPR(r19)(r4)
+ lwz r20, VCPU_GPR(r20)(r4)
+ lwz r21, VCPU_GPR(r21)(r4)
+ lwz r22, VCPU_GPR(r22)(r4)
+ lwz r23, VCPU_GPR(r23)(r4)
+ lwz r24, VCPU_GPR(r24)(r4)
+ lwz r25, VCPU_GPR(r25)(r4)
+ lwz r26, VCPU_GPR(r26)(r4)
+ lwz r27, VCPU_GPR(r27)(r4)
+ lwz r28, VCPU_GPR(r28)(r4)
+ lwz r29, VCPU_GPR(r29)(r4)
+ lwz r30, VCPU_GPR(r30)(r4)
+ lwz r31, VCPU_GPR(r31)(r4)
+..skip_nv_load:
+
+ /* Should we return to the guest? */
+ andi. r5, r3, RESUME_FLAG_HOST
+ beq lightweight_exit /* flag clear: re-enter the guest */
+
+ srawi r3, r3, 2 /* Shift -ERR back down. */
+
+heavyweight_exit:
+ /* Not returning to guest. */
+
+#ifdef CONFIG_SPE
+ /* save guest SPEFSCR and load host SPEFSCR */
+ mfspr r9, SPRN_SPEFSCR
+ stw r9, VCPU_SPEFSCR(r4)
+ lwz r9, VCPU_HOST_SPEFSCR(r4)
+ mtspr SPRN_SPEFSCR, r9
+#endif
+
+ /* We already saved guest volatile register state; now save the
+ * non-volatiles. */
+ stw r15, VCPU_GPR(r15)(r4)
+ stw r16, VCPU_GPR(r16)(r4)
+ stw r17, VCPU_GPR(r17)(r4)
+ stw r18, VCPU_GPR(r18)(r4)
+ stw r19, VCPU_GPR(r19)(r4)
+ stw r20, VCPU_GPR(r20)(r4)
+ stw r21, VCPU_GPR(r21)(r4)
+ stw r22, VCPU_GPR(r22)(r4)
+ stw r23, VCPU_GPR(r23)(r4)
+ stw r24, VCPU_GPR(r24)(r4)
+ stw r25, VCPU_GPR(r25)(r4)
+ stw r26, VCPU_GPR(r26)(r4)
+ stw r27, VCPU_GPR(r27)(r4)
+ stw r28, VCPU_GPR(r28)(r4)
+ stw r29, VCPU_GPR(r29)(r4)
+ stw r30, VCPU_GPR(r30)(r4)
+ stw r31, VCPU_GPR(r31)(r4)
+
+ /* Load host non-volatile register state from host stack. */
+ lwz r14, HOST_NV_GPR(r14)(r1)
+ lwz r15, HOST_NV_GPR(r15)(r1)
+ lwz r16, HOST_NV_GPR(r16)(r1)
+ lwz r17, HOST_NV_GPR(r17)(r1)
+ lwz r18, HOST_NV_GPR(r18)(r1)
+ lwz r19, HOST_NV_GPR(r19)(r1)
+ lwz r20, HOST_NV_GPR(r20)(r1)
+ lwz r21, HOST_NV_GPR(r21)(r1)
+ lwz r22, HOST_NV_GPR(r22)(r1)
+ lwz r23, HOST_NV_GPR(r23)(r1)
+ lwz r24, HOST_NV_GPR(r24)(r1)
+ lwz r25, HOST_NV_GPR(r25)(r1)
+ lwz r26, HOST_NV_GPR(r26)(r1)
+ lwz r27, HOST_NV_GPR(r27)(r1)
+ lwz r28, HOST_NV_GPR(r28)(r1)
+ lwz r29, HOST_NV_GPR(r29)(r1)
+ lwz r30, HOST_NV_GPR(r30)(r1)
+ lwz r31, HOST_NV_GPR(r31)(r1)
+
+ /* Return to kvm_vcpu_run(). */
+ lwz r4, HOST_STACK_LR(r1) /* host LR stored by __kvmppc_vcpu_run */
+ lwz r5, HOST_CR(r1)
+ addi r1, r1, HOST_STACK_SIZE /* pop the frame created by __kvmppc_vcpu_run */
+ mtlr r4
+ mtcr r5
+ /* r3 still contains the return code from kvmppc_handle_exit(). */
+ blr
+
+
+/* Registers:
+ * r3: kvm_run pointer
+ * r4: vcpu pointer
+ *
+ * Creates a host stack frame, stores the host state that
+ * heavyweight_exit later restores (LR, CR and r14-r31), loads the guest
+ * non-volatile GPRs and falls through to lightweight_exit. That label
+ * is also the re-entry point used by kvmppc_resume_host; it loads the
+ * remaining guest state, switches PID and IVPR, and enters the guest
+ * with rfi.
+ */
+_GLOBAL(__kvmppc_vcpu_run)
+ stwu r1, -HOST_STACK_SIZE(r1)
+ stw r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */
+
+ /* Save host state to stack. */
+ stw r3, HOST_RUN(r1)
+ mflr r3
+ stw r3, HOST_STACK_LR(r1)
+ mfcr r5
+ stw r5, HOST_CR(r1)
+
+ /* Save host non-volatile register state to stack. */
+ stw r14, HOST_NV_GPR(r14)(r1)
+ stw r15, HOST_NV_GPR(r15)(r1)
+ stw r16, HOST_NV_GPR(r16)(r1)
+ stw r17, HOST_NV_GPR(r17)(r1)
+ stw r18, HOST_NV_GPR(r18)(r1)
+ stw r19, HOST_NV_GPR(r19)(r1)
+ stw r20, HOST_NV_GPR(r20)(r1)
+ stw r21, HOST_NV_GPR(r21)(r1)
+ stw r22, HOST_NV_GPR(r22)(r1)
+ stw r23, HOST_NV_GPR(r23)(r1)
+ stw r24, HOST_NV_GPR(r24)(r1)
+ stw r25, HOST_NV_GPR(r25)(r1)
+ stw r26, HOST_NV_GPR(r26)(r1)
+ stw r27, HOST_NV_GPR(r27)(r1)
+ stw r28, HOST_NV_GPR(r28)(r1)
+ stw r29, HOST_NV_GPR(r29)(r1)
+ stw r30, HOST_NV_GPR(r30)(r1)
+ stw r31, HOST_NV_GPR(r31)(r1)
+
+ /* Load guest non-volatiles. */
+ lwz r14, VCPU_GPR(r14)(r4)
+ lwz r15, VCPU_GPR(r15)(r4)
+ lwz r16, VCPU_GPR(r16)(r4)
+ lwz r17, VCPU_GPR(r17)(r4)
+ lwz r18, VCPU_GPR(r18)(r4)
+ lwz r19, VCPU_GPR(r19)(r4)
+ lwz r20, VCPU_GPR(r20)(r4)
+ lwz r21, VCPU_GPR(r21)(r4)
+ lwz r22, VCPU_GPR(r22)(r4)
+ lwz r23, VCPU_GPR(r23)(r4)
+ lwz r24, VCPU_GPR(r24)(r4)
+ lwz r25, VCPU_GPR(r25)(r4)
+ lwz r26, VCPU_GPR(r26)(r4)
+ lwz r27, VCPU_GPR(r27)(r4)
+ lwz r28, VCPU_GPR(r28)(r4)
+ lwz r29, VCPU_GPR(r29)(r4)
+ lwz r30, VCPU_GPR(r30)(r4)
+ lwz r31, VCPU_GPR(r31)(r4)
+
+#ifdef CONFIG_SPE
+ /* save host SPEFSCR and load guest SPEFSCR */
+ mfspr r3, SPRN_SPEFSCR
+ stw r3, VCPU_HOST_SPEFSCR(r4)
+ lwz r3, VCPU_SPEFSCR(r4)
+ mtspr SPRN_SPEFSCR, r3
+#endif
+
+lightweight_exit:
+ stw r2, HOST_R2(r1) /* host r2 is reloaded from this slot by kvmppc_resume_host */
+
+ mfspr r3, SPRN_PID
+ stw r3, VCPU_HOST_PID(r4) /* remember the host PID, then switch to the shadow PID */
+ lwz r3, VCPU_SHADOW_PID(r4)
+ mtspr SPRN_PID, r3
+
+#ifdef CONFIG_FSL_BOOKE
+ lwz r3, VCPU_SHADOW_PID1(r4)
+ mtspr SPRN_PID1, r3
+#endif
+
+#ifdef CONFIG_44x
+ iccci 0, 0 /* XXX hack */
+#endif
+
+ /* Load some guest volatiles. */
+ lwz r0, VCPU_GPR(r0)(r4)
+ lwz r2, VCPU_GPR(r2)(r4)
+ lwz r9, VCPU_GPR(r9)(r4)
+ lwz r10, VCPU_GPR(r10)(r4)
+ lwz r11, VCPU_GPR(r11)(r4)
+ lwz r12, VCPU_GPR(r12)(r4)
+ lwz r13, VCPU_GPR(r13)(r4)
+ lwz r3, VCPU_LR(r4)
+ mtlr r3
+ lwz r3, VCPU_XER(r4)
+ mtxer r3
+
+ /* Switch the IVPR. XXX If we take a TLB miss after this we're screwed,
+ * so how do we make sure vcpu won't fault? */
+ lis r8, kvmppc_booke_handlers@ha
+ lwz r8, kvmppc_booke_handlers@l(r8) /* r8 = value stored in kvmppc_booke_handlers */
+ mtspr SPRN_IVPR, r8
+
+ /* Save vcpu pointer for the exception handlers. */
+ mtspr SPRN_SPRG_WVCPU, r4
+
+ lwz r5, VCPU_SHARED(r4) /* r5 = pointer to the shared area, used for the SPRG loads below */
+
+ /* Can't switch the stack pointer until after IVPR is switched,
+ * because host interrupt handlers would get confused. */
+ lwz r1, VCPU_GPR(r1)(r4)
+
+ /*
+ * Host interrupt handlers may have clobbered these
+ * guest-readable SPRGs, or the guest kernel may have
+ * written directly to the shared area, so we
+ * need to reload them here with the guest's values.
+ */
+ lwz r3, VCPU_SHARED_SPRG4(r5)
+ mtspr SPRN_SPRG4W, r3
+ lwz r3, VCPU_SHARED_SPRG5(r5)
+ mtspr SPRN_SPRG5W, r3
+ lwz r3, VCPU_SHARED_SPRG6(r5)
+ mtspr SPRN_SPRG6W, r3
+ lwz r3, VCPU_SHARED_SPRG7(r5)
+ mtspr SPRN_SPRG7W, r3
+
+#ifdef CONFIG_KVM_EXIT_TIMING
+ /* save enter time */
+1:
+ mfspr r6, SPRN_TBRU
+ mfspr r7, SPRN_TBRL
+ mfspr r8, SPRN_TBRU
+ cmpw r8, r6
+ bne 1b /* upper half changed while reading the lower half: read again */
+ stw r7, VCPU_TIMING_LAST_ENTER_TBL(r4)
+ stw r8, VCPU_TIMING_LAST_ENTER_TBU(r4)
+#endif
+
+ /* Finish loading guest volatiles and jump to guest. */
+ lwz r3, VCPU_CTR(r4)
+ lwz r5, VCPU_CR(r4)
+ lwz r6, VCPU_PC(r4)
+ lwz r7, VCPU_SHADOW_MSR(r4)
+ mtctr r3
+ mtcr r5
+ mtsrr0 r6 /* rfi below resumes at the guest PC ... */
+ mtsrr1 r7 /* ... with the shadow MSR */
+ lwz r5, VCPU_GPR(r5)(r4)
+ lwz r6, VCPU_GPR(r6)(r4)
+ lwz r7, VCPU_GPR(r7)(r4)
+ lwz r8, VCPU_GPR(r8)(r4)
+
+ /* Clear any debug events which occurred since we disabled MSR[DE].
+ * XXX This gives us a 3-instruction window in which a breakpoint
+ * intended for guest context could fire in the host instead. */
+ lis r3, 0xffff
+ ori r3, r3, 0xffff
+ mtspr SPRN_DBSR, r3
+
+ lwz r3, VCPU_GPR(r3)(r4)
+ lwz r4, VCPU_GPR(r4)(r4) /* r4 is the base register, so it is loaded last */
+ rfi
+
+#ifdef CONFIG_SPE
+_GLOBAL(kvmppc_save_guest_spe)
+ cmpi 0,r3,0
+ beqlr- /* return immediately when r3 is 0 (r3 is presumably the vcpu pointer argument - confirm) */
+ SAVE_32EVRS(0, r4, r3, VCPU_EVR)
+ evxor evr6, evr6, evr6 /* evr6 = 0 */
+ evmwumiaa evr6, evr6, evr6 /* presumably evr6 = accumulator + 0*0, i.e. a read of the accumulator - confirm against the SPE manual */
+ li r4,VCPU_ACC
+ evstddx evr6, r4, r3 /* save acc */
+ blr
+
+_GLOBAL(kvmppc_load_guest_spe)
+ cmpi 0,r3,0
+ beqlr- /* return immediately when r3 is 0 */
+ li r4,VCPU_ACC
+ evlddx evr6,r4,r3
+ evmra evr6,evr6 /* load acc */
+ REST_32EVRS(0, r4, r3, VCPU_EVR) /* done after the accumulator load, which uses evr6 as a temporary */
+ blr
+#endif
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
new file mode 100644
index 00000000..ddcd896f
--- /dev/null
+++ b/arch/powerpc/kvm/e500.c
@@ -0,0 +1,264 @@
+/*
+ * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, <yu.liu@freescale.com>
+ *
+ * Description:
+ * This file is derived from arch/powerpc/kvm/44x.c,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/export.h>
+
+#include <asm/reg.h>
+#include <asm/cputable.h>
+#include <asm/tlbflush.h>
+#include <asm/kvm_e500.h>
+#include <asm/kvm_ppc.h>
+
+#include "booke.h"
+#include "e500_tlb.h"
+
+/* Core hook for restoring host debug state; intentionally empty on e500. */
+void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
+{
+	/* nothing to do */
+}
+
+/* Core hook for loading guest debug state; intentionally empty on e500. */
+void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
+{
+	/* nothing to do */
+}
+
+/* Core hook for loading a vcpu on a CPU: delegates to the e500 TLB code. */
+void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+	kvmppc_e500_tlb_load(vcpu, cpu);
+}
+
+/*
+ * Core hook for putting a vcpu: releases the e500 TLB state and, on
+ * CONFIG_SPE kernels, disables SPE for the vcpu when MSR_SPE is set in
+ * its shadow MSR.
+ */
+void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+{
+	kvmppc_e500_tlb_put(vcpu);
+
+#ifdef CONFIG_SPE
+	if (vcpu->arch.shadow_msr & MSR_SPE)
+		kvmppc_vcpu_disable_spe(vcpu);
+#endif
+}
+
+/*
+ * Accept only hosts whose CPU name is exactly "e500v2".
+ * Returns 0 when compatible, -ENOTSUPP otherwise.
+ */
+int kvmppc_core_check_processor_compat(void)
+{
+	if (strcmp(cur_cpu_spec->cpu_name, "e500v2") != 0)
+		return -ENOTSUPP;
+
+	return 0;
+}
+
+/*
+ * Set up a new vcpu: initialise its TLB state, seed the guest PVR and
+ * SVR from the host registers and mark the CPU type as e500v2.
+ * Always returns 0.
+ */
+int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_e500 *e500 = to_e500(vcpu);
+
+	kvmppc_e500_tlb_setup(e500);
+
+	/* The guest sees the host's processor and system version. */
+	vcpu->arch.pvr = mfspr(SPRN_PVR);
+	e500->svr = mfspr(SPRN_SVR);
+
+	vcpu->arch.cpu_type = KVM_CPU_E500V2;
+
+	return 0;
+}
+
+/*
+ * Translate a guest address by searching the guest TLB.
+ *
+ * 'linear_address' is actually an encoding of AS|PID|EADDR: bit 40 is
+ * the address space, bits 32-39 the PID, and the low bits the effective
+ * address.  tr->valid reports whether a mapping was found;
+ * tr->physical_address is written only in that case.  Always returns 0.
+ */
+int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
+			       struct kvm_translation *tr)
+{
+	gva_t eaddr = tr->linear_address;
+	u8 pid = (tr->linear_address >> 32) & 0xff;
+	u8 as = (tr->linear_address >> 40) & 0x1;
+	int index = kvmppc_e500_tlb_search(vcpu, eaddr, pid, as);
+
+	if (index >= 0) {
+		tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr);
+		/* XXX what does "writeable" and "usermode" even mean? */
+		tr->valid = 1;
+	} else {
+		tr->valid = 0;
+	}
+
+	return 0;
+}
+
+/*
+ * Fill in the e500-specific part of *sregs: feature flags, the Freescale
+ * implementation registers, the MAS registers, the MMU/TLB configuration
+ * and the four high IVORs.  The remaining IVORs are added by
+ * kvmppc_get_sregs_ivor().
+ */
+void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+	struct kvmppc_vcpu_e500 *e500 = to_e500(vcpu);
+
+	/* OR-ed in, so feature bits already present are preserved. */
+	sregs->u.e.features |= KVM_SREGS_E_ARCH206_MMU;
+	sregs->u.e.features |= KVM_SREGS_E_SPE;
+	sregs->u.e.features |= KVM_SREGS_E_PM;
+
+	/* Freescale implementation block. */
+	sregs->u.e.impl_id = KVM_SREGS_E_IMPL_FSL;
+	sregs->u.e.impl.fsl.features = 0;
+	sregs->u.e.impl.fsl.svr = e500->svr;
+	sregs->u.e.impl.fsl.hid0 = e500->hid0;
+	sregs->u.e.impl.fsl.mcar = e500->mcar;
+
+	/* MAS registers are kept in the shared area. */
+	sregs->u.e.mas0 = vcpu->arch.shared->mas0;
+	sregs->u.e.mas1 = vcpu->arch.shared->mas1;
+	sregs->u.e.mas2 = vcpu->arch.shared->mas2;
+	sregs->u.e.mas7_3 = vcpu->arch.shared->mas7_3;
+	sregs->u.e.mas4 = vcpu->arch.shared->mas4;
+	sregs->u.e.mas6 = vcpu->arch.shared->mas6;
+
+	/* MMUCFG is read from the host; only two TLB arrays are reported. */
+	sregs->u.e.mmucfg = mfspr(SPRN_MMUCFG);
+	sregs->u.e.tlbcfg[0] = e500->tlb0cfg;
+	sregs->u.e.tlbcfg[1] = e500->tlb1cfg;
+	sregs->u.e.tlbcfg[2] = 0;
+	sregs->u.e.tlbcfg[3] = 0;
+
+	sregs->u.e.ivor_high[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
+	sregs->u.e.ivor_high[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA];
+	sregs->u.e.ivor_high[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND];
+	sregs->u.e.ivor_high[3] =
+		vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
+
+	kvmppc_get_sregs_ivor(vcpu, sregs);
+}
+
+/*
+ * Import the e500-specific part of *sregs into the vcpu.
+ *
+ * Each group is applied only when the matching impl_id or feature bit
+ * is present.  The IVORs (high ones here, the rest through
+ * kvmppc_set_sregs_ivor()) are imported only when KVM_SREGS_E_IVOR is
+ * set.  Returns 0, or the result of kvmppc_set_sregs_ivor().
+ */
+int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+	struct kvmppc_vcpu_e500 *e500 = to_e500(vcpu);
+
+	if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
+		e500->svr = sregs->u.e.impl.fsl.svr;
+		e500->hid0 = sregs->u.e.impl.fsl.hid0;
+		e500->mcar = sregs->u.e.impl.fsl.mcar;
+	}
+
+	if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) {
+		vcpu->arch.shared->mas0 = sregs->u.e.mas0;
+		vcpu->arch.shared->mas1 = sregs->u.e.mas1;
+		vcpu->arch.shared->mas2 = sregs->u.e.mas2;
+		vcpu->arch.shared->mas7_3 = sregs->u.e.mas7_3;
+		vcpu->arch.shared->mas4 = sregs->u.e.mas4;
+		vcpu->arch.shared->mas6 = sregs->u.e.mas6;
+	}
+
+	if (sregs->u.e.features & KVM_SREGS_E_IVOR) {
+		if (sregs->u.e.features & KVM_SREGS_E_SPE) {
+			vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] =
+				sregs->u.e.ivor_high[0];
+			vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] =
+				sregs->u.e.ivor_high[1];
+			vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] =
+				sregs->u.e.ivor_high[2];
+		}
+
+		if (sregs->u.e.features & KVM_SREGS_E_PM) {
+			vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] =
+				sregs->u.e.ivor_high[3];
+		}
+
+		return kvmppc_set_sregs_ivor(vcpu, sregs);
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate and initialise an e500 vcpu.
+ *
+ * Steps, in order: allocate the container from kvm_vcpu_cache, run the
+ * generic kvm_vcpu_init(), initialise the e500 TLB state, and allocate
+ * one zeroed page for the shared area.  On failure everything done so
+ * far is undone in reverse order.
+ *
+ * Returns the new vcpu, or ERR_PTR(-errno) on failure.
+ */
+struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500;
+	struct kvm_vcpu *vcpu;
+	int err;
+
+	vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+	if (!vcpu_e500) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	vcpu = &vcpu_e500->vcpu;
+	err = kvm_vcpu_init(vcpu, kvm, id);
+	if (err)
+		goto free_vcpu;
+
+	err = kvmppc_e500_tlb_init(vcpu_e500);
+	if (err)
+		goto uninit_vcpu;
+
+	vcpu->arch.shared = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+	if (!vcpu->arch.shared) {
+		/*
+		 * err is still 0 from the successful TLB init above.
+		 * Without setting it here the function would return
+		 * ERR_PTR(0), i.e. NULL, which is not an error pointer.
+		 */
+		err = -ENOMEM;
+		goto uninit_tlb;
+	}
+
+	return vcpu;
+
+uninit_tlb:
+	kvmppc_e500_tlb_uninit(vcpu_e500);
+uninit_vcpu:
+	kvm_vcpu_uninit(vcpu);
+free_vcpu:
+	kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
+out:
+	return ERR_PTR(err);
+}
+
+/*
+ * Release everything kvmppc_core_vcpu_create() set up: the shared
+ * page, the generic vcpu state, the e500 TLB state and finally the
+ * container itself.
+ */
+void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_e500 *e500 = to_e500(vcpu);
+
+	free_page((unsigned long)vcpu->arch.shared);
+	kvm_vcpu_uninit(vcpu);
+	kvmppc_e500_tlb_uninit(e500);
+	kmem_cache_free(kvm_vcpu_cache, e500);
+}
+
+/*
+ * Module init: check the host CPU, run the common BookE init, copy the
+ * three extra e500 handler stubs into the guest handler area at the
+ * offsets given by the host's IVOR32-34, flush the instruction cache
+ * over the area, then register with the generic KVM code.
+ *
+ * NOTE(review): when kvm_init() fails, nothing here undoes
+ * kvmppc_booke_init() - confirm whether cleanup is required.
+ */
+static int __init kvmppc_e500_init(void)
+{
+	unsigned long spe_ivor[3];
+	unsigned long highest = 0;
+	int idx;
+	int ret;
+
+	ret = kvmppc_core_check_processor_compat();
+	if (ret)
+		return ret;
+
+	ret = kvmppc_booke_init();
+	if (ret)
+		return ret;
+
+	/* copy extra E500 exception handlers */
+	spe_ivor[0] = mfspr(SPRN_IVOR32);
+	spe_ivor[1] = mfspr(SPRN_IVOR33);
+	spe_ivor[2] = mfspr(SPRN_IVOR34);
+
+	for (idx = 0; idx < 3; idx++) {
+		void *dst = (void *)kvmppc_booke_handlers + spe_ivor[idx];
+
+		/* The source stubs are taken from index 16 onwards. */
+		memcpy(dst,
+		       kvmppc_handlers_start + (idx + 16) * kvmppc_handler_len,
+		       kvmppc_handler_len);
+
+		if (spe_ivor[idx] > highest)
+			highest = spe_ivor[idx];
+	}
+
+	/* Flush up to the end of the highest stub just written. */
+	flush_icache_range(kvmppc_booke_handlers,
+			   kvmppc_booke_handlers + highest + kvmppc_handler_len);
+
+	return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
+}
+
+/* Module exit: all teardown is delegated to the common BookE code. */
+static void __exit kvmppc_e500_exit(void)
+{
+	kvmppc_booke_exit();
+}
+
+module_init(kvmppc_e500_init);
+module_exit(kvmppc_e500_exit);
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
new file mode 100644
index 00000000..6d0b2bd5
--- /dev/null
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -0,0 +1,217 @@
+/*
+ * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, <yu.liu@freescale.com>
+ *
+ * Description:
+ * This file is derived from arch/powerpc/kvm/44x_emulate.c,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/kvm_ppc.h>
+#include <asm/disassemble.h>
+#include <asm/kvm_e500.h>
+
+#include "booke.h"
+#include "e500_tlb.h"
+
+#define XOP_TLBIVAX 786
+#define XOP_TLBSX 914
+#define XOP_TLBRE 946
+#define XOP_TLBWE 978
+
+/*
+ * Emulate the e500-specific TLB instructions (tlbre, tlbwe, tlbsx,
+ * tlbivax; all primary opcode 31).  Anything not handled here, or for
+ * which the TLB helper itself reports EMULATE_FAIL, is passed on to
+ * kvmppc_booke_emulate_op().
+ *
+ * Returns the emulation result of whichever handler ran last.
+ */
+int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+			   unsigned int inst, int *advance)
+{
+	int result = EMULATE_FAIL;
+	int reg_a;
+	int reg_b;
+
+	if (get_op(inst) == 31) {
+		switch (get_xop(inst)) {
+		case XOP_TLBRE:
+			result = kvmppc_e500_emul_tlbre(vcpu);
+			break;
+
+		case XOP_TLBWE:
+			result = kvmppc_e500_emul_tlbwe(vcpu);
+			break;
+
+		case XOP_TLBSX:
+			reg_b = get_rb(inst);
+			result = kvmppc_e500_emul_tlbsx(vcpu, reg_b);
+			break;
+
+		case XOP_TLBIVAX:
+			reg_a = get_ra(inst);
+			reg_b = get_rb(inst);
+			result = kvmppc_e500_emul_tlbivax(vcpu, reg_a, reg_b);
+			break;
+		}
+	}
+
+	/* Fall back to the instructions common to all BookE cores. */
+	if (result == EMULATE_FAIL)
+		result = kvmppc_booke_emulate_op(run, vcpu, inst, advance);
+
+	return result;
+}
+
+/*
+ * Emulate a guest mtspr to an e500-specific SPR.  The value is read
+ * from guest GPR rs.  SPRs not listed here are passed on to
+ * kvmppc_booke_emulate_mtspr().
+ *
+ * Returns EMULATE_DONE on success, EMULATE_FAIL for a non-zero write
+ * to PID1 or PID2, or the result of the helper that handled the SPR.
+ */
+int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+{
+	struct kvmppc_vcpu_e500 *e500 = to_e500(vcpu);
+	ulong val = kvmppc_get_gpr(vcpu, rs);
+
+	switch (sprn) {
+	case SPRN_PID:
+		kvmppc_set_pid(vcpu, val);
+		break;
+
+	/* Only the value 0 is accepted for PID1 and PID2. */
+	case SPRN_PID1:
+		if (val != 0)
+			return EMULATE_FAIL;
+		e500->pid[1] = val;
+		break;
+	case SPRN_PID2:
+		if (val != 0)
+			return EMULATE_FAIL;
+		e500->pid[2] = val;
+		break;
+
+	case SPRN_MAS0:
+		vcpu->arch.shared->mas0 = val;
+		break;
+	case SPRN_MAS1:
+		vcpu->arch.shared->mas1 = val;
+		break;
+	case SPRN_MAS2:
+		vcpu->arch.shared->mas2 = val;
+		break;
+	case SPRN_MAS3:
+		/* MAS3 is the low 32 bits of the combined mas7_3 field. */
+		vcpu->arch.shared->mas7_3 &= ~(u64)0xffffffff;
+		vcpu->arch.shared->mas7_3 |= val;
+		break;
+	case SPRN_MAS4:
+		vcpu->arch.shared->mas4 = val;
+		break;
+	case SPRN_MAS6:
+		vcpu->arch.shared->mas6 = val;
+		break;
+	case SPRN_MAS7:
+		/* MAS7 is the high 32 bits of the combined mas7_3 field. */
+		vcpu->arch.shared->mas7_3 &= (u64)0xffffffff;
+		vcpu->arch.shared->mas7_3 |= (u64)val << 32;
+		break;
+
+	case SPRN_L1CSR0:
+		/* The DCFI and CLFC bits are never stored. */
+		e500->l1csr0 = val & ~(L1CSR0_DCFI | L1CSR0_CLFC);
+		break;
+	case SPRN_L1CSR1:
+		e500->l1csr1 = val;
+		break;
+	case SPRN_HID0:
+		e500->hid0 = val;
+		break;
+	case SPRN_HID1:
+		e500->hid1 = val;
+		break;
+
+	case SPRN_MMUCSR0:
+		return kvmppc_e500_emul_mt_mmucsr0(e500, val);
+
+	/* extra exceptions */
+	case SPRN_IVOR32:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = val;
+		break;
+	case SPRN_IVOR33:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = val;
+		break;
+	case SPRN_IVOR34:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = val;
+		break;
+	case SPRN_IVOR35:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = val;
+		break;
+
+	default:
+		return kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
+	}
+
+	return EMULATE_DONE;
+}
+
+/*
+ * Emulate a guest mfspr from an e500-specific SPR.  The result is
+ * stored in guest GPR rt.  SPRs not listed here are passed on to
+ * kvmppc_booke_emulate_mfspr().
+ *
+ * Returns EMULATE_DONE for the SPRs handled here, otherwise the result
+ * of the common BookE helper.
+ */
+int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+{
+	struct kvmppc_vcpu_e500 *e500 = to_e500(vcpu);
+	unsigned long half;
+
+	switch (sprn) {
+	case SPRN_PID:
+		kvmppc_set_gpr(vcpu, rt, e500->pid[0]);
+		break;
+	case SPRN_PID1:
+		kvmppc_set_gpr(vcpu, rt, e500->pid[1]);
+		break;
+	case SPRN_PID2:
+		kvmppc_set_gpr(vcpu, rt, e500->pid[2]);
+		break;
+
+	case SPRN_MAS0:
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas0);
+		break;
+	case SPRN_MAS1:
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas1);
+		break;
+	case SPRN_MAS2:
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas2);
+		break;
+	case SPRN_MAS3:
+		/* Low 32 bits of the combined mas7_3 field. */
+		half = (u32)vcpu->arch.shared->mas7_3;
+		kvmppc_set_gpr(vcpu, rt, half);
+		break;
+	case SPRN_MAS4:
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas4);
+		break;
+	case SPRN_MAS6:
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas6);
+		break;
+	case SPRN_MAS7:
+		/* High 32 bits of the combined mas7_3 field. */
+		half = vcpu->arch.shared->mas7_3 >> 32;
+		kvmppc_set_gpr(vcpu, rt, half);
+		break;
+
+	case SPRN_TLB0CFG:
+		kvmppc_set_gpr(vcpu, rt, e500->tlb0cfg);
+		break;
+	case SPRN_TLB1CFG:
+		kvmppc_set_gpr(vcpu, rt, e500->tlb1cfg);
+		break;
+	case SPRN_L1CSR0:
+		kvmppc_set_gpr(vcpu, rt, e500->l1csr0);
+		break;
+	case SPRN_L1CSR1:
+		kvmppc_set_gpr(vcpu, rt, e500->l1csr1);
+		break;
+	case SPRN_HID0:
+		kvmppc_set_gpr(vcpu, rt, e500->hid0);
+		break;
+	case SPRN_HID1:
+		kvmppc_set_gpr(vcpu, rt, e500->hid1);
+		break;
+	case SPRN_SVR:
+		kvmppc_set_gpr(vcpu, rt, e500->svr);
+		break;
+
+	case SPRN_MMUCSR0:
+		/* Always reads back as zero. */
+		kvmppc_set_gpr(vcpu, rt, 0);
+		break;
+
+	case SPRN_MMUCFG:
+		/* Read straight from the host register. */
+		kvmppc_set_gpr(vcpu, rt, mfspr(SPRN_MMUCFG));
+		break;
+
+	/* extra exceptions */
+	case SPRN_IVOR32:
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]);
+		break;
+	case SPRN_IVOR33:
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]);
+		break;
+	case SPRN_IVOR34:
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]);
+		break;
+	case SPRN_IVOR35:
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]);
+		break;
+
+	default:
+		return kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
+	}
+
+	return EMULATE_DONE;
+}
+
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
new file mode 100644
index 00000000..6e53e416
--- /dev/null
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -0,0 +1,1392 @@
+/*
+ * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, yu.liu@freescale.com
+ *
+ * Description:
+ * This file is based on arch/powerpc/kvm/44x_tlb.c,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/highmem.h>
+#include <linux/log2.h>
+#include <linux/uaccess.h>
+#include <linux/sched.h>
+#include <linux/rwsem.h>
+#include <linux/vmalloc.h>
+#include <linux/hugetlb.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_e500.h>
+
+#include "../mm/mmu_decl.h"
+#include "e500_tlb.h"
+#include "trace.h"
+#include "timing.h"
+
+/*
+ * Translate a shadow TLB1 index into a host TLB1 entry number, counting
+ * down from the last host TLB1 entry.
+ */
+#define to_htlb1_esel(esel) (host_tlb_params[1].entries - (esel) - 1)
+
+/*
+ * One shadow ID mapping.
+ * val: shadow ID; 0 means no mapping has been set up.
+ * pentry: address of the per-cpu pcpu_id_table slot that points back at
+ * this entry; NULL once the mapping has been reset.
+ */
+struct id {
+ unsigned long val;
+ struct id **pentry;
+};
+
+/* Size of the guest TID dimension and of the per-cpu shadow ID table. */
+#define NUM_TIDS 256
+
+/*
+ * This table provides mappings from:
+ * (guestAS,guestTID,guestPR) --> ID of physical cpu
+ * guestAS [0..1]
+ * guestTID [0..255]
+ * guestPR [0..1]
+ * ID [1..255]
+ * Each vcpu keeps one vcpu_id_table.
+ */
+struct vcpu_id_table {
+ struct id id[2][NUM_TIDS][2];
+};
+
+/*
+ * This table provides the reverse mappings of vcpu_id_table:
+ * ID --> address of vcpu_id_table item.
+ * Each physical core has one pcpu_id_table.
+ */
+struct pcpu_id_table {
+ struct id *entry[NUM_TIDS];
+};
+
+/* Per-cpu reverse table: shadow ID --> owning vcpu_id_table item. */
+static DEFINE_PER_CPU(struct pcpu_id_table, pcpu_sids);
+
+/*
+ * This variable keeps the last used shadow ID on the local core.
+ * The valid range of shadow IDs is [1..255].
+ */
+static DEFINE_PER_CPU(unsigned long, pcpu_last_used_sid);
+
+/* Host TLB parameters, indexed by TLB number. */
+static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM];
+
+/*
+ * Return the guest TLB entry at index @entry within guest TLB @tlbsel.
+ * Both guest TLBs live in one flat gtlb_arch array; gtlb_offset[] gives
+ * the index at which each TLB starts.
+ */
+static struct kvm_book3e_206_tlb_entry *get_entry(
+	struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int entry)
+{
+	int first = vcpu_e500->gtlb_offset[tlbsel];
+	struct kvm_book3e_206_tlb_entry *tlb = vcpu_e500->gtlb_arch;
+
+	return tlb + (first + entry);
+}
+
+/*
+ * Hand out the next free shadow id on this core and bind it to @entry.
+ * A binding is valid only while the vcpu-side entry and the per-cpu
+ * pcpu_sids slot refer to each other.
+ *
+ * Returns the new shadow id, or -1 when the per-cpu counter has reached
+ * NUM_TIDS (no ids left); the caller is then expected to invalidate
+ * everything and start over.
+ *
+ * The caller must have preemption disabled, and keep it that way until
+ * it has finished with the returned shadow id (either written into the
+ * TLB or arch.shadow_pid, or discarded).
+ */
+static inline int local_sid_setup_one(struct id *entry)
+{
+	unsigned long sid = ++(__get_cpu_var(pcpu_last_used_sid));
+	struct id **slot;
+
+	/*
+	 * sid == NUM_TIDS means we have run out of sids. sid > NUM_TIDS
+	 * indicates a race, which disabling preemption is meant to avoid.
+	 */
+	WARN_ON(sid > NUM_TIDS);
+	if (sid >= NUM_TIDS)
+		return -1;
+
+	slot = &__get_cpu_var(pcpu_sids).entry[sid];
+	*slot = entry;
+	entry->val = sid;
+	entry->pentry = slot;
+
+	return sid;
+}
+
+/*
+ * Return the shadow id stored in @entry if it is still valid on this
+ * core, or -1 otherwise. A mapping counts as valid only when the vcpu
+ * side (@entry) and the per-cpu side (pcpu_sids) agree with each other.
+ *
+ * The caller must have preemption disabled, and keep it that way until
+ * it has finished with the returned shadow id (either written into the
+ * TLB or arch.shadow_pid, or discarded).
+ */
+static inline int local_sid_lookup(struct id *entry)
+{
+	struct id **slot;
+
+	if (!entry || entry->val == 0)
+		return -1;
+
+	slot = &__get_cpu_var(pcpu_sids).entry[entry->val];
+	if (*slot != entry || entry->pentry != slot)
+		return -1;
+
+	return entry->val;
+}
+
+/*
+ * Forget every shadow id mapping on the local core: restart the id
+ * counter and wipe the reverse table. Call with preemption disabled.
+ */
+static inline void local_sid_destroy_all(void)
+{
+	struct pcpu_id_table *table = &__get_cpu_var(pcpu_sids);
+
+	__get_cpu_var(pcpu_last_used_sid) = 0;
+	memset(table, 0, sizeof(*table));
+}
+
+/*
+ * Allocate a zeroed vcpu_id_table and attach it to @vcpu_e500.
+ * Returns the table, or NULL if the allocation failed.
+ */
+static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	struct vcpu_id_table *table = kzalloc(sizeof(*table), GFP_KERNEL);
+
+	vcpu_e500->idt = table;
+	return vcpu_e500->idt;
+}
+
+/*
+ * Free the vcpu's id table. The idt pointer itself is left untouched,
+ * so it must not be used afterwards.
+ */
+static void kvmppc_e500_id_table_free(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ kfree(vcpu_e500->idt);
+}
+
+/*
+ * Drop every shadow id mapping held by this vcpu by zeroing its id
+ * table, then recompute the shadow PIDs that depended on it.
+ */
+static void kvmppc_e500_id_table_reset_all(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	struct vcpu_id_table *table = vcpu_e500->idt;
+
+	memset(table, 0, sizeof(struct vcpu_id_table));
+
+	/* The mappings changed, so the cached shadow pids are stale. */
+	kvmppc_e500_recalc_shadow_pid(vcpu_e500);
+}
+
+/*
+ * Drop the single shadow id mapping for guest (@as, @pid, @pr) on this
+ * vcpu, then recompute the shadow PIDs. Out-of-range arguments BUG().
+ */
+static inline void kvmppc_e500_id_table_reset_one(
+	struct kvmppc_vcpu_e500 *vcpu_e500,
+	int as, int pid, int pr)
+{
+	struct vcpu_id_table *table = vcpu_e500->idt;
+	struct id *mapping;
+
+	BUG_ON(as >= 2);
+	BUG_ON(pid >= NUM_TIDS);
+	BUG_ON(pr >= 2);
+
+	mapping = &table->id[as][pid][pr];
+	mapping->val = 0;
+	mapping->pentry = NULL;
+
+	/* The mappings changed, so the cached shadow pids are stale. */
+	kvmppc_e500_recalc_shadow_pid(vcpu_e500);
+}
+
+/*
+ * Map guest (vcpu, @as, @gid, @pr) to a shadow id on the current core.
+ * An existing valid mapping is reused; otherwise a new one is created.
+ * When the core has run out of shadow ids, the host TLB and all local
+ * mappings are flushed and the allocation is retried.
+ *
+ * Unless @avoid_recursion is set, the vcpu's shadow PIDs are recomputed
+ * after every allocation attempt.
+ *
+ * The caller must have preemption disabled, and keep it that way until
+ * it has finished with the returned shadow id (either written into the
+ * TLB or arch.shadow_pid, or discarded).
+ */
+static unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500,
+					unsigned int as, unsigned int gid,
+					unsigned int pr, int avoid_recursion)
+{
+	struct vcpu_id_table *idt = vcpu_e500->idt;
+	struct id *mapping;
+	int sid;
+
+	BUG_ON(as >= 2);
+	BUG_ON(gid >= NUM_TIDS);
+	BUG_ON(pr >= 2);
+
+	mapping = &idt->id[as][gid][pr];
+
+	for (sid = local_sid_lookup(mapping); sid <= 0; ) {
+		/* No mapping yet: try to allocate one. */
+		sid = local_sid_setup_one(mapping);
+		if (sid <= 0) {
+			/* Out of ids: flush everything and go round again. */
+			_tlbil_all();
+			local_sid_destroy_all();
+		}
+
+		/* Update shadow pid when mappings are changed */
+		if (!avoid_recursion)
+			kvmppc_e500_recalc_shadow_pid(vcpu_e500);
+	}
+
+	return sid;
+}
+
+/*
+ * Recompute the vcpu's two shadow PIDs.
+ * shadow_pid shadows the guest's current PID, and shadow_pid1 shadows
+ * guest PID 0, so that guest TLB entries with TID=0 can be accessed at
+ * any time.
+ */
+void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	struct kvm_vcpu *vcpu = &vcpu_e500->vcpu;
+
+	preempt_disable();
+
+	/* avoid_recursion=1: get_sid must not call back into us. */
+	vcpu->arch.shadow_pid = kvmppc_e500_get_sid(vcpu_e500,
+			get_cur_as(vcpu),
+			get_cur_pid(vcpu),
+			get_cur_pr(vcpu), 1);
+	vcpu->arch.shadow_pid1 = kvmppc_e500_get_sid(vcpu_e500,
+			get_cur_as(vcpu), 0,
+			get_cur_pr(vcpu), 1);
+
+	preempt_enable();
+}
+
+/*
+ * Return the next guest TLB0 victim way and advance the round-robin
+ * counter gtlb_nv[0], wrapping to 0 once it reaches the number of ways.
+ */
+static inline unsigned int gtlb0_get_next_victim(
+ struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ unsigned int victim;
+
+ victim = vcpu_e500->gtlb_nv[0]++;
+ if (unlikely(vcpu_e500->gtlb_nv[0] >= vcpu_e500->gtlb_params[0].ways))
+ vcpu_e500->gtlb_nv[0] = 0;
+
+ return victim;
+}
+
+/*
+ * Number of host TLB1 entries usable for shadow mappings: everything
+ * above tlbcam_index, minus the one entry kept for the magic page.
+ */
+static inline unsigned int tlb1_max_shadow_size(void)
+{
+ /* reserve one entry for magic page */
+ return host_tlb_params[1].entries - tlbcam_index - 1;
+}
+
+/*
+ * Non-zero if @tlbe grants write permission to supervisor or user
+ * (MAS3_SW or MAS3_UW set in mas7_3).
+ */
+static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ return tlbe->mas7_3 & (MAS3_SW|MAS3_UW);
+}
+
+/*
+ * Build the MAS3 attribute bits for a shadow entry from the guest's
+ * MAS3 value.
+ *
+ * Reserved bits are masked off. When the guest is in supervisor mode
+ * (@usermode == 0) its supervisor permissions are moved into the user
+ * permission bits. The supervisor permission bits are always set in
+ * the result.
+ */
+static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
+{
+	u32 attrib = mas3 & MAS3_ATTRIB_MASK;
+
+	if (usermode)
+		return attrib | E500_TLB_SUPER_PERM_MASK;
+
+	/* Replace the user permissions with the supervisor ones. */
+	attrib &= ~E500_TLB_USER_PERM_MASK;
+	attrib |= (attrib & E500_TLB_SUPER_PERM_MASK) << 1;
+
+	return attrib | E500_TLB_SUPER_PERM_MASK;
+}
+
+/*
+ * Build the MAS2 attribute bits for a shadow entry from the guest's
+ * MAS2 value. On SMP kernels MAS2_M is always set as well.
+ * @usermode is currently unused.
+ */
+static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
+{
+#ifdef CONFIG_SMP
+ return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
+#else
+ return mas2 & MAS2_ATTRIB_MASK;
+#endif
+}
+
+/*
+ * Write the shadow TLB entry @stlbe into the host TLB slot selected by
+ * @mas0. Interrupts are disabled while the MAS registers are loaded and
+ * tlbwe is issued; the write is traced afterwards.
+ */
+static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe,
+ uint32_t mas0)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ mtspr(SPRN_MAS0, mas0);
+ mtspr(SPRN_MAS1, stlbe->mas1);
+ mtspr(SPRN_MAS2, (unsigned long)stlbe->mas2);
+ /* mas7_3 holds MAS3 in its low 32 bits and MAS7 in its high 32 bits. */
+ mtspr(SPRN_MAS3, (u32)stlbe->mas7_3);
+ mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32));
+ asm volatile("isync; tlbwe" : : : "memory");
+ local_irq_restore(flags);
+
+ /* mas8 is only traced here; it is not written to hardware. */
+ trace_kvm_booke206_stlb_write(mas0, stlbe->mas8, stlbe->mas1,
+ stlbe->mas2, stlbe->mas7_3);
+}
+
+/*
+ * Acquire a mas0 with victim hint, as if we just took a TLB miss.
+ *
+ * We don't care about the address we're searching for, other than that it's
+ * in the right set and is not present in the TLB. Using a zero PID and a
+ * userspace address means we don't have to set and then restore MAS5, or
+ * calculate a proper MAS6 value.
+ *
+ * Runs with interrupts disabled around the tlbsx and returns the MAS0
+ * value read back afterwards.
+ */
+static u32 get_host_mas0(unsigned long eaddr)
+{
+ unsigned long flags;
+ u32 mas0;
+
+ local_irq_save(flags);
+ mtspr(SPRN_MAS6, 0);
+ /* Clear the CONFIG_PAGE_OFFSET bits to get a userspace address. */
+ asm volatile("tlbsx 0, %0" : : "b" (eaddr & ~CONFIG_PAGE_OFFSET));
+ mas0 = mfspr(SPRN_MAS0);
+ local_irq_restore(flags);
+
+ return mas0;
+}
+
+/*
+ * Write shadow entry @stlbe into host TLB @tlbsel.
+ * For TLB0 the target way comes from the host's victim hint; for TLB1
+ * the shadow index @sesel picks the host entry (@sesel is ignored for
+ * TLB0).
+ */
+static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
+	int tlbsel, int sesel, struct kvm_book3e_206_tlb_entry *stlbe)
+{
+	u32 mas0;
+
+	if (tlbsel != 0) {
+		__write_host_tlbe(stlbe,
+				  MAS0_TLBSEL(1) |
+				  MAS0_ESEL(to_htlb1_esel(sesel)));
+		return;
+	}
+
+	mas0 = get_host_mas0(stlbe->mas2);
+	__write_host_tlbe(stlbe, mas0);
+}
+
+/*
+ * Map the page containing vcpu->arch.shared (the "magic page") into the
+ * host TLB1 entry at tlbcam_index, at guest address magic_page_ea, with
+ * read/write permission for both supervisor and user.
+ *
+ * NOTE(review): a page reference is taken with get_page() on every call
+ * and no matching put_page() is visible in this function - confirm the
+ * reference is dropped elsewhere.
+ */
+void kvmppc_map_magic(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ struct kvm_book3e_206_tlb_entry magic;
+ ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK;
+ unsigned int stid;
+ pfn_t pfn;
+
+ pfn = (pfn_t)virt_to_phys((void *)shared_page) >> PAGE_SHIFT;
+ get_page(pfn_to_page(pfn));
+
+ /* The shadow id is only meaningful on this core while preemption is off. */
+ preempt_disable();
+ stid = kvmppc_e500_get_sid(vcpu_e500, 0, 0, 0, 0);
+
+ magic.mas1 = MAS1_VALID | MAS1_TS | MAS1_TID(stid) |
+ MAS1_TSIZE(BOOK3E_PAGESZ_4K);
+ magic.mas2 = vcpu->arch.magic_page_ea | MAS2_M;
+ magic.mas7_3 = ((u64)pfn << PAGE_SHIFT) |
+ MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR;
+ magic.mas8 = 0;
+
+ __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index));
+ preempt_enable();
+}
+
+/*
+ * Refresh the vcpu's shadow PIDs, since the existing ones may no longer
+ * be valid on the local core. @cpu is unused.
+ */
+void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+ /* Shadow PID may be expired on local core */
+ kvmppc_e500_recalc_shadow_pid(vcpu_e500);
+}
+
+/* Counterpart of kvmppc_e500_tlb_load(); intentionally does nothing. */
+void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu)
+{
+}
+
+/*
+ * Invalidate the host shadow mapping(s) that back guest TLB entry
+ * (@tlbsel, @esel).
+ *
+ * For each privilege level, a TLB0 entry whose shadow PID is valid on
+ * this core is removed with a targeted tlbsx/tlbwe. In every other case
+ * (TLB1 entry, or no valid shadow PID here) the whole shadow PID
+ * mapping is reset instead.
+ */
+static void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500,
+				int tlbsel, int esel)
+{
+	struct kvm_book3e_206_tlb_entry *gtlbe =
+		get_entry(vcpu_e500, tlbsel, esel);
+	struct vcpu_id_table *idt = vcpu_e500->idt;
+	unsigned int pr, tid, ts;
+	/*
+	 * Must be signed: local_sid_lookup() returns -1 for "no valid
+	 * mapping", which an unsigned variable could never report as <= 0.
+	 */
+	int pid;
+	u32 val, eaddr;
+	unsigned long flags;
+
+	ts = get_tlb_ts(gtlbe);
+	tid = get_tlb_tid(gtlbe);
+
+	preempt_disable();
+
+	/* One guest ID may be mapped to two shadow IDs */
+	for (pr = 0; pr < 2; pr++) {
+		/*
+		 * The shadow PID can have a valid mapping on at most one
+		 * host CPU. In the common case, it will be valid on this
+		 * CPU, in which case (for TLB0) we do a local invalidation
+		 * of the specific address.
+		 *
+		 * If the shadow PID is not valid on the current host CPU, or
+		 * if we're invalidating a TLB1 entry, we invalidate the
+		 * entire shadow PID.
+		 */
+		pid = (tlbsel == 1) ? -1 :
+			local_sid_lookup(&idt->id[ts][tid][pr]);
+		if (pid <= 0) {
+			kvmppc_e500_id_table_reset_one(vcpu_e500, ts, tid, pr);
+			continue;
+		}
+
+		/*
+		 * The guest is invalidating a TLB0 entry which is in a PID
+		 * that has a valid shadow mapping on this host CPU. We
+		 * search host TLB0 to invalidate its shadow TLB entry,
+		 * similar to __tlbil_va except that we need to look in AS1.
+		 */
+		val = (pid << MAS6_SPID_SHIFT) | MAS6_SAS;
+		eaddr = get_tlb_eaddr(gtlbe);
+
+		local_irq_save(flags);
+
+		mtspr(SPRN_MAS6, val);
+		asm volatile("tlbsx 0, %[eaddr]" : : [eaddr] "r" (eaddr));
+		val = mfspr(SPRN_MAS1);
+		if (val & MAS1_VALID) {
+			/* Found it: write the entry back with VALID cleared. */
+			mtspr(SPRN_MAS1, val & ~MAS1_VALID);
+			asm volatile("tlbwe");
+		}
+
+		local_irq_restore(flags);
+	}
+
+	preempt_enable();
+}
+
+/*
+ * Return the index of the first way of the TLB0 set that @addr falls
+ * into, for a TLB with @sets sets of @ways ways each. The set is
+ * selected by the page number masked with (@sets - 1).
+ */
+static int tlb0_set_base(gva_t addr, int sets, int ways)
+{
+	int set = (addr >> PAGE_SHIFT) & (sets - 1);
+
+	return set * ways;
+}
+
+/* Index of the first way of the guest TLB0 set that @addr maps to. */
+static int gtlb0_set_base(struct kvmppc_vcpu_e500 *vcpu_e500, gva_t addr)
+{
+ return tlb0_set_base(addr, vcpu_e500->gtlb_params[0].sets,
+ vcpu_e500->gtlb_params[0].ways);
+}
+
+/*
+ * Turn the entry-select value from get_tlb_esel_bit() into an index
+ * within guest TLB @tlbsel.
+ *
+ * For TLB0 the value selects a way, and the set comes from the address
+ * in the guest's MAS2. For other TLBs it is masked with (entries - 1).
+ */
+static unsigned int get_tlb_esel(struct kvm_vcpu *vcpu, int tlbsel)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	int esel = get_tlb_esel_bit(vcpu);
+
+	if (tlbsel != 0) {
+		esel &= vcpu_e500->gtlb_params[tlbsel].entries - 1;
+		return esel;
+	}
+
+	esel &= vcpu_e500->gtlb_params[0].ways - 1;
+	esel += gtlb0_set_base(vcpu_e500, vcpu->arch.shared->mas2);
+
+	return esel;
+}
+
+/*
+ * Search guest TLB @tlbsel for a valid entry that covers @eaddr.
+ *
+ * An entry matches when @eaddr lies within its range, its TID is 0 or
+ * equals @pid, it is valid, and its address space equals @as (@as == -1
+ * matches either address space). Only the set selected by @eaddr is
+ * searched in TLB0; all of TLB1 is searched.
+ *
+ * Returns the index of the match within the TLB, or -1 if none.
+ */
+static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
+		gva_t eaddr, int tlbsel, unsigned int pid, int as)
+{
+	unsigned int set_base, offset;
+	int size;
+	int i;
+
+	if (tlbsel == 0) {
+		set_base = gtlb0_set_base(vcpu_e500, eaddr);
+		size = vcpu_e500->gtlb_params[0].ways;
+	} else {
+		set_base = 0;
+		size = vcpu_e500->gtlb_params[tlbsel].entries;
+	}
+
+	offset = vcpu_e500->gtlb_offset[tlbsel];
+
+	for (i = 0; i < size; i++) {
+		struct kvm_book3e_206_tlb_entry *tlbe =
+			&vcpu_e500->gtlb_arch[offset + set_base + i];
+		unsigned int tid;
+
+		/* Outside the range this entry covers? */
+		if (eaddr < get_tlb_eaddr(tlbe) || eaddr > get_tlb_end(tlbe))
+			continue;
+
+		/* TID 0 matches any pid. */
+		tid = get_tlb_tid(tlbe);
+		if (tid && (tid != pid))
+			continue;
+
+		if (!get_tlb_v(tlbe))
+			continue;
+
+		if (get_tlb_ts(tlbe) != as && as != -1)
+			continue;
+
+		return set_base + i;
+	}
+
+	return -1;
+}
+
+/*
+ * Record @pfn in @ref and mark the reference valid. The reference is
+ * also marked dirty when the guest entry @gtlbe permits writes.
+ */
+static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref,
+ struct kvm_book3e_206_tlb_entry *gtlbe,
+ pfn_t pfn)
+{
+ ref->pfn = pfn;
+ ref->flags = E500_TLB_VALID;
+
+ if (tlbe_is_writable(gtlbe))
+ ref->flags |= E500_TLB_DIRTY;
+}
+
+/*
+ * Release the page held by @ref, if any, reporting it dirty when the
+ * reference was flagged dirty. The flags are cleared afterwards.
+ * Does nothing for a reference that is not valid.
+ */
+static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref)
+{
+	if (!(ref->flags & E500_TLB_VALID))
+		return;
+
+	if (ref->flags & E500_TLB_DIRTY)
+		kvm_release_pfn_dirty(ref->pfn);
+	else
+		kvm_release_pfn_clean(ref->pfn);
+
+	ref->flags = 0;
+}
+
+/* Release the page reference held by every guest TLB0 entry. */
+static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	struct tlbe_priv *privs = vcpu_e500->gtlb_priv[0];
+	int i;
+
+	for (i = 0; i < vcpu_e500->gtlb_params[0].entries; i++)
+		kvmppc_e500_ref_release(&privs[i].ref);
+}
+
+/*
+ * Drop all shadow state for this vcpu: reset its id table, then release
+ * the page references held for host TLB1 shadow entries and for guest
+ * TLB0 entries.
+ */
+static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	int i;
+
+	kvmppc_e500_id_table_reset_all(vcpu_e500);
+
+	for (i = 0; i < host_tlb_params[1].entries; i++)
+		kvmppc_e500_ref_release(&vcpu_e500->tlb_refs[1][i]);
+
+	clear_tlb_privs(vcpu_e500);
+}
+
+/*
+ * Load the guest-visible MAS registers with the defaults for a TLB miss
+ * on @eaddr in address space @as, using the default fields in the
+ * guest's MAS4.
+ */
+static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
+ unsigned int eaddr, int as)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ unsigned int victim, pidsel, tsized;
+ int tlbsel;
+
+ /* since we only have two TLBs, only lower bit is used. */
+ tlbsel = (vcpu->arch.shared->mas4 >> 28) & 0x1;
+ victim = (tlbsel == 0) ? gtlb0_get_next_victim(vcpu_e500) : 0;
+ /*
+ * NOTE(review): pidsel is a 4-bit guest-controlled value used below
+ * as an index into vcpu_e500->pid[] - confirm the array is large
+ * enough or that the value is bounded elsewhere.
+ */
+ pidsel = (vcpu->arch.shared->mas4 >> 16) & 0xf;
+ tsized = (vcpu->arch.shared->mas4 >> 7) & 0x1f;
+
+ vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
+ | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
+ vcpu->arch.shared->mas1 = MAS1_VALID | (as ? MAS1_TS : 0)
+ | MAS1_TID(vcpu_e500->pid[pidsel])
+ | MAS1_TSIZE(tsized);
+ vcpu->arch.shared->mas2 = (eaddr & MAS2_EPN)
+ | (vcpu->arch.shared->mas4 & MAS2_ATTRIB_MASK);
+ /* Keep only the user-defined U0-U3 bits of MAS3. */
+ vcpu->arch.shared->mas7_3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
+ vcpu->arch.shared->mas6 = (vcpu->arch.shared->mas6 & MAS6_SPID1)
+ | (get_cur_pid(vcpu) << 16)
+ | (as ? MAS6_SAS : 0);
+}
+
+/*
+ * Fill in shadow entry @stlbe for guest entry @gtlbe, mapping @gvaddr
+ * to the page frame recorded in @ref with page size @tsize.
+ *
+ * @ref must already be valid (BUG otherwise). The TID field of MAS1 is
+ * left clear and must be supplied by the caller.
+ */
+static inline void kvmppc_e500_setup_stlbe(
+	struct kvmppc_vcpu_e500 *vcpu_e500,
+	struct kvm_book3e_206_tlb_entry *gtlbe,
+	int tsize, struct tlbe_ref *ref, u64 gvaddr,
+	struct kvm_book3e_206_tlb_entry *stlbe)
+{
+	pfn_t pfn = ref->pfn;
+	int usermode = vcpu_e500->vcpu.arch.shared->msr & MSR_PR;
+
+	BUG_ON(!(ref->flags & E500_TLB_VALID));
+
+	/* Force TS=1 IPROT=0 for all guest mappings. */
+	stlbe->mas1 = MAS1_TSIZE(tsize) | MAS1_TS | MAS1_VALID;
+	stlbe->mas2 = (gvaddr & MAS2_EPN)
+		| e500_shadow_mas2_attrib(gtlbe->mas2, usermode);
+	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT)
+		| e500_shadow_mas3_attrib(gtlbe->mas7_3, usermode);
+	/*
+	 * Callers keep @stlbe on the stack and __write_host_tlbe() passes
+	 * mas8 to its tracepoint, so give it a defined value, as
+	 * kvmppc_map_magic() does for its entry.
+	 */
+	stlbe->mas8 = 0;
+}
+
+/*
+ * Build a shadow TLB entry in @stlbe that maps guest virtual address
+ * @gvaddr to the host page backing guest frame @gfn, and record the
+ * backing page in @ref (releasing whatever @ref held before).
+ *
+ * For TLB0 (@tlbsel == 0) the mapping is always 4K. For TLB1 the host
+ * VMA is inspected: VM_PFNMAP and VM_HUGETLB regions may be mapped with
+ * a larger page size, up to the size the guest requested in @gtlbe.
+ *
+ * NOTE(review): if the page lookup fails, the function returns without
+ * touching @ref or @stlbe, yet the visible callers go on to write
+ * @stlbe to the host TLB - confirm this cannot happen in practice.
+ */
+static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
+ u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe,
+ int tlbsel, struct kvm_book3e_206_tlb_entry *stlbe,
+ struct tlbe_ref *ref)
+{
+ struct kvm_memory_slot *slot;
+ unsigned long pfn, hva;
+ int pfnmap = 0;
+ int tsize = BOOK3E_PAGESZ_4K;
+
+ /*
+ * Translate guest physical to true physical, acquiring
+ * a page reference if it is normal, non-reserved memory.
+ *
+ * gfn_to_memslot() must succeed because otherwise we wouldn't
+ * have gotten this far. Eventually we should just pass the slot
+ * pointer through from the first lookup.
+ */
+ slot = gfn_to_memslot(vcpu_e500->vcpu.kvm, gfn);
+ hva = gfn_to_hva_memslot(slot, gfn);
+
+ if (tlbsel == 1) {
+ struct vm_area_struct *vma;
+ down_read(&current->mm->mmap_sem);
+
+ vma = find_vma(current->mm, hva);
+ if (vma && hva >= vma->vm_start &&
+ (vma->vm_flags & VM_PFNMAP)) {
+ /*
+ * This VMA is a physically contiguous region (e.g.
+ * /dev/mem) that bypasses normal Linux page
+ * management. Find the overlap between the
+ * vma and the memslot.
+ */
+
+ unsigned long start, end;
+ unsigned long slot_start, slot_end;
+
+ pfnmap = 1;
+
+ /* [start, end) is the pfn range covered by the vma. */
+ start = vma->vm_pgoff;
+ end = start +
+ ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT);
+
+ pfn = start + ((hva - vma->vm_start) >> PAGE_SHIFT);
+
+ /* [slot_start, slot_end) is the pfn range of the memslot. */
+ slot_start = pfn - (gfn - slot->base_gfn);
+ slot_end = slot_start + slot->npages;
+
+ if (start < slot_start)
+ start = slot_start;
+ if (end > slot_end)
+ end = slot_end;
+
+ tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
+ MAS1_TSIZE_SHIFT;
+
+ /*
+ * e500 doesn't implement the lowest tsize bit,
+ * or 1K pages.
+ */
+ tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);
+
+ /*
+ * Now find the largest tsize (up to what the guest
+ * requested) that will cover gfn, stay within the
+ * range, and for which gfn and pfn are mutually
+ * aligned.
+ */
+
+ for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) {
+ unsigned long gfn_start, gfn_end, tsize_pages;
+ tsize_pages = 1 << (tsize - 2);
+
+ gfn_start = gfn & ~(tsize_pages - 1);
+ gfn_end = gfn_start + tsize_pages;
+
+ if (gfn_start + pfn - gfn < start)
+ continue;
+ if (gfn_end + pfn - gfn > end)
+ continue;
+ if ((gfn & (tsize_pages - 1)) !=
+ (pfn & (tsize_pages - 1)))
+ continue;
+
+ gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
+ pfn &= ~(tsize_pages - 1);
+ break;
+ }
+ } else if (vma && hva >= vma->vm_start &&
+ (vma->vm_flags & VM_HUGETLB)) {
+ unsigned long psize = vma_kernel_pagesize(vma);
+
+ tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
+ MAS1_TSIZE_SHIFT;
+
+ /*
+ * Take the largest page size that satisfies both host
+ * and guest mapping
+ */
+ tsize = min(__ilog2(psize) - 10, tsize);
+
+ /*
+ * e500 doesn't implement the lowest tsize bit,
+ * or 1K pages.
+ */
+ tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);
+ }
+
+ up_read(&current->mm->mmap_sem);
+ }
+
+ if (likely(!pfnmap)) {
+ unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
+ pfn = gfn_to_pfn_memslot(vcpu_e500->vcpu.kvm, slot, gfn);
+ if (is_error_pfn(pfn)) {
+ printk(KERN_ERR "Couldn't get real page for gfn %lx!\n",
+ (long)gfn);
+ kvm_release_pfn_clean(pfn);
+ return;
+ }
+
+ /* Align guest and physical address to page map boundaries */
+ pfn &= ~(tsize_pages - 1);
+ gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
+ }
+
+ /* Drop old ref and setup new one. */
+ kvmppc_e500_ref_release(ref);
+ kvmppc_e500_ref_setup(ref, gtlbe, pfn);
+
+ kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, tsize, ref, gvaddr, stlbe);
+}
+
+/*
+ * Build the shadow entry @stlbe for guest TLB0 entry @esel, using the
+ * guest entry's own effective and real addresses. The page reference is
+ * kept in that entry's gtlb_priv slot.
+ */
+/* XXX only map the one-one case, for now use TLB0 */
+static void kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500,
+ int esel,
+ struct kvm_book3e_206_tlb_entry *stlbe)
+{
+ struct kvm_book3e_206_tlb_entry *gtlbe;
+ struct tlbe_ref *ref;
+
+ gtlbe = get_entry(vcpu_e500, 0, esel);
+ ref = &vcpu_e500->gtlb_priv[0][esel].ref;
+
+ kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe),
+ get_tlb_raddr(gtlbe) >> PAGE_SHIFT,
+ gtlbe, 0, stlbe, ref);
+}
+
+/*
+ * Build the shadow entry @stlbe mapping @gvaddr to guest frame @gfn on
+ * behalf of guest TLB1 entry @gtlbe.
+ *
+ * The shadow TLB1 slot is picked round-robin through host_tlb1_nv,
+ * which wraps at tlb1_max_shadow_size(). Returns the chosen slot index.
+ */
+/* Caller must ensure that the specified guest TLB entry is safe to insert into
+ * the shadow TLB. */
+/* XXX for both one-one and one-to-many , for now use TLB1 */
+static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
+ u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe,
+ struct kvm_book3e_206_tlb_entry *stlbe)
+{
+ struct tlbe_ref *ref;
+ unsigned int victim;
+
+ victim = vcpu_e500->host_tlb1_nv++;
+
+ if (unlikely(vcpu_e500->host_tlb1_nv >= tlb1_max_shadow_size()))
+ vcpu_e500->host_tlb1_nv = 0;
+
+ /* Reusing the slot releases the page its previous occupant held. */
+ ref = &vcpu_e500->tlb_refs[1][victim];
+ kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, ref);
+
+ return victim;
+}
+
+/*
+ * Called when the guest MSR has changed: recompute the shadow PIDs.
+ * @old_msr is unused.
+ */
+void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+ /* Recalc shadow pid since MSR changes */
+ kvmppc_e500_recalc_shadow_pid(vcpu_e500);
+}
+
+/*
+ * Invalidate guest TLB entry (@tlbsel, @esel) by clearing its MAS1.
+ * Entries with IPROT set are left alone.
+ * Returns 0 on success, -1 if the entry is protected.
+ */
+static inline int kvmppc_e500_gtlbe_invalidate(
+ struct kvmppc_vcpu_e500 *vcpu_e500,
+ int tlbsel, int esel)
+{
+ struct kvm_book3e_206_tlb_entry *gtlbe =
+ get_entry(vcpu_e500, tlbsel, esel);
+
+ if (unlikely(get_tlb_iprot(gtlbe)))
+ return -1;
+
+ gtlbe->mas1 = 0;
+
+ return 0;
+}
+
+/*
+ * Emulate a guest write of @value to MMUCSR0: flash-invalidate guest
+ * TLB0 and/or TLB1 according to the TLB0FI/TLB1FI bits, then drop all
+ * of the vcpu's shadow id mappings. Always returns EMULATE_DONE.
+ */
+int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value)
+{
+	const ulong flash_bit[2] = { MMUCSR0_TLB0FI, MMUCSR0_TLB1FI };
+	int tlbsel, esel;
+
+	for (tlbsel = 0; tlbsel < 2; tlbsel++) {
+		if (!(value & flash_bit[tlbsel]))
+			continue;
+
+		for (esel = 0;
+		     esel < vcpu_e500->gtlb_params[tlbsel].entries;
+		     esel++)
+			kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
+	}
+
+	/* Invalidate all vcpu id mappings */
+	kvmppc_e500_id_table_reset_all(vcpu_e500);
+
+	return EMULATE_DONE;
+}
+
+/*
+ * Emulate the guest's tlbivax instruction.
+ *
+ * The effective address is (ra ? GPR[ra] : 0) + GPR[rb]. Bit 2 of it
+ * requests invalidation of the whole TLB, and bit 3 selects the TLB.
+ * Otherwise only the entry matching the page address and the current
+ * PID (in either address space) is invalidated. All shadow id mappings
+ * of the vcpu are dropped afterwards. Always returns EMULATE_DONE.
+ */
+int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	unsigned int inval_all;
+	int esel, tlbsel;
+	gva_t ea;
+
+	ea = ((ra) ? kvmppc_get_gpr(vcpu, ra) : 0) + kvmppc_get_gpr(vcpu, rb);
+
+	inval_all = (ea >> 2) & 0x1;
+
+	/* since we only have two TLBs, only lower bit is used. */
+	tlbsel = (ea >> 3) & 0x1;
+
+	if (!inval_all) {
+		/* invalidate the single matching entry, if there is one */
+		ea &= 0xfffff000;
+		esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel,
+					     get_cur_pid(vcpu), -1);
+		if (esel >= 0)
+			kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
+	} else {
+		/* invalidate all entries */
+		int nr_entries = vcpu_e500->gtlb_params[tlbsel].entries;
+
+		for (esel = 0; esel < nr_entries; esel++)
+			kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
+	}
+
+	/* Invalidate all vcpu id mappings */
+	kvmppc_e500_id_table_reset_all(vcpu_e500);
+
+	return EMULATE_DONE;
+}
+
+/*
+ * Emulate the guest's tlbre instruction: copy the guest TLB entry
+ * selected by the guest's MAS0 (and MAS2 for the TLB0 set) into the
+ * guest-visible MAS1, MAS2 and MAS3/7, and refresh the NV field of
+ * MAS0. Always returns EMULATE_DONE.
+ */
+int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ int tlbsel, esel;
+ struct kvm_book3e_206_tlb_entry *gtlbe;
+
+ tlbsel = get_tlb_tlbsel(vcpu);
+ esel = get_tlb_esel(vcpu, tlbsel);
+
+ gtlbe = get_entry(vcpu_e500, tlbsel, esel);
+ /* Replace only the next-victim field of MAS0. */
+ vcpu->arch.shared->mas0 &= ~MAS0_NV(~0);
+ vcpu->arch.shared->mas0 |= MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
+ vcpu->arch.shared->mas1 = gtlbe->mas1;
+ vcpu->arch.shared->mas2 = gtlbe->mas2;
+ vcpu->arch.shared->mas7_3 = gtlbe->mas7_3;
+
+ return EMULATE_DONE;
+}
+
+/*
+ * Emulate the guest's tlbsx instruction: search both guest TLBs for the
+ * address in GPR[rb], using the search PID and address space from the
+ * guest's MAS6.
+ *
+ * On a hit, the guest-visible MAS registers are loaded from the
+ * matching entry. On a miss, they are loaded with defaults taken from
+ * MAS4 and MAS6, and MAS1 is left without its valid bit.
+ * Always returns EMULATE_DONE.
+ */
+int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	int as = !!get_cur_sas(vcpu);
+	unsigned int pid = get_cur_spid(vcpu);
+	int esel, tlbsel;
+	struct kvm_book3e_206_tlb_entry *gtlbe = NULL;
+	gva_t ea;
+
+	ea = kvmppc_get_gpr(vcpu, rb);
+
+	for (tlbsel = 0; tlbsel < 2; tlbsel++) {
+		esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as);
+		if (esel >= 0) {
+			gtlbe = get_entry(vcpu_e500, tlbsel, esel);
+			break;
+		}
+	}
+
+	if (gtlbe) {
+		/* Report the way within the set, not the flat index. */
+		esel &= vcpu_e500->gtlb_params[tlbsel].ways - 1;
+
+		vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(esel)
+			| MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
+		vcpu->arch.shared->mas1 = gtlbe->mas1;
+		vcpu->arch.shared->mas2 = gtlbe->mas2;
+		vcpu->arch.shared->mas7_3 = gtlbe->mas7_3;
+	} else {
+		int victim;
+
+		/* since we only have two TLBs, only lower bit is used. */
+		tlbsel = vcpu->arch.shared->mas4 >> 28 & 0x1;
+		victim = (tlbsel == 0) ? gtlb0_get_next_victim(vcpu_e500) : 0;
+
+		vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel)
+			| MAS0_ESEL(victim)
+			| MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
+		/*
+		 * MAS1.TS must mirror MAS6.SAS. Test the SAS bit first and
+		 * only then pick MAS1_TS; ANDing mas6 with MAS1_TS directly
+		 * would test an unrelated bit.
+		 */
+		vcpu->arch.shared->mas1 =
+			  (vcpu->arch.shared->mas6 & MAS6_SPID0)
+			| ((vcpu->arch.shared->mas6 & MAS6_SAS) ? MAS1_TS : 0)
+			| (vcpu->arch.shared->mas4 & MAS4_TSIZED(~0));
+		vcpu->arch.shared->mas2 &= MAS2_EPN;
+		vcpu->arch.shared->mas2 |= vcpu->arch.shared->mas4 &
+					   MAS2_ATTRIB_MASK;
+		vcpu->arch.shared->mas7_3 &= MAS3_U0 | MAS3_U1 |
+					     MAS3_U2 | MAS3_U3;
+	}
+
+	kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
+	return EMULATE_DONE;
+}
+
+/*
+ * Fill in the TID of shadow entry @stlbe with the shadow id for the
+ * guest entry's (TS, TID) at the current privilege level, then write it
+ * to host TLB @stlbsel. Preemption is disabled across both steps so the
+ * shadow id stays valid on this core.
+ */
+/* sesel is for tlb1 only */
+static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
+ struct kvm_book3e_206_tlb_entry *gtlbe,
+ struct kvm_book3e_206_tlb_entry *stlbe,
+ int stlbsel, int sesel)
+{
+ int stid;
+
+ preempt_disable();
+ stid = kvmppc_e500_get_sid(vcpu_e500, get_tlb_ts(gtlbe),
+ get_tlb_tid(gtlbe),
+ get_cur_pr(&vcpu_e500->vcpu), 0);
+
+ stlbe->mas1 |= MAS1_TID(stid);
+ write_host_tlbe(vcpu_e500, stlbsel, sesel, stlbe);
+ preempt_enable();
+}
+
+/*
+ * Emulate the guest's tlbwe instruction: store the guest-visible MAS
+ * registers into the selected guest TLB entry and, when the new entry
+ * is safe to map on the host, install a shadow mapping for it.
+ * Always returns EMULATE_DONE.
+ */
+int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ struct kvm_book3e_206_tlb_entry *gtlbe;
+ int tlbsel, esel;
+
+ tlbsel = get_tlb_tlbsel(vcpu);
+ esel = get_tlb_esel(vcpu, tlbsel);
+
+ gtlbe = get_entry(vcpu_e500, tlbsel, esel);
+
+ /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
+ if (get_tlb_v(gtlbe))
+ inval_gtlbe_on_host(vcpu_e500, tlbsel, esel);
+
+ gtlbe->mas1 = vcpu->arch.shared->mas1;
+ gtlbe->mas2 = vcpu->arch.shared->mas2;
+ gtlbe->mas7_3 = vcpu->arch.shared->mas7_3;
+
+ trace_kvm_booke206_gtlb_write(vcpu->arch.shared->mas0, gtlbe->mas1,
+ gtlbe->mas2, gtlbe->mas7_3);
+
+ /* Install a shadow mapping right away if the new entry allows it. */
+ if (tlbe_is_host_safe(vcpu, gtlbe)) {
+ struct kvm_book3e_206_tlb_entry stlbe;
+ int stlbsel, sesel;
+ u64 eaddr;
+ u64 raddr;
+
+ switch (tlbsel) {
+ case 0:
+ /* TLB0 */
+ /* Guest TLB0 entries are forced to a 4K page size. */
+ gtlbe->mas1 &= ~MAS1_TSIZE(~0);
+ gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K);
+
+ stlbsel = 0;
+ kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe);
+ sesel = 0; /* unused */
+
+ break;
+
+ case 1:
+ /* TLB1 */
+ eaddr = get_tlb_eaddr(gtlbe);
+ raddr = get_tlb_raddr(gtlbe);
+
+ /* Create a 4KB mapping on the host.
+ * If the guest wanted a large page,
+ * only the first 4KB is mapped here and the rest
+ * are mapped on the fly. */
+ stlbsel = 1;
+ sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr,
+ raddr >> PAGE_SHIFT, gtlbe, &stlbe);
+ break;
+
+ default:
+ BUG();
+ }
+
+ write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel);
+ }
+
+ kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
+ return EMULATE_DONE;
+}
+
+/*
+ * Look up @eaddr in the guest TLBs for an instruction fetch, using the
+ * current PID and the address space from MSR[IS].
+ * Returns the combined TLB index, or -1 if there is no match.
+ */
+int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
+{
+ unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS);
+
+ return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as);
+}
+
+/*
+ * Look up @eaddr in the guest TLBs for a data access, using the current
+ * PID and the address space from MSR[DS].
+ * Returns the combined TLB index, or -1 if there is no match.
+ */
+int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
+{
+ unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS);
+
+ return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as);
+}
+
+/*
+ * Set up the guest MAS registers for an instruction TLB miss at the
+ * current guest PC, in the address space given by MSR[IS].
+ */
+void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu)
+{
+ unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS);
+
+ kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.pc, as);
+}
+
+/*
+ * Set up the guest MAS registers for a data TLB miss at the faulting
+ * address (fault_dear), in the address space given by MSR[DS].
+ */
+void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu)
+{
+ unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS);
+
+ kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.fault_dear, as);
+}
+
+/*
+ * Translate guest effective address @eaddr to a guest physical address
+ * using the guest TLB entry identified by the combined @index: the
+ * entry's real address supplies the page, @eaddr the offset within it.
+ */
+gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index,
+ gva_t eaddr)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ struct kvm_book3e_206_tlb_entry *gtlbe;
+ u64 pgmask;
+
+ gtlbe = get_entry(vcpu_e500, tlbsel_of(index), esel_of(index));
+ /* Mask selecting the offset bits within the entry's page size. */
+ pgmask = get_tlb_bytes(gtlbe) - 1;
+
+ return get_tlb_raddr(gtlbe) | (eaddr & pgmask);
+}
+
+/* MMU teardown hook; intentionally does nothing here. */
+void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+{
+}
+
+/*
+ * Install a host shadow mapping for guest address @eaddr, backed by the
+ * guest TLB entry identified by the combined @index.
+ *
+ * For TLB0 the shadow entry is rebuilt from the page reference already
+ * stored for that entry (which must be valid, see
+ * kvmppc_e500_setup_stlbe()). For TLB1 a fresh shadow slot is set up
+ * for the page containing @gpaddr.
+ */
+void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
+ unsigned int index)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ struct tlbe_priv *priv;
+ struct kvm_book3e_206_tlb_entry *gtlbe, stlbe;
+ int tlbsel = tlbsel_of(index);
+ int esel = esel_of(index);
+ int stlbsel, sesel;
+
+ gtlbe = get_entry(vcpu_e500, tlbsel, esel);
+
+ switch (tlbsel) {
+ case 0:
+ stlbsel = 0;
+ sesel = 0; /* unused */
+ priv = &vcpu_e500->gtlb_priv[tlbsel][esel];
+
+ kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, BOOK3E_PAGESZ_4K,
+ &priv->ref, eaddr, &stlbe);
+ break;
+
+ case 1: {
+ gfn_t gfn = gpaddr >> PAGE_SHIFT;
+
+ stlbsel = 1;
+ sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn,
+ gtlbe, &stlbe);
+ break;
+ }
+
+ default:
+ BUG();
+ break;
+ }
+
+ write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel);
+}
+
+/*
+ * Search guest TLB0, then guest TLB1, for an entry matching @eaddr,
+ * @pid and @as. Returns the combined index (index_of(tlbsel, esel)) of
+ * the first match, or -1 if neither TLB has one.
+ */
+int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu,
+			   gva_t eaddr, unsigned int pid, int as)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	int tlbsel = 0;
+
+	while (tlbsel < 2) {
+		int esel = kvmppc_e500_tlb_index(vcpu_e500, eaddr, tlbsel,
+						 pid, as);
+
+		if (esel >= 0)
+			return index_of(tlbsel, esel);
+
+		tlbsel++;
+	}
+
+	return -1;
+}
+
+/*
+ * Set the guest's PID to @pid. When the value actually changes, both
+ * copies of it are updated and the shadow PIDs are recomputed.
+ */
+void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+	if (vcpu->arch.pid == pid)
+		return;
+
+	vcpu->arch.pid = pid;
+	vcpu_e500->pid[0] = vcpu->arch.pid;
+	kvmppc_e500_recalc_shadow_pid(vcpu_e500);
+}
+
+/*
+ * Pre-load the guest's TLB1 with its two initial entries: a 256M
+ * mapping at address 0 and a 4K cache-inhibited, guarded mapping of
+ * the serial port page.
+ */
+void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ struct kvm_book3e_206_tlb_entry *tlbe;
+
+ /* Insert large initial mapping for guest. */
+ tlbe = get_entry(vcpu_e500, 1, 0);
+ tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M);
+ tlbe->mas2 = 0;
+ tlbe->mas7_3 = E500_TLB_SUPER_PERM_MASK;
+
+ /* 4K map for serial output. Used by kernel wrapper. */
+ tlbe = get_entry(vcpu_e500, 1, 1);
+ tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K);
+ tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G;
+ tlbe->mas7_3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
+}
+
+/*
+ * Release everything backing the vcpu's guest TLB: shadow references,
+ * the per-entry private arrays, and the guest TLB array itself.
+ *
+ * When the array is shared with userspace (shared_tlb_pages set), the
+ * kernel mapping is torn down, the pinned user pages are marked dirty
+ * and unpinned, and the page-pointer array is freed. Otherwise the
+ * array was allocated by the kernel and is simply kfree()d.
+ */
+static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	int i;
+
+	clear_tlb_refs(vcpu_e500);
+	kfree(vcpu_e500->gtlb_priv[0]);
+	kfree(vcpu_e500->gtlb_priv[1]);
+
+	if (vcpu_e500->shared_tlb_pages) {
+		/* gtlb_arch may sit at an offset inside the first page. */
+		vfree((void *)(round_down((uintptr_t)vcpu_e500->gtlb_arch,
+					  PAGE_SIZE)));
+
+		for (i = 0; i < vcpu_e500->num_shared_tlb_pages; i++) {
+			set_page_dirty_lock(vcpu_e500->shared_tlb_pages[i]);
+			put_page(vcpu_e500->shared_tlb_pages[i]);
+		}
+
+		vcpu_e500->num_shared_tlb_pages = 0;
+
+		/*
+		 * The page-pointer array was kmalloc()ed by
+		 * kvm_vcpu_ioctl_config_tlb(); free it rather than just
+		 * dropping the pointer.
+		 */
+		kfree(vcpu_e500->shared_tlb_pages);
+		vcpu_e500->shared_tlb_pages = NULL;
+	} else {
+		kfree(vcpu_e500->gtlb_arch);
+	}
+
+	vcpu_e500->gtlb_arch = NULL;
+}
+
+/*
+ * Handle the TLB configuration ioctl: replace the vcpu's guest TLB
+ * array with one that lives in userspace memory and is shared with the
+ * kernel.
+ *
+ * @cfg describes the MMU type, a user pointer to the TLB geometry
+ * (kvm_book3e_206_tlb_params) and the user buffer holding the entries.
+ * The geometry is validated, the user buffer is pinned and mapped into
+ * the kernel, and the vcpu's TLB bookkeeping (sizes, ways, sets,
+ * TLBnCFG) is updated to match.
+ *
+ * Returns 0 on success, -EINVAL for an unsupported or inconsistent
+ * configuration, -EFAULT if the user memory cannot be read or pinned,
+ * or -ENOMEM if a kernel allocation or mapping fails. On failure the
+ * existing guest TLB is left untouched.
+ */
+int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
+			      struct kvm_config_tlb *cfg)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	struct kvm_book3e_206_tlb_params params;
+	char *virt;
+	struct page **pages;
+	struct tlbe_priv *privs[2] = {};
+	size_t array_len;
+	u32 sets;
+	int num_pages, ret, i;
+
+	if (cfg->mmu_type != KVM_MMU_FSL_BOOKE_NOHV)
+		return -EINVAL;
+
+	if (copy_from_user(&params, (void __user *)(uintptr_t)cfg->params,
+			   sizeof(params)))
+		return -EFAULT;
+
+	/* TLB1 must be fully associative with at most 64 entries. */
+	if (params.tlb_sizes[1] > 64)
+		return -EINVAL;
+	if (params.tlb_ways[1] != params.tlb_sizes[1])
+		return -EINVAL;
+	/* Only TLB0 and TLB1 are supported. */
+	if (params.tlb_sizes[2] != 0 || params.tlb_sizes[3] != 0)
+		return -EINVAL;
+	if (params.tlb_ways[2] != 0 || params.tlb_ways[3] != 0)
+		return -EINVAL;
+
+	/* TLB0 ways and sets must both be powers of two. */
+	if (!is_power_of_2(params.tlb_ways[0]))
+		return -EINVAL;
+
+	sets = params.tlb_sizes[0] >> ilog2(params.tlb_ways[0]);
+	if (!is_power_of_2(sets))
+		return -EINVAL;
+
+	array_len = params.tlb_sizes[0] + params.tlb_sizes[1];
+	array_len *= sizeof(struct kvm_book3e_206_tlb_entry);
+
+	if (cfg->array_len < array_len)
+		return -EINVAL;
+
+	/* Number of pages spanned by the (possibly unaligned) user array. */
+	num_pages = DIV_ROUND_UP(cfg->array + array_len - 1, PAGE_SIZE) -
+		    cfg->array / PAGE_SIZE;
+	pages = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL);
+	if (!pages)
+		return -ENOMEM;
+
+	ret = get_user_pages_fast(cfg->array, num_pages, 1, pages);
+	if (ret < 0)
+		goto err_pages;
+
+	if (ret != num_pages) {
+		/* Only unpin the pages that were actually pinned. */
+		num_pages = ret;
+		ret = -EFAULT;
+		goto err_put_page;
+	}
+
+	virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL);
+	if (!virt) {
+		/* ret still holds the page count here; report the failure. */
+		ret = -ENOMEM;
+		goto err_put_page;
+	}
+
+	privs[0] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[0],
+			   GFP_KERNEL);
+	privs[1] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[1],
+			   GFP_KERNEL);
+
+	if (!privs[0] || !privs[1]) {
+		ret = -ENOMEM;
+		goto err_privs;
+	}
+
+	/* Nothing can fail from here on: drop the old TLB, adopt the new. */
+	free_gtlb(vcpu_e500);
+
+	vcpu_e500->gtlb_priv[0] = privs[0];
+	vcpu_e500->gtlb_priv[1] = privs[1];
+
+	vcpu_e500->gtlb_arch = (struct kvm_book3e_206_tlb_entry *)
+		(virt + (cfg->array & (PAGE_SIZE - 1)));
+
+	vcpu_e500->gtlb_params[0].entries = params.tlb_sizes[0];
+	vcpu_e500->gtlb_params[1].entries = params.tlb_sizes[1];
+
+	vcpu_e500->gtlb_offset[0] = 0;
+	vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0];
+
+	vcpu_e500->tlb0cfg &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+	if (params.tlb_sizes[0] <= 2048)
+		vcpu_e500->tlb0cfg |= params.tlb_sizes[0];
+	vcpu_e500->tlb0cfg |= params.tlb_ways[0] << TLBnCFG_ASSOC_SHIFT;
+
+	vcpu_e500->tlb1cfg &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+	vcpu_e500->tlb1cfg |= params.tlb_sizes[1];
+	vcpu_e500->tlb1cfg |= params.tlb_ways[1] << TLBnCFG_ASSOC_SHIFT;
+
+	vcpu_e500->shared_tlb_pages = pages;
+	vcpu_e500->num_shared_tlb_pages = num_pages;
+
+	vcpu_e500->gtlb_params[0].ways = params.tlb_ways[0];
+	vcpu_e500->gtlb_params[0].sets = sets;
+
+	vcpu_e500->gtlb_params[1].ways = params.tlb_sizes[1];
+	vcpu_e500->gtlb_params[1].sets = 1;
+
+	return 0;
+
+err_privs:
+	kfree(privs[0]);
+	kfree(privs[1]);
+	/* Undo the vmap() so the kernel mapping does not leak. */
+	vunmap(virt);
+
+err_put_page:
+	for (i = 0; i < num_pages; i++)
+		put_page(pages[i]);
+
+err_pages:
+	kfree(pages);
+	return ret;
+}
+
+/*
+ * KVM_DIRTY_TLB handler.  The contents of @dirty are not examined: every
+ * call unconditionally runs clear_tlb_refs() on the vcpu.  Always returns 0.
+ */
+int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
+			     struct kvm_dirty_tlb *dirty)
+{
+	clear_tlb_refs(to_e500(vcpu));
+
+	return 0;
+}
+
+/*
+ * Set up the e500 TLB state for a new vcpu.
+ *
+ * Reads the host TLB geometry from TLB0CFG/TLB1CFG into host_tlb_params,
+ * installs the legacy default guest geometry (KVM_E500_TLB0_SIZE /
+ * KVM_E500_TLB1_SIZE), allocates the guest TLB array, the host reference
+ * arrays, the per-entry private arrays and the ID table, and finally
+ * builds the guest-visible TLB0CFG/TLB1CFG values.
+ *
+ * Returns 0 on success, -ENODEV if the host TLB geometry is unusable,
+ * -ENOMEM if an allocation fails.
+ */
+int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	int entry_size = sizeof(struct kvm_book3e_206_tlb_entry);
+	int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE;
+
+	host_tlb_params[0].entries = mfspr(SPRN_TLB0CFG) & TLBnCFG_N_ENTRY;
+	host_tlb_params[1].entries = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
+
+	/*
+	 * This should never happen on real e500 hardware, but is
+	 * architecturally possible -- e.g. in some weird nested
+	 * virtualization case.
+	 */
+	if (host_tlb_params[0].entries == 0 ||
+	    host_tlb_params[1].entries == 0) {
+		pr_err("%s: need to know host tlb size\n", __func__);
+		return -ENODEV;
+	}
+
+	host_tlb_params[0].ways = (mfspr(SPRN_TLB0CFG) & TLBnCFG_ASSOC) >>
+				  TLBnCFG_ASSOC_SHIFT;
+	host_tlb_params[1].ways = host_tlb_params[1].entries;
+
+	if (!is_power_of_2(host_tlb_params[0].entries) ||
+	    !is_power_of_2(host_tlb_params[0].ways) ||
+	    host_tlb_params[0].entries < host_tlb_params[0].ways ||
+	    host_tlb_params[0].ways == 0) {
+		pr_err("%s: bad tlb0 host config: %u entries %u ways\n",
+		       __func__, host_tlb_params[0].entries,
+		       host_tlb_params[0].ways);
+		return -ENODEV;
+	}
+
+	host_tlb_params[0].sets =
+		host_tlb_params[0].entries / host_tlb_params[0].ways;
+	host_tlb_params[1].sets = 1;
+
+	vcpu_e500->gtlb_params[0].entries = KVM_E500_TLB0_SIZE;
+	vcpu_e500->gtlb_params[1].entries = KVM_E500_TLB1_SIZE;
+
+	vcpu_e500->gtlb_params[0].ways = KVM_E500_TLB0_WAY_NUM;
+	vcpu_e500->gtlb_params[0].sets =
+		KVM_E500_TLB0_SIZE / KVM_E500_TLB0_WAY_NUM;
+
+	vcpu_e500->gtlb_params[1].ways = KVM_E500_TLB1_SIZE;
+	vcpu_e500->gtlb_params[1].sets = 1;
+
+	vcpu_e500->gtlb_arch = kmalloc(entries * entry_size, GFP_KERNEL);
+	if (!vcpu_e500->gtlb_arch)
+		return -ENOMEM;
+
+	vcpu_e500->gtlb_offset[0] = 0;
+	vcpu_e500->gtlb_offset[1] = KVM_E500_TLB0_SIZE;
+
+	vcpu_e500->tlb_refs[0] =
+		kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[0].entries,
+			GFP_KERNEL);
+	if (!vcpu_e500->tlb_refs[0])
+		goto err;
+
+	vcpu_e500->tlb_refs[1] =
+		kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[1].entries,
+			GFP_KERNEL);
+	if (!vcpu_e500->tlb_refs[1])
+		goto err;
+
+	/*
+	 * gtlb_priv[] holds struct tlbe_priv elements; size the arrays by
+	 * that type, as kvm_vcpu_ioctl_config_tlb() does.
+	 */
+	vcpu_e500->gtlb_priv[0] = kzalloc(sizeof(struct tlbe_priv) *
+					  vcpu_e500->gtlb_params[0].entries,
+					  GFP_KERNEL);
+	if (!vcpu_e500->gtlb_priv[0])
+		goto err;
+
+	vcpu_e500->gtlb_priv[1] = kzalloc(sizeof(struct tlbe_priv) *
+					  vcpu_e500->gtlb_params[1].entries,
+					  GFP_KERNEL);
+	if (!vcpu_e500->gtlb_priv[1])
+		goto err;
+
+	if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL)
+		goto err;
+
+	/* Init TLB configuration register */
+	vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) &
+			     ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+	vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_params[0].entries;
+	vcpu_e500->tlb0cfg |=
+		vcpu_e500->gtlb_params[0].ways << TLBnCFG_ASSOC_SHIFT;
+
+	/* TLB1 geometry belongs in tlb1cfg, not OR-ed on top of tlb0cfg. */
+	vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) &
+			     ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+	vcpu_e500->tlb1cfg |= vcpu_e500->gtlb_params[1].entries;
+	vcpu_e500->tlb1cfg |=
+		vcpu_e500->gtlb_params[1].ways << TLBnCFG_ASSOC_SHIFT;
+
+	return 0;
+
+err:
+	free_gtlb(vcpu_e500);
+	kfree(vcpu_e500->tlb_refs[0]);
+	kfree(vcpu_e500->tlb_refs[1]);
+	/* Every path to this label is an allocation failure. */
+	return -ENOMEM;
+}
+
+/*
+ * Tear down the per-vcpu TLB state: guest TLB first, then the ID table,
+ * then both host reference arrays.
+ */
+void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	int tlbsel;
+
+	free_gtlb(vcpu_e500);
+	kvmppc_e500_id_table_free(vcpu_e500);
+
+	for (tlbsel = 0; tlbsel < 2; tlbsel++)
+		kfree(vcpu_e500->tlb_refs[tlbsel]);
+}
diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h
new file mode 100644
index 00000000..5c6d2d7b
--- /dev/null
+++ b/arch/powerpc/kvm/e500_tlb.h
@@ -0,0 +1,174 @@
+/*
+ * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, yu.liu@freescale.com
+ *
+ * Description:
+ * This file is based on arch/powerpc/kvm/44x_tlb.h,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __KVM_E500_TLB_H__
+#define __KVM_E500_TLB_H__
+
+#include <linux/kvm_host.h>
+#include <asm/mmu-book3e.h>
+#include <asm/tlb.h>
+#include <asm/kvm_e500.h>
+
+/* This geometry is the legacy default -- can be overridden by userspace */
+#define KVM_E500_TLB0_WAY_SIZE 128
+#define KVM_E500_TLB0_WAY_NUM 2
+
+#define KVM_E500_TLB0_SIZE (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM)
+#define KVM_E500_TLB1_SIZE 16
+
+#define index_of(tlbsel, esel) (((tlbsel) << 16) | ((esel) & 0xFFFF))
+#define tlbsel_of(index) ((index) >> 16)
+#define esel_of(index) ((index) & 0xFFFF)
+
+#define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW)
+#define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW)
+#define MAS2_ATTRIB_MASK \
+ (MAS2_X0 | MAS2_X1)
+#define MAS3_ATTRIB_MASK \
+ (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \
+ | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK)
+
+extern void kvmppc_dump_tlbs(struct kvm_vcpu *);
+extern int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *, ulong);
+extern int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *);
+extern int kvmppc_e500_emul_tlbre(struct kvm_vcpu *);
+extern int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *, int, int);
+extern int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *, int);
+extern int kvmppc_e500_tlb_search(struct kvm_vcpu *, gva_t, unsigned int, int);
+extern void kvmppc_e500_tlb_put(struct kvm_vcpu *);
+extern void kvmppc_e500_tlb_load(struct kvm_vcpu *, int);
+extern int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *);
+extern void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *);
+extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *);
+extern void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *);
+
+/* TLB helper functions */
+/* Return the 5-bit size field held in MAS1 bits 7..11 (LSB = bit 0). */
+static inline unsigned int
+get_tlb_size(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+	unsigned int field = tlbe->mas1 >> 7;
+
+	return field & 0x1f;
+}
+
+/* Return MAS2 masked with 0xfffff000, i.e. with the low 12 bits dropped. */
+static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+	gva_t eaddr = tlbe->mas2 & 0xfffff000;
+
+	return eaddr;
+}
+
+/* Return the byte count covered by the entry: 1 KiB << size field. */
+static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+	return 1024ULL << get_tlb_size(tlbe);
+}
+
+/* Return the last effective address covered by the entry (inclusive). */
+static inline gva_t get_tlb_end(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+	gva_t first = get_tlb_eaddr(tlbe);
+
+	return first + get_tlb_bytes(tlbe) - 1;
+}
+
+/* Return MAS7_3 with its low 12 bits cleared. */
+static inline u64 get_tlb_raddr(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+	const u64 low_bits = 0xfffULL;
+
+	return tlbe->mas7_3 & ~low_bits;
+}
+
+/* Return the 8-bit field held in MAS1 bits 16..23 (LSB = bit 0). */
+static inline unsigned int
+get_tlb_tid(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+	unsigned int field = tlbe->mas1 >> 16;
+
+	return field & 0xff;
+}
+
+/* Return MAS1 bit 12 (LSB = bit 0) as 0 or 1. */
+static inline unsigned int
+get_tlb_ts(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+	return (tlbe->mas1 & 0x1000) ? 1 : 0;
+}
+
+/* Return MAS1 bit 31 (LSB = bit 0) as 0 or 1. */
+static inline unsigned int
+get_tlb_v(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+	return (tlbe->mas1 & 0x80000000) ? 1 : 0;
+}
+
+/* Return MAS1 bit 30 (LSB = bit 0) as 0 or 1. */
+static inline unsigned int
+get_tlb_iprot(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+	return (tlbe->mas1 & 0x40000000) ? 1 : 0;
+}
+
+/* Return the low 8 bits of the vcpu's pid value. */
+static inline unsigned int get_cur_pid(struct kvm_vcpu *vcpu)
+{
+	unsigned int pid = vcpu->arch.pid;
+
+	return pid & 0xff;
+}
+
+/* Return 1 if either MSR_IS or MSR_DS is set in the shared MSR, else 0. */
+static inline unsigned int get_cur_as(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.shared->msr & (MSR_IS | MSR_DS))
+		return 1;
+
+	return 0;
+}
+
+/* Return 1 if MSR_PR is set in the shared MSR, else 0. */
+static inline unsigned int get_cur_pr(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.shared->msr & MSR_PR)
+		return 1;
+
+	return 0;
+}
+
+/* Return the 8-bit field held in shared MAS6 bits 16..23 (LSB = bit 0). */
+static inline unsigned int get_cur_spid(const struct kvm_vcpu *vcpu)
+{
+	unsigned int field = vcpu->arch.shared->mas6 >> 16;
+
+	return field & 0xff;
+}
+
+/* Return bit 0 of the shared MAS6 value. */
+static inline unsigned int get_cur_sas(const struct kvm_vcpu *vcpu)
+{
+	return (vcpu->arch.shared->mas6 & 0x1) ? 1 : 0;
+}
+
+/*
+ * Return the TLB select value from the shared MAS0.
+ *
+ * The manual describes tlbsel as a 2-bit field, but since only two TLBs
+ * exist here, just the low bit (MAS0 bit 28, LSB = bit 0) is returned.
+ */
+static inline unsigned int get_tlb_tlbsel(const struct kvm_vcpu *vcpu)
+{
+	unsigned int field = vcpu->arch.shared->mas0 >> 28;
+
+	return field & 0x1;
+}
+
+/* Return the low 12 bits of the shared MAS0 value. */
+static inline unsigned int get_tlb_nv_bit(const struct kvm_vcpu *vcpu)
+{
+	unsigned int mas0 = vcpu->arch.shared->mas0;
+
+	return mas0 & 0xfff;
+}
+
+/* Return the 12-bit field held in shared MAS0 bits 16..27 (LSB = bit 0). */
+static inline unsigned int get_tlb_esel_bit(const struct kvm_vcpu *vcpu)
+{
+	unsigned int field = vcpu->arch.shared->mas0 >> 16;
+
+	return field & 0xfff;
+}
+
+/*
+ * Return 1 only when all of the following hold, otherwise 0:
+ *  - the entry's valid bit is set,
+ *  - the entry's TS bit equals the guest's current MSR_IS state
+ *    (XXX what about IS != DS?),
+ *  - the entry's real address falls inside a memslot, i.e. it maps RAM.
+ */
+static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
+			const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+	unsigned int guest_is;
+	gpa_t gpa;
+
+	if (!get_tlb_v(tlbe))
+		return 0;
+
+	guest_is = (vcpu->arch.shared->msr & MSR_IS) ? 1 : 0;
+	if (get_tlb_ts(tlbe) != guest_is)
+		return 0;
+
+	gpa = get_tlb_raddr(tlbe);
+
+	/* No memslot means the mapping is not for RAM. */
+	return gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT) ? 1 : 0;
+}
+
+#endif /* __KVM_E500_TLB_H__ */
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
new file mode 100644
index 00000000..968f4010
--- /dev/null
+++ b/arch/powerpc/kvm/emulate.c
@@ -0,0 +1,542 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2007
+ * Copyright 2011 Freescale Semiconductor, Inc.
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <linux/jiffies.h>
+#include <linux/hrtimer.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm_host.h>
+
+#include <asm/reg.h>
+#include <asm/time.h>
+#include <asm/byteorder.h>
+#include <asm/kvm_ppc.h>
+#include <asm/disassemble.h>
+#include "timing.h"
+#include "trace.h"
+
+#define OP_TRAP 3
+#define OP_TRAP_64 2
+
+#define OP_31_XOP_LWZX 23
+#define OP_31_XOP_LBZX 87
+#define OP_31_XOP_STWX 151
+#define OP_31_XOP_STBX 215
+#define OP_31_XOP_LBZUX 119
+#define OP_31_XOP_STBUX 247
+#define OP_31_XOP_LHZX 279
+#define OP_31_XOP_LHZUX 311
+#define OP_31_XOP_MFSPR 339
+#define OP_31_XOP_LHAX 343
+#define OP_31_XOP_STHX 407
+#define OP_31_XOP_STHUX 439
+#define OP_31_XOP_MTSPR 467
+#define OP_31_XOP_DCBI 470
+#define OP_31_XOP_LWBRX 534
+#define OP_31_XOP_TLBSYNC 566
+#define OP_31_XOP_STWBRX 662
+#define OP_31_XOP_LHBRX 790
+#define OP_31_XOP_STHBRX 918
+
+#define OP_LWZ 32
+#define OP_LWZU 33
+#define OP_LBZ 34
+#define OP_LBZU 35
+#define OP_STW 36
+#define OP_STWU 37
+#define OP_STB 38
+#define OP_STBU 39
+#define OP_LHZ 40
+#define OP_LHZU 41
+#define OP_LHA 42
+#define OP_LHAU 43
+#define OP_STH 44
+#define OP_STHU 45
+
+/*
+ * Re-arm the decrementer hrtimer after the guest's DEC value changed.
+ *
+ * Any pending timer is cancelled first.  On Book3S a DEC value with the
+ * top bit set queues the decrementer interrupt immediately; on BookE a
+ * DEC of zero leaves the timer disarmed.  Otherwise the DEC value is
+ * converted to seconds + nanoseconds and a relative hrtimer is started;
+ * the timebase at that moment is recorded in dec_jiffies.
+ */
+void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
+{
+	unsigned long long t;
+	unsigned long rem_nsec;
+
+	pr_debug("mtDEC: %x\n", vcpu->arch.dec);
+	hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+
+#ifdef CONFIG_PPC_BOOK3S
+	/* mtdec lowers the interrupt line when positive. */
+	kvmppc_core_dequeue_dec(vcpu);
+
+	/* POWER4+ triggers a dec interrupt if the value is < 0 */
+	if (vcpu->arch.dec & 0x80000000) {
+		kvmppc_core_queue_dec(vcpu);
+		return;
+	}
+#endif
+
+#ifdef CONFIG_BOOKE
+	/* On BOOKE, DEC = 0 is as good as decrementer not enabled */
+	if (vcpu->arch.dec == 0)
+		return;
+#endif
+
+	/*
+	 * The decrementer ticks at the timebase rate, so
+	 * ticks * 1000 / tb_ticks_per_usec gives nanoseconds; the second
+	 * division splits that into whole seconds (left in t) and the
+	 * nanosecond remainder.
+	 */
+	t = (unsigned long long)vcpu->arch.dec * 1000;
+	do_div(t, tb_ticks_per_usec);
+	rem_nsec = do_div(t, NSEC_PER_SEC);
+
+	hrtimer_start(&vcpu->arch.dec_timer,
+		      ktime_set(t, rem_nsec), HRTIMER_MODE_REL);
+	vcpu->arch.dec_jiffies = get_tb();
+}
+
+/*
+ * Return the guest-visible DEC value at timebase @tb: the DEC value last
+ * written minus the timebase ticks elapsed since dec_jiffies was recorded.
+ * On BookE the result is clamped at 0 instead of wrapping.
+ */
+u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
+{
+	u64 elapsed = tb - vcpu->arch.dec_jiffies;
+
+#ifdef CONFIG_BOOKE
+	if (elapsed > vcpu->arch.dec)
+		return 0;
+#endif
+
+	return vcpu->arch.dec - elapsed;
+}
+
+/* XXX to do:
+ * lhaux
+ * lswx
+ * lswi
+ * stswx
+ * stswi
+ * lmw
+ * stmw
+ *
+ * (lhax, lha and lhau are handled below.)
+ *
+ * XXX is_bigendian should depend on MMU mapping or MSR[LE]
+ */
+/* XXX Should probably auto-generate instruction decoding for a particular core
+ * from opcode tables in the future. */
+/*
+ * Emulate the instruction returned by kvmppc_get_last_inst().
+ *
+ * @run:  kvm_run area, passed through to the load/store helpers
+ * @vcpu: vcpu whose instruction is being emulated
+ *
+ * Loads and stores are forwarded to kvmppc_handle_load(),
+ * kvmppc_handle_loads() and kvmppc_handle_store(); a few SPRs are handled
+ * inline for mfspr/mtspr; everything not recognised here is offered to
+ * kvmppc_core_emulate_op().  If that also fails, a program interrupt is
+ * queued.  Unless 'advance' has been cleared, the guest PC is moved
+ * forward by 4 before returning.
+ *
+ * Returns the resulting enum emulation_result value.
+ */
+int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+	u32 inst = kvmppc_get_last_inst(vcpu);
+	u32 ea;
+	int ra;
+	int rb;
+	int rs;
+	int rt;
+	int sprn;
+	enum emulation_result emulated = EMULATE_DONE;
+	int advance = 1;
+
+	/* this default type might be overwritten by subcategories */
+	kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
+
+	pr_debug("Emulating opcode %d / %d\n", get_op(inst), get_xop(inst));
+
+	switch (get_op(inst)) {
+	case OP_TRAP:
+#ifdef CONFIG_PPC_BOOK3S
+	case OP_TRAP_64:
+		kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP);
+#else
+		kvmppc_core_queue_program(vcpu,
+					  vcpu->arch.shared->esr | ESR_PTR);
+#endif
+		advance = 0;
+		break;
+
+	case 31:
+		switch (get_xop(inst)) {
+
+		case OP_31_XOP_LWZX:
+			rt = get_rt(inst);
+			emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
+			break;
+
+		case OP_31_XOP_LBZX:
+			rt = get_rt(inst);
+			emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
+			break;
+
+		case OP_31_XOP_LBZUX:
+			rt = get_rt(inst);
+			ra = get_ra(inst);
+			rb = get_rb(inst);
+
+			ea = kvmppc_get_gpr(vcpu, rb);
+			if (ra)
+				ea += kvmppc_get_gpr(vcpu, ra);
+
+			emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
+			kvmppc_set_gpr(vcpu, ra, ea);
+			break;
+
+		case OP_31_XOP_STWX:
+			rs = get_rs(inst);
+			emulated = kvmppc_handle_store(run, vcpu,
+						       kvmppc_get_gpr(vcpu, rs),
+						       4, 1);
+			break;
+
+		case OP_31_XOP_STBX:
+			rs = get_rs(inst);
+			emulated = kvmppc_handle_store(run, vcpu,
+						       kvmppc_get_gpr(vcpu, rs),
+						       1, 1);
+			break;
+
+		case OP_31_XOP_STBUX:
+			rs = get_rs(inst);
+			ra = get_ra(inst);
+			rb = get_rb(inst);
+
+			ea = kvmppc_get_gpr(vcpu, rb);
+			if (ra)
+				ea += kvmppc_get_gpr(vcpu, ra);
+
+			emulated = kvmppc_handle_store(run, vcpu,
+						       kvmppc_get_gpr(vcpu, rs),
+						       1, 1);
+			/*
+			 * Update form: the effective address goes to RA, as
+			 * in the other *UX cases; RS is the data source and
+			 * must not be overwritten.
+			 */
+			kvmppc_set_gpr(vcpu, ra, ea);
+			break;
+
+		case OP_31_XOP_LHAX:
+			rt = get_rt(inst);
+			emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
+			break;
+
+		case OP_31_XOP_LHZX:
+			rt = get_rt(inst);
+			emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
+			break;
+
+		case OP_31_XOP_LHZUX:
+			rt = get_rt(inst);
+			ra = get_ra(inst);
+			rb = get_rb(inst);
+
+			ea = kvmppc_get_gpr(vcpu, rb);
+			if (ra)
+				ea += kvmppc_get_gpr(vcpu, ra);
+
+			emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
+			kvmppc_set_gpr(vcpu, ra, ea);
+			break;
+
+		case OP_31_XOP_MFSPR:
+			sprn = get_sprn(inst);
+			rt = get_rt(inst);
+
+			switch (sprn) {
+			case SPRN_SRR0:
+				kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->srr0);
+				break;
+			case SPRN_SRR1:
+				kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->srr1);
+				break;
+			case SPRN_PVR:
+				kvmppc_set_gpr(vcpu, rt, vcpu->arch.pvr);
+				break;
+			case SPRN_PIR:
+				kvmppc_set_gpr(vcpu, rt, vcpu->vcpu_id);
+				break;
+			case SPRN_MSSSR0:
+				kvmppc_set_gpr(vcpu, rt, 0);
+				break;
+
+			/* Note: mftb and TBRL/TBWL are user-accessible, so
+			 * the guest can always access the real TB anyways.
+			 * In fact, we probably will never see these traps. */
+			/*
+			 * NOTE(review): TBWL yields the upper half and TBWU
+			 * the unshifted value, which looks swapped; left
+			 * unchanged pending confirmation.
+			 */
+			case SPRN_TBWL:
+				kvmppc_set_gpr(vcpu, rt, get_tb() >> 32);
+				break;
+			case SPRN_TBWU:
+				kvmppc_set_gpr(vcpu, rt, get_tb());
+				break;
+
+			case SPRN_SPRG0:
+				kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg0);
+				break;
+			case SPRN_SPRG1:
+				kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg1);
+				break;
+			case SPRN_SPRG2:
+				kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg2);
+				break;
+			case SPRN_SPRG3:
+				kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg3);
+				break;
+			/* Note: SPRG4-7 are user-readable, so we don't get
+			 * a trap. */
+
+			case SPRN_DEC:
+				kvmppc_set_gpr(vcpu, rt,
+					       kvmppc_get_dec(vcpu, get_tb()));
+				break;
+
+			default:
+				emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt);
+				if (emulated == EMULATE_FAIL) {
+					printk("mfspr: unknown spr %x\n", sprn);
+					kvmppc_set_gpr(vcpu, rt, 0);
+				}
+				break;
+			}
+			kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
+			break;
+
+		case OP_31_XOP_STHX:
+			rs = get_rs(inst);
+			emulated = kvmppc_handle_store(run, vcpu,
+						       kvmppc_get_gpr(vcpu, rs),
+						       2, 1);
+			break;
+
+		case OP_31_XOP_STHUX:
+			rs = get_rs(inst);
+			ra = get_ra(inst);
+			rb = get_rb(inst);
+
+			ea = kvmppc_get_gpr(vcpu, rb);
+			if (ra)
+				ea += kvmppc_get_gpr(vcpu, ra);
+
+			emulated = kvmppc_handle_store(run, vcpu,
+						       kvmppc_get_gpr(vcpu, rs),
+						       2, 1);
+			kvmppc_set_gpr(vcpu, ra, ea);
+			break;
+
+		case OP_31_XOP_MTSPR:
+			sprn = get_sprn(inst);
+			rs = get_rs(inst);
+			switch (sprn) {
+			case SPRN_SRR0:
+				vcpu->arch.shared->srr0 = kvmppc_get_gpr(vcpu, rs);
+				break;
+			case SPRN_SRR1:
+				vcpu->arch.shared->srr1 = kvmppc_get_gpr(vcpu, rs);
+				break;
+
+			/* XXX We need to context-switch the timebase for
+			 * watchdog and FIT. */
+			case SPRN_TBWL:
+				break;
+			case SPRN_TBWU:
+				break;
+
+			case SPRN_MSSSR0:
+				break;
+
+			case SPRN_DEC:
+				vcpu->arch.dec = kvmppc_get_gpr(vcpu, rs);
+				kvmppc_emulate_dec(vcpu);
+				break;
+
+			case SPRN_SPRG0:
+				vcpu->arch.shared->sprg0 = kvmppc_get_gpr(vcpu, rs);
+				break;
+			case SPRN_SPRG1:
+				vcpu->arch.shared->sprg1 = kvmppc_get_gpr(vcpu, rs);
+				break;
+			case SPRN_SPRG2:
+				vcpu->arch.shared->sprg2 = kvmppc_get_gpr(vcpu, rs);
+				break;
+			case SPRN_SPRG3:
+				vcpu->arch.shared->sprg3 = kvmppc_get_gpr(vcpu, rs);
+				break;
+
+			default:
+				emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs);
+				if (emulated == EMULATE_FAIL)
+					printk("mtspr: unknown spr %x\n", sprn);
+				break;
+			}
+			kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
+			break;
+
+		case OP_31_XOP_DCBI:
+			/* Do nothing. The guest is performing dcbi because
+			 * hardware DMA is not snooped by the dcache, but
+			 * emulated DMA either goes through the dcache as
+			 * normal writes, or the host kernel has handled dcache
+			 * coherence. */
+			break;
+
+		case OP_31_XOP_LWBRX:
+			rt = get_rt(inst);
+			emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0);
+			break;
+
+		case OP_31_XOP_TLBSYNC:
+			break;
+
+		case OP_31_XOP_STWBRX:
+			rs = get_rs(inst);
+			emulated = kvmppc_handle_store(run, vcpu,
+						       kvmppc_get_gpr(vcpu, rs),
+						       4, 0);
+			break;
+
+		case OP_31_XOP_LHBRX:
+			rt = get_rt(inst);
+			emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0);
+			break;
+
+		case OP_31_XOP_STHBRX:
+			rs = get_rs(inst);
+			emulated = kvmppc_handle_store(run, vcpu,
+						       kvmppc_get_gpr(vcpu, rs),
+						       2, 0);
+			break;
+
+		default:
+			/* Attempt core-specific emulation below. */
+			emulated = EMULATE_FAIL;
+		}
+		break;
+
+	case OP_LWZ:
+		rt = get_rt(inst);
+		emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
+		break;
+
+	case OP_LWZU:
+		ra = get_ra(inst);
+		rt = get_rt(inst);
+		emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
+		kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
+		break;
+
+	case OP_LBZ:
+		rt = get_rt(inst);
+		emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
+		break;
+
+	case OP_LBZU:
+		ra = get_ra(inst);
+		rt = get_rt(inst);
+		emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
+		kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
+		break;
+
+	case OP_STW:
+		rs = get_rs(inst);
+		emulated = kvmppc_handle_store(run, vcpu,
+					       kvmppc_get_gpr(vcpu, rs),
+					       4, 1);
+		break;
+
+	case OP_STWU:
+		ra = get_ra(inst);
+		rs = get_rs(inst);
+		emulated = kvmppc_handle_store(run, vcpu,
+					       kvmppc_get_gpr(vcpu, rs),
+					       4, 1);
+		kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
+		break;
+
+	case OP_STB:
+		rs = get_rs(inst);
+		emulated = kvmppc_handle_store(run, vcpu,
+					       kvmppc_get_gpr(vcpu, rs),
+					       1, 1);
+		break;
+
+	case OP_STBU:
+		ra = get_ra(inst);
+		rs = get_rs(inst);
+		emulated = kvmppc_handle_store(run, vcpu,
+					       kvmppc_get_gpr(vcpu, rs),
+					       1, 1);
+		kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
+		break;
+
+	case OP_LHZ:
+		rt = get_rt(inst);
+		emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
+		break;
+
+	case OP_LHZU:
+		ra = get_ra(inst);
+		rt = get_rt(inst);
+		emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
+		kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
+		break;
+
+	case OP_LHA:
+		rt = get_rt(inst);
+		emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
+		break;
+
+	case OP_LHAU:
+		ra = get_ra(inst);
+		rt = get_rt(inst);
+		emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
+		kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
+		break;
+
+	case OP_STH:
+		rs = get_rs(inst);
+		emulated = kvmppc_handle_store(run, vcpu,
+					       kvmppc_get_gpr(vcpu, rs),
+					       2, 1);
+		break;
+
+	case OP_STHU:
+		ra = get_ra(inst);
+		rs = get_rs(inst);
+		emulated = kvmppc_handle_store(run, vcpu,
+					       kvmppc_get_gpr(vcpu, rs),
+					       2, 1);
+		kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
+		break;
+
+	default:
+		emulated = EMULATE_FAIL;
+	}
+
+	/* Generic decode failed: give the core-specific emulator a chance. */
+	if (emulated == EMULATE_FAIL) {
+		emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance);
+		if (emulated == EMULATE_AGAIN) {
+			advance = 0;
+		} else if (emulated == EMULATE_FAIL) {
+			advance = 0;
+			printk(KERN_ERR "Couldn't emulate instruction 0x%08x "
+			       "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst));
+			kvmppc_core_queue_program(vcpu, 0);
+		}
+	}
+
+	trace_kvm_ppc_instr(inst, kvmppc_get_pc(vcpu), emulated);
+
+	/* Advance past emulated instruction. */
+	if (advance)
+		kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4);
+
+	return emulated;
+}
diff --git a/arch/powerpc/kvm/fpu.S b/arch/powerpc/kvm/fpu.S
new file mode 100644
index 00000000..bf68d597
--- /dev/null
+++ b/arch/powerpc/kvm/fpu.S
@@ -0,0 +1,283 @@
+/*
+ * FPU helper code to use FPU operations from inside the kernel
+ *
+ * Copyright (C) 2010 Alexander Graf (agraf@suse.de)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
+#include <asm/cputable.h>
+#include <asm/cache.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+/* Instructions operating on single parameters */
+
+/*
+ * Single operation with one input operand
+ *
+ * Defines the global routine fps_<name>.
+ *
+ * R3 = (double*)&fpscr
+ * R4 = (short*)&result
+ * R5 = (short*)&param1
+ *
+ * f0 is used for the FPSCR image, the operand and the result in turn.
+ */
+#define FPS_ONE_IN(name) \
+_GLOBAL(fps_ ## name); \
+ lfd 0,0(r3); /* load up fpscr value */ \
+ MTFSF_L(0); \
+ lfs 0,0(r5); /* load param1 */ \
+ \
+ name 0,0; \
+ \
+ stfs 0,0(r4); /* save result */ \
+ mffs 0; \
+ stfd 0,0(r3); /* save new fpscr value */ \
+ blr
+
+/*
+ * Single operation with two input operands
+ *
+ * Defines the global routine fps_<name>.
+ *
+ * R3 = (double*)&fpscr
+ * R4 = (short*)&result
+ * R5 = (short*)&param1
+ * R6 = (short*)&param2
+ *
+ * Uses f0 and f1; the result is produced in f0.
+ */
+#define FPS_TWO_IN(name) \
+_GLOBAL(fps_ ## name); \
+ lfd 0,0(r3); /* load up fpscr value */ \
+ MTFSF_L(0); \
+ lfs 0,0(r5); /* load param1 */ \
+ lfs 1,0(r6); /* load param2 */ \
+ \
+ name 0,0,1; \
+ \
+ stfs 0,0(r4); /* save result */ \
+ mffs 0; \
+ stfd 0,0(r3); /* save new fpscr value */ \
+ blr
+
+/*
+ * Single operation with three input operands
+ *
+ * Defines the global routine fps_<name>.
+ *
+ * R3 = (double*)&fpscr
+ * R4 = (short*)&result
+ * R5 = (short*)&param1
+ * R6 = (short*)&param2
+ * R7 = (short*)&param3
+ *
+ * Uses f0, f1 and f2; the result is produced in f0.
+ */
+#define FPS_THREE_IN(name) \
+_GLOBAL(fps_ ## name); \
+ lfd 0,0(r3); /* load up fpscr value */ \
+ MTFSF_L(0); \
+ lfs 0,0(r5); /* load param1 */ \
+ lfs 1,0(r6); /* load param2 */ \
+ lfs 2,0(r7); /* load param3 */ \
+ \
+ name 0,0,1,2; \
+ \
+ stfs 0,0(r4); /* save result */ \
+ mffs 0; \
+ stfd 0,0(r3); /* save new fpscr value */ \
+ blr
+
+/* Each line below defines one global fps_<insn> routine. */
+FPS_ONE_IN(fres)
+FPS_ONE_IN(frsqrte)
+FPS_ONE_IN(fsqrts)
+FPS_TWO_IN(fadds)
+FPS_TWO_IN(fdivs)
+FPS_TWO_IN(fmuls)
+FPS_TWO_IN(fsubs)
+FPS_THREE_IN(fmadds)
+FPS_THREE_IN(fmsubs)
+FPS_THREE_IN(fnmadds)
+FPS_THREE_IN(fnmsubs)
+FPS_THREE_IN(fsel)
+
+
+/* Instructions operating on double parameters */
+
+/*
+ * Beginning of double instruction processing
+ *
+ * R3 = (double*)&fpscr
+ * R4 = (u32*)&cr
+ * R5 = (double*)&result
+ * R6 = (double*)&param1
+ * R7 = (double*)&param2 [load_two]
+ * R8 = (double*)&param3 [load_three]
+ * LR = instruction call function
+ *
+ * The entry points fall through into one another, so fpd_load_three also
+ * loads param2 and param1, and every entry ends by loading FPSCR and CR.
+ * Operands land in f0..f2; f3 carries the FPSCR image and r6 is
+ * overwritten with the CR image.
+ */
+fpd_load_three:
+ lfd 2,0(r8) /* load param3 */
+fpd_load_two:
+ lfd 1,0(r7) /* load param2 */
+fpd_load_one:
+ lfd 0,0(r6) /* load param1 */
+fpd_load_none:
+ lfd 3,0(r3) /* load up fpscr value */
+ MTFSF_L(3)
+ lwz r6, 0(r4) /* load cr */
+ mtcr r6
+ blr
+
+/*
+ * End of double instruction processing
+ *
+ * R3 = (double*)&fpscr
+ * R4 = (u32*)&cr
+ * R5 = (double*)&result
+ * LR = caller of instruction call function
+ *
+ * Stores the result from f0, then the new FPSCR and CR values, and
+ * returns through LR.  r6 and f0 are overwritten.
+ */
+fpd_return:
+ mfcr r6
+ stfd 0,0(r5) /* save result */
+ mffs 0
+ stfd 0,0(r3) /* save new fpscr value */
+ stw r6,0(r4) /* save new cr value */
+ blr
+
+/*
+ * Double operation with no input operand
+ *
+ * Defines the global routine fpd_<name>.
+ *
+ * R3 = (double*)&fpscr
+ * R4 = (u32*)&cr
+ * R5 = (double*)&result
+ *
+ * LR is parked in r12 across the call to fpd_load_none; the routine
+ * returns to its caller through fpd_return.
+ */
+#define FPD_NONE_IN(name) \
+_GLOBAL(fpd_ ## name); \
+ mflr r12; \
+ bl fpd_load_none; \
+ mtlr r12; \
+ \
+ name. 0; /* call instruction */ \
+ b fpd_return
+
+/*
+ * Double operation with one input operand
+ *
+ * Defines the global routine fpd_<name>.
+ *
+ * R3 = (double*)&fpscr
+ * R4 = (u32*)&cr
+ * R5 = (double*)&result
+ * R6 = (double*)&param1
+ *
+ * LR is parked in r12 across the call to fpd_load_one; the routine
+ * returns to its caller through fpd_return.
+ */
+#define FPD_ONE_IN(name) \
+_GLOBAL(fpd_ ## name); \
+ mflr r12; \
+ bl fpd_load_one; \
+ mtlr r12; \
+ \
+ name. 0,0; /* call instruction */ \
+ b fpd_return
+
+/*
+ * Double operation with two input operands
+ *
+ * Defines the global routine fpd_<name>.
+ *
+ * R3 = (double*)&fpscr
+ * R4 = (u32*)&cr
+ * R5 = (double*)&result
+ * R6 = (double*)&param1
+ * R7 = (double*)&param2
+ *
+ * LR is parked in r12 across the call to fpd_load_two; the routine
+ * returns to its caller through fpd_return.
+ */
+#define FPD_TWO_IN(name) \
+_GLOBAL(fpd_ ## name); \
+ mflr r12; \
+ bl fpd_load_two; \
+ mtlr r12; \
+ \
+ name. 0,0,1; /* call instruction */ \
+ b fpd_return
+
+/*
+ * CR Double operation with two input operands
+ *
+ * Defines the global routine fpd_<name>.  There is no result pointer:
+ * only the new FPSCR and CR values are written back.
+ *
+ * R3 = (double*)&fpscr
+ * R4 = (u32*)&cr
+ * R5 = (double*)&param1
+ * R6 = (double*)&param2
+ *
+ * r6 is reused for the CR image once param2 has been loaded.
+ */
+#define FPD_TWO_IN_CR(name) \
+_GLOBAL(fpd_ ## name); \
+ lfd 1,0(r6); /* load param2 */ \
+ lfd 0,0(r5); /* load param1 */ \
+ lfd 3,0(r3); /* load up fpscr value */ \
+ MTFSF_L(3); \
+ lwz r6, 0(r4); /* load cr */ \
+ mtcr r6; \
+ \
+ name 0,0,1; /* call instruction */ \
+ mfcr r6; \
+ mffs 0; \
+ stfd 0,0(r3); /* save new fpscr value */ \
+ stw r6,0(r4); /* save new cr value */ \
+ blr
+
+/*
+ * Double operation with three input operands
+ *
+ * Defines the global routine fpd_<name>.
+ *
+ * R3 = (double*)&fpscr
+ * R4 = (u32*)&cr
+ * R5 = (double*)&result
+ * R6 = (double*)&param1
+ * R7 = (double*)&param2
+ * R8 = (double*)&param3
+ *
+ * LR is parked in r12 across the call to fpd_load_three; the routine
+ * returns to its caller through fpd_return.
+ */
+#define FPD_THREE_IN(name) \
+_GLOBAL(fpd_ ## name); \
+ mflr r12; \
+ bl fpd_load_three; \
+ mtlr r12; \
+ \
+ name. 0,0,1,2; /* call instruction */ \
+ b fpd_return
+
+/* Each line below defines one global fpd_<insn> routine. */
+FPD_ONE_IN(fsqrts)
+FPD_ONE_IN(frsqrtes)
+FPD_ONE_IN(fres)
+FPD_ONE_IN(frsp)
+FPD_ONE_IN(fctiw)
+FPD_ONE_IN(fctiwz)
+FPD_ONE_IN(fsqrt)
+FPD_ONE_IN(fre)
+FPD_ONE_IN(frsqrte)
+FPD_ONE_IN(fneg)
+FPD_ONE_IN(fabs)
+FPD_TWO_IN(fadds)
+FPD_TWO_IN(fsubs)
+FPD_TWO_IN(fdivs)
+FPD_TWO_IN(fmuls)
+FPD_TWO_IN_CR(fcmpu)
+FPD_TWO_IN(fcpsgn)
+FPD_TWO_IN(fdiv)
+FPD_TWO_IN(fadd)
+FPD_TWO_IN(fmul)
+FPD_TWO_IN_CR(fcmpo)
+FPD_TWO_IN(fsub)
+FPD_THREE_IN(fmsubs)
+FPD_THREE_IN(fmadds)
+FPD_THREE_IN(fnmsubs)
+FPD_THREE_IN(fnmadds)
+FPD_THREE_IN(fsel)
+FPD_THREE_IN(fmsub)
+FPD_THREE_IN(fmadd)
+FPD_THREE_IN(fnmsub)
+FPD_THREE_IN(fnmadd)
+
+/*
+ * Load a single-precision value from 0(r3) and store it in
+ * double-precision form to 0(r4).  Uses f0.
+ */
+_GLOBAL(kvm_cvt_fd)
+ lfs 0,0(r3)
+ stfd 0,0(r4)
+ blr
+
+/*
+ * Load a double-precision value from 0(r3) and store it in
+ * single-precision form to 0(r4).  Uses f0.
+ */
+_GLOBAL(kvm_cvt_df)
+ lfd 0,0(r3)
+ stfs 0,0(r4)
+ blr
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
new file mode 100644
index 00000000..00d7e345
--- /dev/null
+++ b/arch/powerpc/kvm/powerpc.c
@@ -0,0 +1,818 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2007
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/vmalloc.h>
+#include <linux/hrtimer.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <asm/cputable.h>
+#include <asm/uaccess.h>
+#include <asm/kvm_ppc.h>
+#include <asm/tlbflush.h>
+#include <asm/cputhreads.h>
+#include "timing.h"
+#include "../mm/mmu_decl.h"
+
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
+/*
+ * Report whether the vcpu has work to do: it is runnable when the guest has
+ * not set MSR[WE], when any exception is pending, or when a request is
+ * outstanding. Returns 1 if runnable, 0 otherwise.
+ */
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
+{
+	if (!(v->arch.shared->msr & MSR_WE))
+		return 1;
+
+	if (v->arch.pending_exceptions)
+		return 1;
+
+	return v->requests != 0;
+}
+
+/*
+ * Handle a KVM paravirtual hypercall issued by the guest.
+ *
+ * The hypercall number is read from guest r11 and up to four arguments from
+ * guest r3-r6; the arguments are truncated to 32 bits when MSR[SF] is clear.
+ * The HC_EV_* status is returned to the caller, and a secondary value is
+ * always written to guest r4 (0 on the unimplemented path).
+ */
+int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
+{
+ int nr = kvmppc_get_gpr(vcpu, 11);
+ int r;
+ unsigned long __maybe_unused param1 = kvmppc_get_gpr(vcpu, 3);
+ unsigned long __maybe_unused param2 = kvmppc_get_gpr(vcpu, 4);
+ unsigned long __maybe_unused param3 = kvmppc_get_gpr(vcpu, 5);
+ unsigned long __maybe_unused param4 = kvmppc_get_gpr(vcpu, 6);
+ unsigned long r2 = 0;
+
+ if (!(vcpu->arch.shared->msr & MSR_SF)) {
+ /* 32 bit mode */
+ param1 &= 0xffffffff;
+ param2 &= 0xffffffff;
+ param3 &= 0xffffffff;
+ param4 &= 0xffffffff;
+ }
+
+ switch (nr) {
+ case HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE:
+ {
+ /* param1/param2 are recorded as the magic page's pa and ea */
+ vcpu->arch.magic_page_pa = param1;
+ vcpu->arch.magic_page_ea = param2;
+
+ r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7;
+
+ r = HC_EV_SUCCESS;
+ break;
+ }
+ case HC_VENDOR_KVM | KVM_HC_FEATURES:
+ r = HC_EV_SUCCESS;
+#if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500)
+ /* XXX Missing magic page on 44x */
+ r2 |= (1 << KVM_FEATURE_MAGIC_PAGE);
+#endif
+
+ /* Second return value is in r4 */
+ break;
+ default:
+ r = HC_EV_UNIMPLEMENTED;
+ break;
+ }
+
+ kvmppc_set_gpr(vcpu, 4, r2);
+
+ return r;
+}
+
+/*
+ * Validate the vcpu configuration and cache the verdict in vcpu->arch.sane.
+ *
+ * The configuration is rejected when no PVR has been set, when PAPR mode is
+ * enabled on anything other than a book3s_64 cpu type, or (HV builds only)
+ * when PAPR mode is not enabled. Returns 0 if sane, -EINVAL otherwise.
+ */
+int kvmppc_sanity_check(struct kvm_vcpu *vcpu)
+{
+	bool sane = true;
+
+	if (!vcpu->arch.pvr) {
+		/* We have to know what CPU to virtualize */
+		sane = false;
+	} else if (vcpu->arch.papr_enabled &&
+		   vcpu->arch.cpu_type != KVM_CPU_3S_64) {
+		/* PAPR only works with book3s_64 */
+		sane = false;
+	}
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+	else if (!vcpu->arch.papr_enabled) {
+		/* HV KVM can only do PAPR mode for now */
+		sane = false;
+	}
+#endif
+
+	vcpu->arch.sane = sane;
+	return sane ? 0 : -EINVAL;
+}
+
+/*
+ * Emulate the instruction that triggered an MMIO access and translate the
+ * emulation result into a RESUME_* code for the caller.
+ *
+ * EMULATE_DONE resumes the guest, EMULATE_DO_MMIO sets the exit reason to
+ * KVM_EXIT_MMIO and returns to the host, EMULATE_FAIL logs the failing
+ * instruction and returns to the host. Any other result is a BUG().
+ */
+int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+ enum emulation_result er;
+ int r;
+
+ er = kvmppc_emulate_instruction(run, vcpu);
+ switch (er) {
+ case EMULATE_DONE:
+ /* Future optimization: only reload non-volatiles if they were
+ * actually modified. */
+ r = RESUME_GUEST_NV;
+ break;
+ case EMULATE_DO_MMIO:
+ run->exit_reason = KVM_EXIT_MMIO;
+ /* We must reload nonvolatiles because "update" load/store
+ * instructions modify register state. */
+ /* Future optimization: only reload non-volatiles if they were
+ * actually modified. */
+ r = RESUME_HOST_NV;
+ break;
+ case EMULATE_FAIL:
+ /* XXX Deliver Program interrupt to guest. */
+ printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__,
+ kvmppc_get_last_inst(vcpu));
+ r = RESUME_HOST;
+ break;
+ default:
+ BUG();
+ }
+
+ return r;
+}
+
+/* Nothing is done here for this architecture; always returns 0. */
+int kvm_arch_hardware_enable(void *garbage)
+{
+ return 0;
+}
+
+/* Counterpart of kvm_arch_hardware_enable(); intentionally empty. */
+void kvm_arch_hardware_disable(void *garbage)
+{
+}
+
+/* No setup work is performed here; always returns 0. */
+int kvm_arch_hardware_setup(void)
+{
+ return 0;
+}
+
+/* Counterpart of kvm_arch_hardware_setup(); intentionally empty. */
+void kvm_arch_hardware_unsetup(void)
+{
+}
+
+/*
+ * Store the result of kvmppc_core_check_processor_compat() into the int
+ * that rtn points to.
+ */
+void kvm_arch_check_processor_compat(void *rtn)
+{
+ *(int *)rtn = kvmppc_core_check_processor_compat();
+}
+
+/*
+ * Architecture hook for VM creation. Only type 0 is accepted; any other
+ * value yields -EINVAL. Otherwise the result of kvmppc_core_init_vm() is
+ * returned.
+ */
+int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
+{
+	return type ? -EINVAL : kvmppc_core_init_vm(kvm);
+}
+
+/*
+ * Tear down a VM: free every vcpu, then, holding kvm->lock, clear the vcpu
+ * table, reset the online vcpu count and run the core-specific destroy hook.
+ */
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
+ unsigned int i;
+ struct kvm_vcpu *vcpu;
+
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_arch_vcpu_free(vcpu);
+
+ mutex_lock(&kvm->lock);
+ /* the vcpus were freed above; drop the now-dangling pointers */
+ for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
+ kvm->vcpus[i] = NULL;
+
+ atomic_set(&kvm->online_vcpus, 0);
+
+ kvmppc_core_destroy_vm(kvm);
+
+ mutex_unlock(&kvm->lock);
+}
+
+/* Intentionally empty on this architecture. */
+void kvm_arch_sync_events(struct kvm *kvm)
+{
+}
+
+/*
+ * Report whether (and to what degree) a KVM capability is supported by
+ * this build.
+ *
+ * The set of recognised capabilities depends on CONFIG_BOOKE,
+ * CONFIG_KVM_E500 and CONFIG_KVM_BOOK3S_64_HV. Returns 0 for anything not
+ * recognised, otherwise a capability-specific positive value (1 for plain
+ * "supported", or a count/offset/level for the capabilities that carry one).
+ */
+int kvm_dev_ioctl_check_extension(long ext)
+{
+ int r;
+
+ switch (ext) {
+#ifdef CONFIG_BOOKE
+ case KVM_CAP_PPC_BOOKE_SREGS:
+#else
+ case KVM_CAP_PPC_SEGSTATE:
+ case KVM_CAP_PPC_HIOR:
+ case KVM_CAP_PPC_PAPR:
+#endif
+ case KVM_CAP_PPC_UNSET_IRQ:
+ case KVM_CAP_PPC_IRQ_LEVEL:
+ case KVM_CAP_ENABLE_CAP:
+ case KVM_CAP_ONE_REG:
+ r = 1;
+ break;
+#ifndef CONFIG_KVM_BOOK3S_64_HV
+ case KVM_CAP_PPC_PAIRED_SINGLES:
+ case KVM_CAP_PPC_OSI:
+ case KVM_CAP_PPC_GET_PVINFO:
+#ifdef CONFIG_KVM_E500
+ case KVM_CAP_SW_TLB:
+#endif
+ r = 1;
+ break;
+ case KVM_CAP_COALESCED_MMIO:
+ r = KVM_COALESCED_MMIO_PAGE_OFFSET;
+ break;
+#endif
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+ case KVM_CAP_SPAPR_TCE:
+ r = 1;
+ break;
+ case KVM_CAP_PPC_SMT:
+ r = threads_per_core;
+ break;
+ case KVM_CAP_PPC_RMA:
+ r = 1;
+ /* PPC970 requires an RMA */
+ if (cpu_has_feature(CPU_FTR_ARCH_201))
+ r = 2;
+ break;
+ case KVM_CAP_SYNC_MMU:
+ r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
+ break;
+#endif
+ case KVM_CAP_NR_VCPUS:
+ /*
+ * Recommending a number of CPUs is somewhat arbitrary; we
+ * return the number of present CPUs for -HV (since a host
+ * will have secondary threads "offline"), and for other KVM
+ * implementations just count online CPUs.
+ */
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+ r = num_present_cpus();
+#else
+ r = num_online_cpus();
+#endif
+ break;
+ case KVM_CAP_MAX_VCPUS:
+ r = KVM_MAX_VCPUS;
+ break;
+ default:
+ r = 0;
+ break;
+ }
+ return r;
+
+}
+
+/* No architecture-specific /dev/kvm ioctls are handled; always -EINVAL. */
+long kvm_arch_dev_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ return -EINVAL;
+}
+
+/* Intentionally empty: nothing is released per memslot here. */
+void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+ struct kvm_memory_slot *dont)
+{
+}
+
+/* Nothing is allocated per memslot here; always returns 0. */
+int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+{
+ return 0;
+}
+
+/*
+ * Forward to kvmppc_core_prepare_memory_region() and return its result.
+ * Only kvm and mem are passed on; memslot, old and user_alloc are unused.
+ */
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
+ struct kvm_memory_slot old,
+ struct kvm_userspace_memory_region *mem,
+ int user_alloc)
+{
+ return kvmppc_core_prepare_memory_region(kvm, mem);
+}
+
+/*
+ * Forward to kvmppc_core_commit_memory_region(). Only kvm and mem are
+ * passed on; old and user_alloc are unused.
+ */
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ struct kvm_memory_slot old,
+ int user_alloc)
+{
+ kvmppc_core_commit_memory_region(kvm, mem);
+}
+
+
+/* Intentionally empty on this architecture. */
+void kvm_arch_flush_shadow(struct kvm *kvm)
+{
+}
+
+/*
+ * Create a vcpu through the core-specific constructor. On success the
+ * vcpu's wait queue pointer is aimed at its own wait queue and its debugfs
+ * entry is created. An ERR_PTR from the constructor is returned unchanged.
+ */
+struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+	struct kvm_vcpu *new_vcpu = kvmppc_core_vcpu_create(kvm, id);
+
+	if (IS_ERR(new_vcpu))
+		return new_vcpu;
+
+	new_vcpu->arch.wqp = &new_vcpu->wq;
+	kvmppc_create_vcpu_debugfs(new_vcpu, id);
+
+	return new_vcpu;
+}
+
+/*
+ * Release a vcpu: stop its decrementer timer and tasklet first, then remove
+ * the debugfs entry and hand the vcpu to the core-specific free routine.
+ */
+void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
+{
+ /* Make sure we're not using the vcpu anymore */
+ hrtimer_cancel(&vcpu->arch.dec_timer);
+ tasklet_kill(&vcpu->arch.tasklet);
+
+ kvmppc_remove_vcpu_debugfs(vcpu);
+ kvmppc_core_vcpu_free(vcpu);
+}
+
+/* Alias for kvm_arch_vcpu_free(). */
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+ kvm_arch_vcpu_free(vcpu);
+}
+
+/* Returns whatever kvmppc_core_pending_dec() reports for this vcpu. */
+int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
+{
+ return kvmppc_core_pending_dec(vcpu);
+}
+
+/*
+ * hrtimer callback for the vcpu's decrementer timer. It runs in hardirq
+ * context, so the actual work is deferred to the vcpu's tasklet. The timer
+ * is not re-armed from here.
+ */
+enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
+{
+	struct kvm_vcpu *owner =
+		container_of(timer, struct kvm_vcpu, arch.dec_timer);
+
+	tasklet_schedule(&owner->arch.tasklet);
+	return HRTIMER_NORESTART;
+}
+
+/*
+ * Initialise the per-vcpu decrementer machinery: the hrtimer, the tasklet
+ * it schedules, and the expiry value (set to all ones). With
+ * CONFIG_KVM_EXIT_TIMING the exit timing mutex is initialised too.
+ * Always returns 0.
+ */
+int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+ hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
+ tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu);
+ vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
+ vcpu->arch.dec_expires = ~(u64)0;
+
+#ifdef CONFIG_KVM_EXIT_TIMING
+ mutex_init(&vcpu->arch.exit_timing_lock);
+#endif
+
+ return 0;
+}
+
+/* Undo per-vcpu initialisation by calling kvmppc_mmu_destroy(). */
+void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+ kvmppc_mmu_destroy(vcpu);
+}
+
+/*
+ * Called when the vcpu is loaded onto a host cpu: restores the guest's
+ * VRSAVE on BookE, runs the core-specific load hook and records the
+ * current processor in vcpu->cpu.
+ */
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+#ifdef CONFIG_BOOKE
+ /*
+ * vrsave (formerly usprg0) isn't used by Linux, but may
+ * be used by the guest.
+ *
+ * On non-booke this is associated with Altivec and
+ * is handled by code in book3s.c.
+ */
+ mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
+#endif
+ kvmppc_core_vcpu_load(vcpu, cpu);
+ vcpu->cpu = smp_processor_id();
+}
+
+/*
+ * Counterpart of kvm_arch_vcpu_load(): runs the core-specific put hook,
+ * saves VRSAVE on BookE and marks the vcpu as not resident on any cpu
+ * (vcpu->cpu = -1).
+ */
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+{
+ kvmppc_core_vcpu_put(vcpu);
+#ifdef CONFIG_BOOKE
+ vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
+#endif
+ vcpu->cpu = -1;
+}
+
+/* Guest debug is not implemented here; always returns -EINVAL. */
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
+{
+ return -EINVAL;
+}
+
+/*
+ * Finish a DCR read: copy the value supplied in run->dcr.data into the
+ * guest register recorded in vcpu->arch.io_gpr.
+ */
+static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
+ struct kvm_run *run)
+{
+ kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, run->dcr.data);
+}
+
+/*
+ * Complete an MMIO load once userspace has placed the data in
+ * run->mmio.data.
+ *
+ * The raw bytes are converted according to vcpu->arch.mmio_is_bigendian,
+ * optionally sign-extended (vcpu->arch.mmio_sign_extend) and then written
+ * to the destination encoded in vcpu->arch.io_gpr: the
+ * KVM_MMIO_REG_EXT_MASK bits select the register file and
+ * KVM_MMIO_REG_MASK the index within it.
+ *
+ * A length larger than 8 bytes is logged and ignored.
+ */
+static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
+				      struct kvm_run *run)
+{
+	/*
+	 * Start from zero: lengths the switches below do not handle (e.g. an
+	 * 8-byte little-endian access) must not hand stale stack contents to
+	 * the guest.
+	 */
+	u64 gpr = 0;
+
+	if (run->mmio.len > sizeof(gpr)) {
+		printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len);
+		return;
+	}
+
+	if (vcpu->arch.mmio_is_bigendian) {
+		switch (run->mmio.len) {
+		case 8: gpr = *(u64 *)run->mmio.data; break;
+		case 4: gpr = *(u32 *)run->mmio.data; break;
+		case 2: gpr = *(u16 *)run->mmio.data; break;
+		case 1: gpr = *(u8 *)run->mmio.data; break;
+		}
+	} else {
+		/* Convert BE data from userland back to LE. */
+		switch (run->mmio.len) {
+		case 4: gpr = ld_le32((u32 *)run->mmio.data); break;
+		case 2: gpr = ld_le16((u16 *)run->mmio.data); break;
+		case 1: gpr = *(u8 *)run->mmio.data; break;
+		}
+	}
+
+	if (vcpu->arch.mmio_sign_extend) {
+		switch (run->mmio.len) {
+#ifdef CONFIG_PPC64
+		case 4:
+			gpr = (s64)(s32)gpr;
+			break;
+#endif
+		case 2:
+			gpr = (s64)(s16)gpr;
+			break;
+		case 1:
+			gpr = (s64)(s8)gpr;
+			break;
+		}
+	}
+
+	/*
+	 * The destination is written only through the switch below. io_gpr
+	 * can carry register-file selector bits, so using it unmasked as a
+	 * GPR index before dispatching would be wrong for FPR/QPR targets.
+	 */
+	switch (vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) {
+	case KVM_MMIO_REG_GPR:
+		kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
+		break;
+	case KVM_MMIO_REG_FPR:
+		vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
+		break;
+#ifdef CONFIG_PPC_BOOK3S
+	case KVM_MMIO_REG_QPR:
+		vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
+		break;
+	case KVM_MMIO_REG_FQPR:
+		vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
+		vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
+		break;
+#endif
+	default:
+		BUG();
+	}
+}
+
+/*
+ * Set up an MMIO read that userspace must satisfy.
+ *
+ * Fills run->mmio with the physical address and length of the access and
+ * records in the vcpu which register (rt) receives the result and in which
+ * byte order. The load is marked as zero-extending. Always returns
+ * EMULATE_DO_MMIO.
+ *
+ * An oversized request is only logged; the request is still set up.
+ */
+int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+		       unsigned int rt, unsigned int bytes, int is_bigendian)
+{
+	if (bytes > sizeof(run->mmio.data)) {
+		/* report the requested size; run->mmio.len is not set yet */
+		printk(KERN_ERR "%s: bad MMIO length: %u\n", __func__,
+		       bytes);
+	}
+
+	run->mmio.phys_addr = vcpu->arch.paddr_accessed;
+	run->mmio.len = bytes;
+	run->mmio.is_write = 0;
+
+	vcpu->arch.io_gpr = rt;
+	vcpu->arch.mmio_is_bigendian = is_bigendian;
+	vcpu->mmio_needed = 1;
+	vcpu->mmio_is_write = 0;
+	vcpu->arch.mmio_sign_extend = 0;
+
+	return EMULATE_DO_MMIO;
+}
+
+/*
+ * Sign-extending variant of kvmppc_handle_load(): sets up the same MMIO
+ * read, then flags the result for sign extension on completion.
+ */
+int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int rt, unsigned int bytes, int is_bigendian)
+{
+ int r;
+
+ r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian);
+ /* must follow the call above, which resets the flag to 0 */
+ vcpu->arch.mmio_sign_extend = 1;
+
+ return r;
+}
+
+/*
+ * Set up an MMIO write for userspace to carry out.
+ *
+ * Fills run->mmio with the physical address and length of the access and
+ * places val in run->mmio.data in the requested byte order. Little-endian
+ * stores are handled for 1, 2 and 4 bytes only. Always returns
+ * EMULATE_DO_MMIO.
+ *
+ * An oversized request is only logged; no data is copied for it because
+ * none of the size cases below match.
+ */
+int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
+			u64 val, unsigned int bytes, int is_bigendian)
+{
+	void *data = run->mmio.data;
+
+	if (bytes > sizeof(run->mmio.data)) {
+		/* report the requested size; run->mmio.len is not set yet */
+		printk(KERN_ERR "%s: bad MMIO length: %u\n", __func__,
+		       bytes);
+	}
+
+	run->mmio.phys_addr = vcpu->arch.paddr_accessed;
+	run->mmio.len = bytes;
+	run->mmio.is_write = 1;
+	vcpu->mmio_needed = 1;
+	vcpu->mmio_is_write = 1;
+
+	/* Store the value at the lowest bytes in 'data'. */
+	if (is_bigendian) {
+		switch (bytes) {
+		case 8: *(u64 *)data = val; break;
+		case 4: *(u32 *)data = val; break;
+		case 2: *(u16 *)data = val; break;
+		case 1: *(u8 *)data = val; break;
+		}
+	} else {
+		/* Store LE value into 'data'. */
+		switch (bytes) {
+		case 4: st_le32(data, val); break;
+		case 2: st_le16(data, val); break;
+		case 1: *(u8 *)data = val; break;
+		}
+	}
+
+	return EMULATE_DO_MMIO;
+}
+
+/*
+ * Run the vcpu.
+ *
+ * Before entering the guest, any exit that userspace was asked to service
+ * (MMIO, DCR, OSI or PAPR hypercall; at most one is completed per call) has
+ * its results copied back into guest registers. The vcpu's signal mask, if
+ * active, is installed for the duration of the run and restored afterwards.
+ * Returns the result of kvmppc_vcpu_run().
+ */
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ int r;
+ sigset_t sigsaved;
+
+ if (vcpu->sigset_active)
+ sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+
+ if (vcpu->mmio_needed) {
+ /* only loads have a result to deliver to the guest */
+ if (!vcpu->mmio_is_write)
+ kvmppc_complete_mmio_load(vcpu, run);
+ vcpu->mmio_needed = 0;
+ } else if (vcpu->arch.dcr_needed) {
+ if (!vcpu->arch.dcr_is_write)
+ kvmppc_complete_dcr_load(vcpu, run);
+ vcpu->arch.dcr_needed = 0;
+ } else if (vcpu->arch.osi_needed) {
+ u64 *gprs = run->osi.gprs;
+ int i;
+
+ /* all 32 GPRs are taken from the values userspace left behind */
+ for (i = 0; i < 32; i++)
+ kvmppc_set_gpr(vcpu, i, gprs[i]);
+ vcpu->arch.osi_needed = 0;
+ } else if (vcpu->arch.hcall_needed) {
+ int i;
+
+ /* return code goes to r3, the nine args slots to r4..r12 */
+ kvmppc_set_gpr(vcpu, 3, run->papr_hcall.ret);
+ for (i = 0; i < 9; ++i)
+ kvmppc_set_gpr(vcpu, 4 + i, run->papr_hcall.args[i]);
+ vcpu->arch.hcall_needed = 0;
+ }
+
+ r = kvmppc_vcpu_run(run, vcpu);
+
+ if (vcpu->sigset_active)
+ sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
+ return r;
+}
+
+/*
+ * Nudge a vcpu so that it notices newly queued work.
+ *
+ * If the vcpu is sleeping on its wait queue it is woken up (and the
+ * halt_wakeup statistic bumped). Otherwise, if it is resident on another
+ * host cpu, that cpu is sent a reschedule IPI.
+ */
+void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+	int me;
+	int cpu = vcpu->cpu;
+
+	me = get_cpu();
+	if (waitqueue_active(vcpu->arch.wqp)) {
+		wake_up_interruptible(vcpu->arch.wqp);
+		vcpu->stat.halt_wakeup++;
+	} else if (cpu != me && cpu != -1) {
+		/*
+		 * Use the snapshot that was just validated: vcpu->cpu can be
+		 * reset to -1 concurrently when the vcpu is put, and must not
+		 * be re-read as the IPI target.
+		 */
+		smp_send_reschedule(cpu);
+	}
+	put_cpu();
+}
+
+/*
+ * Handle KVM_INTERRUPT: KVM_INTERRUPT_UNSET withdraws the external
+ * interrupt, any other value queues it and kicks the vcpu.
+ * Always returns 0.
+ */
+int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
+{
+	if (irq->irq != KVM_INTERRUPT_UNSET) {
+		kvmppc_core_queue_external(vcpu, irq);
+		kvm_vcpu_kick(vcpu);
+	} else {
+		kvmppc_core_dequeue_external(vcpu, irq);
+	}
+
+	return 0;
+}
+
+/*
+ * Handle KVM_ENABLE_CAP for a vcpu.
+ *
+ * No flags are accepted. OSI and PAPR simply set the matching enable bit;
+ * on E500, SW_TLB copies a kvm_config_tlb from the user pointer in args[0]
+ * and applies it. After a successful change the vcpu configuration is
+ * re-validated, so the call can still fail with the sanity check's error.
+ * Unknown capabilities yield -EINVAL.
+ */
+static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
+ struct kvm_enable_cap *cap)
+{
+ int r;
+
+ if (cap->flags)
+ return -EINVAL;
+
+ switch (cap->cap) {
+ case KVM_CAP_PPC_OSI:
+ r = 0;
+ vcpu->arch.osi_enabled = true;
+ break;
+ case KVM_CAP_PPC_PAPR:
+ r = 0;
+ vcpu->arch.papr_enabled = true;
+ break;
+#ifdef CONFIG_KVM_E500
+ case KVM_CAP_SW_TLB: {
+ struct kvm_config_tlb cfg;
+ void __user *user_ptr = (void __user *)(uintptr_t)cap->args[0];
+
+ r = -EFAULT;
+ if (copy_from_user(&cfg, user_ptr, sizeof(cfg)))
+ break;
+
+ r = kvm_vcpu_ioctl_config_tlb(vcpu, &cfg);
+ break;
+ }
+#endif
+ default:
+ r = -EINVAL;
+ break;
+ }
+
+ if (!r)
+ r = kvmppc_sanity_check(vcpu);
+
+ return r;
+}
+
+/* MP state is not implemented here; always returns -EINVAL. */
+int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ return -EINVAL;
+}
+
+/* MP state is not implemented here; always returns -EINVAL. */
+int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ return -EINVAL;
+}
+
+/*
+ * Architecture-specific vcpu ioctl dispatcher.
+ *
+ * Each handled ioctl copies its argument structure in from userspace
+ * (-EFAULT on failure) and forwards it to the matching helper, whose result
+ * is returned. Unrecognised ioctls yield -EINVAL.
+ */
+long kvm_arch_vcpu_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ struct kvm_vcpu *vcpu = filp->private_data;
+ void __user *argp = (void __user *)arg;
+ long r;
+
+ switch (ioctl) {
+ case KVM_INTERRUPT: {
+ struct kvm_interrupt irq;
+ r = -EFAULT;
+ if (copy_from_user(&irq, argp, sizeof(irq)))
+ goto out;
+ r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
+ goto out;
+ }
+
+ case KVM_ENABLE_CAP:
+ {
+ struct kvm_enable_cap cap;
+ r = -EFAULT;
+ if (copy_from_user(&cap, argp, sizeof(cap)))
+ goto out;
+ r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
+ break;
+ }
+
+ case KVM_SET_ONE_REG:
+ case KVM_GET_ONE_REG:
+ {
+ /* both directions take the same descriptor; nothing is copied back here */
+ struct kvm_one_reg reg;
+ r = -EFAULT;
+ if (copy_from_user(&reg, argp, sizeof(reg)))
+ goto out;
+ if (ioctl == KVM_SET_ONE_REG)
+ r = kvm_vcpu_ioctl_set_one_reg(vcpu, &reg);
+ else
+ r = kvm_vcpu_ioctl_get_one_reg(vcpu, &reg);
+ break;
+ }
+
+#ifdef CONFIG_KVM_E500
+ case KVM_DIRTY_TLB: {
+ struct kvm_dirty_tlb dirty;
+ r = -EFAULT;
+ if (copy_from_user(&dirty, argp, sizeof(dirty)))
+ goto out;
+ r = kvm_vcpu_ioctl_dirty_tlb(vcpu, &dirty);
+ break;
+ }
+#endif
+
+ default:
+ r = -EINVAL;
+ }
+
+out:
+ return r;
+}
+
+/* No vcpu mapping faults are serviced here; always VM_FAULT_SIGBUS. */
+int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+{
+ return VM_FAULT_SIGBUS;
+}
+
+/*
+ * Fill pvinfo->hcall[] with the four-instruction sequence a guest executes
+ * to issue a KVM hypercall. Always returns 0.
+ */
+static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
+{
+ /* instruction templates with all register fields set to r0 */
+ u32 inst_lis = 0x3c000000;
+ u32 inst_ori = 0x60000000;
+ u32 inst_nop = 0x60000000;
+ u32 inst_sc = 0x44000002;
+ u32 inst_imm_mask = 0xffff;
+
+ /*
+ * The hypercall to get into KVM from within guest context is as
+ * follows:
+ *
+ * lis r0, KVM_SC_MAGIC_R0@h
+ * ori r0, r0, KVM_SC_MAGIC_R0@l
+ * sc
+ * nop
+ */
+ pvinfo->hcall[0] = inst_lis | ((KVM_SC_MAGIC_R0 >> 16) & inst_imm_mask);
+ pvinfo->hcall[1] = inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask);
+ pvinfo->hcall[2] = inst_sc;
+ pvinfo->hcall[3] = inst_nop;
+
+ return 0;
+}
+
+/*
+ * Architecture-specific VM ioctl dispatcher.
+ *
+ * KVM_PPC_GET_PVINFO is always handled; KVM_CREATE_SPAPR_TCE and
+ * KVM_ALLOCATE_RMA only on HV builds. User copies that fail produce
+ * -EFAULT; unrecognised ioctls yield -ENOTTY.
+ */
+long kvm_arch_vm_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ void __user *argp = (void __user *)arg;
+ long r;
+
+ switch (ioctl) {
+ case KVM_PPC_GET_PVINFO: {
+ struct kvm_ppc_pvinfo pvinfo;
+ /* zero the whole structure before it is copied out to userspace */
+ memset(&pvinfo, 0, sizeof(pvinfo));
+ r = kvm_vm_ioctl_get_pvinfo(&pvinfo);
+ if (copy_to_user(argp, &pvinfo, sizeof(pvinfo))) {
+ r = -EFAULT;
+ goto out;
+ }
+
+ break;
+ }
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+ case KVM_CREATE_SPAPR_TCE: {
+ struct kvm_create_spapr_tce create_tce;
+ struct kvm *kvm = filp->private_data;
+
+ r = -EFAULT;
+ if (copy_from_user(&create_tce, argp, sizeof(create_tce)))
+ goto out;
+ r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce);
+ goto out;
+ }
+
+ case KVM_ALLOCATE_RMA: {
+ struct kvm *kvm = filp->private_data;
+ struct kvm_allocate_rma rma;
+
+ /*
+ * NOTE(review): rma is not initialised here; presumably
+ * kvm_vm_ioctl_allocate_rma() fills it completely whenever it
+ * returns >= 0 - confirm, since it is copied to userspace.
+ */
+ r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
+ if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma)))
+ r = -EFAULT;
+ break;
+ }
+#endif /* CONFIG_KVM_BOOK3S_64_HV */
+
+ default:
+ r = -ENOTTY;
+ }
+
+out:
+ return r;
+}
+
+/* No architecture-wide initialisation is done here; always returns 0. */
+int kvm_arch_init(void *opaque)
+{
+ return 0;
+}
+
+/* Counterpart of kvm_arch_init(); intentionally empty. */
+void kvm_arch_exit(void)
+{
+}
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
new file mode 100644
index 00000000..07b6110a
--- /dev/null
+++ b/arch/powerpc/kvm/timing.c
@@ -0,0 +1,246 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+
+#include <asm/time.h>
+#include <asm-generic/div64.h>
+
+#include "timing.h"
+
+/*
+ * Reset all exit timing statistics of a vcpu, under exit_timing_lock.
+ *
+ * last_exit_type is set to the 0xDEAD sentinel, which
+ * kvmppc_update_timing_stats() treats as "no complete cycle yet".
+ */
+void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu)
+{
+ int i;
+
+ /* Take a lock to avoid concurrent updates */
+ mutex_lock(&vcpu->arch.exit_timing_lock);
+
+ vcpu->arch.last_exit_type = 0xDEAD;
+ for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
+ vcpu->arch.timing_count_type[i] = 0;
+ vcpu->arch.timing_max_duration[i] = 0;
+ /* start at the maximum so the first sample becomes the minimum */
+ vcpu->arch.timing_min_duration[i] = 0xFFFFFFFF;
+ vcpu->arch.timing_sum_duration[i] = 0;
+ vcpu->arch.timing_sum_quad_duration[i] = 0;
+ }
+ vcpu->arch.timing_last_exit = 0;
+ vcpu->arch.timing_exit.tv64 = 0;
+ vcpu->arch.timing_last_enter.tv64 = 0;
+
+ mutex_unlock(&vcpu->arch.exit_timing_lock);
+}
+
+/*
+ * Fold one duration sample into the statistics kept for the given exit
+ * type: count, sum, sum of squares, minimum and maximum. Wrap-around of
+ * either sum is detected and logged, but the wrapped value is kept.
+ *
+ * NOTE(review): type is used as an array index without a range check;
+ * callers are presumably expected to pass a valid exit type - confirm.
+ */
+static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type)
+{
+ u64 old;
+
+ mutex_lock(&vcpu->arch.exit_timing_lock);
+
+ vcpu->arch.timing_count_type[type]++;
+
+ /* sum */
+ old = vcpu->arch.timing_sum_duration[type];
+ vcpu->arch.timing_sum_duration[type] += duration;
+ if (unlikely(old > vcpu->arch.timing_sum_duration[type])) {
+ printk(KERN_ERR"%s - wrap adding sum of durations"
+ " old %lld new %lld type %d exit # of type %d\n",
+ __func__, old, vcpu->arch.timing_sum_duration[type],
+ type, vcpu->arch.timing_count_type[type]);
+ }
+
+ /* square sum */
+ old = vcpu->arch.timing_sum_quad_duration[type];
+ vcpu->arch.timing_sum_quad_duration[type] += (duration*duration);
+ if (unlikely(old > vcpu->arch.timing_sum_quad_duration[type])) {
+ printk(KERN_ERR"%s - wrap adding sum of squared durations"
+ " old %lld new %lld type %d exit # of type %d\n",
+ __func__, old,
+ vcpu->arch.timing_sum_quad_duration[type],
+ type, vcpu->arch.timing_count_type[type]);
+ }
+
+ /* set min/max */
+ if (unlikely(duration < vcpu->arch.timing_min_duration[type]))
+ vcpu->arch.timing_min_duration[type] = duration;
+ if (unlikely(duration > vcpu->arch.timing_max_duration[type]))
+ vcpu->arch.timing_max_duration[type] = duration;
+
+ mutex_unlock(&vcpu->arch.exit_timing_lock);
+}
+
+/*
+ * Account the most recent exit/enter cycle.
+ *
+ * Two samples are recorded: the time from the previous exit to the last
+ * guest entry (charged to the previous exit's type) and the time from that
+ * entry to the current exit (charged to TIMEINGUEST). Nothing is recorded
+ * while no complete cycle is available.
+ */
+void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu)
+{
+ u64 exit = vcpu->arch.timing_last_exit;
+ u64 enter = vcpu->arch.timing_last_enter.tv64;
+
+ /* save exit time, used next exit when the reenter time is known */
+ vcpu->arch.timing_last_exit = vcpu->arch.timing_exit.tv64;
+
+ if (unlikely(vcpu->arch.last_exit_type == 0xDEAD || exit == 0))
+ return; /* skip incomplete cycle (e.g. after reset) */
+
+ /* update statistics for average and standard deviation */
+ add_exit_timing(vcpu, (enter - exit), vcpu->arch.last_exit_type);
+ /* enter -> timing_last_exit is time spent in guest - log this too */
+ add_exit_timing(vcpu, (vcpu->arch.timing_last_exit - enter),
+ TIMEINGUEST);
+}
+
+/*
+ * Labels printed in the debugfs timing table, indexed by exit type.
+ * An exit type without an entry here is left NULL.
+ */
+static const char *kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = {
+ [MMIO_EXITS] = "MMIO",
+ [DCR_EXITS] = "DCR",
+ [SIGNAL_EXITS] = "SIGNAL",
+ [ITLB_REAL_MISS_EXITS] = "ITLBREAL",
+ [ITLB_VIRT_MISS_EXITS] = "ITLBVIRT",
+ [DTLB_REAL_MISS_EXITS] = "DTLBREAL",
+ [DTLB_VIRT_MISS_EXITS] = "DTLBVIRT",
+ [SYSCALL_EXITS] = "SYSCALL",
+ [ISI_EXITS] = "ISI",
+ [DSI_EXITS] = "DSI",
+ [EMULATED_INST_EXITS] = "EMULINST",
+ [EMULATED_MTMSRWE_EXITS] = "EMUL_WAIT",
+ [EMULATED_WRTEE_EXITS] = "EMUL_WRTEE",
+ [EMULATED_MTSPR_EXITS] = "EMUL_MTSPR",
+ [EMULATED_MFSPR_EXITS] = "EMUL_MFSPR",
+ [EMULATED_MTMSR_EXITS] = "EMUL_MTMSR",
+ [EMULATED_MFMSR_EXITS] = "EMUL_MFMSR",
+ [EMULATED_TLBSX_EXITS] = "EMUL_TLBSX",
+ [EMULATED_TLBWE_EXITS] = "EMUL_TLBWE",
+ [EMULATED_RFI_EXITS] = "EMUL_RFI",
+ [DEC_EXITS] = "DEC",
+ [EXT_INTR_EXITS] = "EXTINT",
+ [HALT_WAKEUP] = "HALT",
+ [USR_PR_INST] = "USR_PR_INST",
+ [FP_UNAVAIL] = "FP_UNAVAIL",
+ [DEBUG_EXITS] = "DEBUG",
+ [TIMEINGUEST] = "TIMEINGUEST"
+};
+
+/*
+ * seq_file show callback: print one row per exit type with its count and
+ * its min/max/sum/sum-of-squares, each divided by tb_ticks_per_usec.
+ *
+ * NOTE(review): the statistics are read without taking exit_timing_lock
+ * and sum_quad is divided by tb_ticks_per_usec only once - confirm both
+ * are intended.
+ */
+static int kvmppc_exit_timing_show(struct seq_file *m, void *private)
+{
+ struct kvm_vcpu *vcpu = m->private;
+ int i;
+ u64 min, max, sum, sum_quad;
+
+ seq_printf(m, "%s", "type count min max sum sum_squared\n");
+
+
+ for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
+
+ /* do_div() divides its first argument in place */
+ min = vcpu->arch.timing_min_duration[i];
+ do_div(min, tb_ticks_per_usec);
+ max = vcpu->arch.timing_max_duration[i];
+ do_div(max, tb_ticks_per_usec);
+ sum = vcpu->arch.timing_sum_duration[i];
+ do_div(sum, tb_ticks_per_usec);
+ sum_quad = vcpu->arch.timing_sum_quad_duration[i];
+ do_div(sum_quad, tb_ticks_per_usec);
+
+ seq_printf(m, "%12s %10d %10lld %10lld %20lld %20lld\n",
+ kvm_exit_names[i],
+ vcpu->arch.timing_count_type[i],
+ min,
+ max,
+ sum,
+ sum_quad);
+
+ }
+ return 0;
+}
+
+/*
+ * debugfs write handler: writing the single character 'c' clears the
+ * vcpu's timing statistics.
+ *
+ * Returns count on success, -EFAULT if the byte cannot be read from
+ * userspace, and -EINVAL for writes longer than one byte or for any other
+ * character.
+ */
+static ssize_t kvmppc_exit_timing_write(struct file *file,
+					const char __user *user_buf,
+					size_t count, loff_t *ppos)
+{
+	struct seq_file *seqf;
+	struct kvm_vcpu *vcpu;
+	char cmd;
+
+	if (count > 1)
+		return -EINVAL;
+
+	if (get_user(cmd, user_buf))
+		return -EFAULT;
+
+	if (cmd != 'c')
+		return -EINVAL;
+
+	seqf = file->private_data;
+	vcpu = seqf->private;
+
+	/*
+	 * Buffers already produced by show are unaffected by the reset. The
+	 * seq_file lock is held so the reset cannot race with a show call.
+	 */
+	mutex_lock(&seqf->lock);
+	kvmppc_init_timing_stats(vcpu);
+	mutex_unlock(&seqf->lock);
+
+	return count;
+}
+
+/*
+ * Open handler: bind the file to kvmppc_exit_timing_show() with the
+ * inode's private pointer (the vcpu given at file creation) as its data.
+ */
+static int kvmppc_exit_timing_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, kvmppc_exit_timing_show, inode->i_private);
+}
+
+/*
+ * File operations of the per-vcpu timing file: reads go through the
+ * seq_file helpers, writes through kvmppc_exit_timing_write().
+ */
+static const struct file_operations kvmppc_exit_timing_fops = {
+ .owner = THIS_MODULE,
+ .open = kvmppc_exit_timing_open,
+ .read = seq_read,
+ .write = kvmppc_exit_timing_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+/*
+ * Create the debugfs file "vm<pid>_vcpu<id>_timing" for a vcpu in
+ * kvm_debugfs_dir and remember its dentry in the vcpu.
+ *
+ * <pid> is the pid of the calling task. A NULL result from debugfs is
+ * logged and otherwise ignored; the vcpu then has no timing file.
+ */
+void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id)
+{
+	/*
+	 * Kept on the stack: a static buffer would be shared by every caller
+	 * and could be overwritten while another vcpu is being created.
+	 */
+	char dbg_fname[50];
+	struct dentry *debugfs_file;
+
+	snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing",
+		 current->pid, id);
+	debugfs_file = debugfs_create_file(dbg_fname, 0666,
+					   kvm_debugfs_dir, vcpu,
+					   &kvmppc_exit_timing_fops);
+
+	if (!debugfs_file) {
+		printk(KERN_ERR"%s: error creating debugfs file %s\n",
+		       __func__, dbg_fname);
+		return;
+	}
+
+	vcpu->arch.debugfs_exit_timing = debugfs_file;
+}
+
+/*
+ * Remove the vcpu's timing file, if one was created, and forget its
+ * dentry so that a second call does nothing.
+ */
+void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu)
+{
+	struct dentry *timing_file = vcpu->arch.debugfs_exit_timing;
+
+	if (!timing_file)
+		return;
+
+	debugfs_remove(timing_file);
+	vcpu->arch.debugfs_exit_timing = NULL;
+}
diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
new file mode 100644
index 00000000..8167d42a
--- /dev/null
+++ b/arch/powerpc/kvm/timing.h
@@ -0,0 +1,106 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
+ */
+
+#ifndef __POWERPC_KVM_EXITTIMING_H__
+#define __POWERPC_KVM_EXITTIMING_H__
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_host.h>
+
+#ifdef CONFIG_KVM_EXIT_TIMING
+void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu);
+void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu);
+void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id);
+void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu);
+
+/* Remember the type of the current exit for the next timing update. */
+static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type)
+{
+ vcpu->arch.last_exit_type = type;
+}
+
+#else
+/*
+ * Without CONFIG_KVM_EXIT_TIMING the timing interface collapses to empty
+ * inline functions, so there is no need to build the c file.
+ */
+static inline void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) {}
+static inline void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) {}
+static inline void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu,
+ unsigned int id) {}
+static inline void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) {}
+static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {}
+#endif /* CONFIG_KVM_EXIT_TIMING */
+
+/*
+ * Account the exit in kvm_stats: bump the vcpu->stat counter that matches
+ * the exit type. Exit types without a case below are not counted here.
+ */
+static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type)
+{
+ /* type has to be known at build time for optimization */
+
+ /* The BUILD_BUG_ON below breaks in funny ways, commented out
+ * for now ... -BenH
+ BUILD_BUG_ON(!__builtin_constant_p(type));
+ */
+ switch (type) {
+ case EXT_INTR_EXITS:
+ vcpu->stat.ext_intr_exits++;
+ break;
+ case DEC_EXITS:
+ vcpu->stat.dec_exits++;
+ break;
+ case EMULATED_INST_EXITS:
+ vcpu->stat.emulated_inst_exits++;
+ break;
+ case DCR_EXITS:
+ vcpu->stat.dcr_exits++;
+ break;
+ case DSI_EXITS:
+ vcpu->stat.dsi_exits++;
+ break;
+ case ISI_EXITS:
+ vcpu->stat.isi_exits++;
+ break;
+ case SYSCALL_EXITS:
+ vcpu->stat.syscall_exits++;
+ break;
+ case DTLB_REAL_MISS_EXITS:
+ vcpu->stat.dtlb_real_miss_exits++;
+ break;
+ case DTLB_VIRT_MISS_EXITS:
+ vcpu->stat.dtlb_virt_miss_exits++;
+ break;
+ case MMIO_EXITS:
+ vcpu->stat.mmio_exits++;
+ break;
+ case ITLB_REAL_MISS_EXITS:
+ vcpu->stat.itlb_real_miss_exits++;
+ break;
+ case ITLB_VIRT_MISS_EXITS:
+ vcpu->stat.itlb_virt_miss_exits++;
+ break;
+ case SIGNAL_EXITS:
+ vcpu->stat.signal_exits++;
+ break;
+ }
+}
+
+/*
+ * Wrapper that records the exit type for the timing code and accounts
+ * the exit in kvm_stats.
+ */
+static inline void kvmppc_account_exit(struct kvm_vcpu *vcpu, int type)
+{
+ kvmppc_set_exit_type(vcpu, type);
+ kvmppc_account_exit_stat(vcpu, type);
+}
+
+#endif /* __POWERPC_KVM_EXITTIMING_H__ */
diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
new file mode 100644
index 00000000..877186b7
--- /dev/null
+++ b/arch/powerpc/kvm/trace.h
@@ -0,0 +1,403 @@
+#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace
+
+/*
+ * Tracepoint recording an instruction word (@inst), the PC passed along
+ * with it (@_pc) and an emulation result code (@emulate).
+ * NOTE(review): presumably fired from the instruction emulation path --
+ * confirm against the caller.
+ */
+TRACE_EVENT(kvm_ppc_instr,
+	TP_PROTO(unsigned int inst, unsigned long _pc, unsigned int emulate),
+	TP_ARGS(inst, _pc, emulate),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	inst		)
+		__field(	unsigned long,	pc		)
+		__field(	unsigned int,	emulate		)
+	),
+
+	TP_fast_assign(
+		__entry->inst		= inst;
+		__entry->pc		= _pc;
+		__entry->emulate	= emulate;
+	),
+
+	/* No trailing "\n": the trace output terminates each event itself. */
+	TP_printk("inst %u pc 0x%lx emulate %u",
+		  __entry->inst, __entry->pc, __entry->emulate)
+);
+
+/*
+ * Tracepoint recording a single value, @stlb_index.
+ * NOTE(review): presumably the shadow-TLB slot being invalidated --
+ * confirm against the caller.
+ */
+TRACE_EVENT(kvm_stlb_inval,
+ TP_PROTO(unsigned int stlb_index),
+ TP_ARGS(stlb_index),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, stlb_index )
+ ),
+
+ TP_fast_assign(
+ __entry->stlb_index = stlb_index;
+ ),
+
+ TP_printk("stlb_index %u", __entry->stlb_index)
+);
+
+/*
+ * Tracepoint recording @victim, @tid and the three entry words
+ * @word0..@word2, all printed as unsigned decimals.
+ * NOTE(review): presumably emitted when a shadow-TLB entry is written,
+ * with @victim the slot being replaced -- confirm against the caller.
+ */
+TRACE_EVENT(kvm_stlb_write,
+ TP_PROTO(unsigned int victim, unsigned int tid, unsigned int word0,
+ unsigned int word1, unsigned int word2),
+ TP_ARGS(victim, tid, word0, word1, word2),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, victim )
+ __field( unsigned int, tid )
+ __field( unsigned int, word0 )
+ __field( unsigned int, word1 )
+ __field( unsigned int, word2 )
+ ),
+
+ TP_fast_assign(
+ __entry->victim = victim;
+ __entry->tid = tid;
+ __entry->word0 = word0;
+ __entry->word1 = word1;
+ __entry->word2 = word2;
+ ),
+
+ TP_printk("victim %u tid %u w0 %u w1 %u w2 %u",
+ __entry->victim, __entry->tid, __entry->word0,
+ __entry->word1, __entry->word2)
+);
+
+/*
+ * Tracepoint recording @gtlb_index, @tid and the three entry words
+ * @word0..@word2, all printed as unsigned decimals.
+ * NOTE(review): presumably emitted when a guest-TLB entry is written --
+ * confirm against the caller.
+ */
+TRACE_EVENT(kvm_gtlb_write,
+ TP_PROTO(unsigned int gtlb_index, unsigned int tid, unsigned int word0,
+ unsigned int word1, unsigned int word2),
+ TP_ARGS(gtlb_index, tid, word0, word1, word2),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, gtlb_index )
+ __field( unsigned int, tid )
+ __field( unsigned int, word0 )
+ __field( unsigned int, word1 )
+ __field( unsigned int, word2 )
+ ),
+
+ TP_fast_assign(
+ __entry->gtlb_index = gtlb_index;
+ __entry->tid = tid;
+ __entry->word0 = word0;
+ __entry->word1 = word1;
+ __entry->word2 = word2;
+ ),
+
+ TP_printk("gtlb_index %u tid %u w0 %u w1 %u w2 %u",
+ __entry->gtlb_index, __entry->tid, __entry->word0,
+ __entry->word1, __entry->word2)
+);
+
+
+/*************************************************************************
+ * Book3S trace points *
+ *************************************************************************/
+
+#ifdef CONFIG_KVM_BOOK3S_PR
+
+/*
+ * Tracepoint recording an exit number together with vcpu state sampled
+ * when the event fires:
+ *   pc   - kvmppc_get_pc(vcpu)
+ *   dar  - kvmppc_get_fault_dar(vcpu)
+ *   msr  - vcpu->arch.shared->msr
+ *   srr1 - shadow_srr1 of the shadow vcpu
+ */
+TRACE_EVENT(kvm_book3s_exit,
+ TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
+ TP_ARGS(exit_nr, vcpu),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, exit_nr )
+ __field( unsigned long, pc )
+ __field( unsigned long, msr )
+ __field( unsigned long, dar )
+ __field( unsigned long, srr1 )
+ ),
+
+ TP_fast_assign(
+ struct kvmppc_book3s_shadow_vcpu *svcpu;
+ __entry->exit_nr = exit_nr;
+ __entry->pc = kvmppc_get_pc(vcpu);
+ __entry->dar = kvmppc_get_fault_dar(vcpu);
+ __entry->msr = vcpu->arch.shared->msr;
+ /* the shadow vcpu is only dereferenced between svcpu_get() and svcpu_put() */
+ svcpu = svcpu_get(vcpu);
+ __entry->srr1 = svcpu->shadow_srr1;
+ svcpu_put(svcpu);
+ ),
+
+ TP_printk("exit=0x%x | pc=0x%lx | msr=0x%lx | dar=0x%lx | srr1=0x%lx",
+ __entry->exit_nr, __entry->pc, __entry->msr, __entry->dar,
+ __entry->srr1)
+);
+
+/*
+ * Tracepoint recording the value @r together with the PC returned by
+ * kvmppc_get_pc(vcpu).
+ * The record field is declared int so that it agrees with the prototype
+ * and with the "%d" used for printing; the event format then describes
+ * the value as signed.
+ */
+TRACE_EVENT(kvm_book3s_reenter,
+	TP_PROTO(int r, struct kvm_vcpu *vcpu),
+	TP_ARGS(r, vcpu),
+
+	TP_STRUCT__entry(
+		__field(	int,		r		)
+		__field(	unsigned long,	pc		)
+	),
+
+	TP_fast_assign(
+		__entry->r		= r;
+		__entry->pc		= kvmppc_get_pc(vcpu);
+	),
+
+	TP_printk("reentry r=%d | pc=0x%lx", __entry->r, __entry->pc)
+);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+
+/*
+ * Tracepoint recording a mapping described by @rflags, @hpteg, @va,
+ * @hpaddr and the eaddr/vpage members of @orig_pte.
+ * @rflags is reduced to two display characters:
+ *   flag_w - '-' when (rflags & HPTE_R_PP) == 3, otherwise 'w'
+ *   flag_x - '-' when HPTE_R_N is set in rflags, otherwise 'x'
+ */
+TRACE_EVENT(kvm_book3s_64_mmu_map,
+ TP_PROTO(int rflags, ulong hpteg, ulong va, pfn_t hpaddr,
+ struct kvmppc_pte *orig_pte),
+ TP_ARGS(rflags, hpteg, va, hpaddr, orig_pte),
+
+ TP_STRUCT__entry(
+ __field( unsigned char, flag_w )
+ __field( unsigned char, flag_x )
+ __field( unsigned long, eaddr )
+ __field( unsigned long, hpteg )
+ __field( unsigned long, va )
+ __field( unsigned long long, vpage )
+ __field( unsigned long, hpaddr )
+ ),
+
+ TP_fast_assign(
+ __entry->flag_w = ((rflags & HPTE_R_PP) == 3) ? '-' : 'w';
+ __entry->flag_x = (rflags & HPTE_R_N) ? '-' : 'x';
+ __entry->eaddr = orig_pte->eaddr;
+ __entry->hpteg = hpteg;
+ __entry->va = va;
+ __entry->vpage = orig_pte->vpage;
+ __entry->hpaddr = hpaddr;
+ ),
+
+ TP_printk("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx",
+ __entry->flag_w, __entry->flag_x, __entry->eaddr,
+ __entry->hpteg, __entry->va, __entry->vpage, __entry->hpaddr)
+);
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+/*
+ * Tracepoint recording the contents of a struct hpte_cache entry, printed
+ * with a "Map:" prefix: host_va, pfn and the embedded pte's eaddr, vpage
+ * and raddr.
+ * flags is a bitmask built from the pte permissions:
+ *   0x4 - may_read, 0x2 - may_write, 0x1 - may_execute
+ */
+TRACE_EVENT(kvm_book3s_mmu_map,
+ TP_PROTO(struct hpte_cache *pte),
+ TP_ARGS(pte),
+
+ TP_STRUCT__entry(
+ __field( u64, host_va )
+ __field( u64, pfn )
+ __field( ulong, eaddr )
+ __field( u64, vpage )
+ __field( ulong, raddr )
+ __field( int, flags )
+ ),
+
+ TP_fast_assign(
+ __entry->host_va = pte->host_va;
+ __entry->pfn = pte->pfn;
+ __entry->eaddr = pte->pte.eaddr;
+ __entry->vpage = pte->pte.vpage;
+ __entry->raddr = pte->pte.raddr;
+ __entry->flags = (pte->pte.may_read ? 0x4 : 0) |
+ (pte->pte.may_write ? 0x2 : 0) |
+ (pte->pte.may_execute ? 0x1 : 0);
+ ),
+
+ TP_printk("Map: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
+ __entry->host_va, __entry->pfn, __entry->eaddr,
+ __entry->vpage, __entry->raddr, __entry->flags)
+);
+
+/*
+ * Tracepoint recording the contents of a struct hpte_cache entry, printed
+ * with a "Flush:" prefix: host_va, pfn and the embedded pte's eaddr, vpage
+ * and raddr.
+ * flags is a bitmask built from the pte permissions:
+ *   0x4 - may_read, 0x2 - may_write, 0x1 - may_execute
+ */
+TRACE_EVENT(kvm_book3s_mmu_invalidate,
+ TP_PROTO(struct hpte_cache *pte),
+ TP_ARGS(pte),
+
+ TP_STRUCT__entry(
+ __field( u64, host_va )
+ __field( u64, pfn )
+ __field( ulong, eaddr )
+ __field( u64, vpage )
+ __field( ulong, raddr )
+ __field( int, flags )
+ ),
+
+ TP_fast_assign(
+ __entry->host_va = pte->host_va;
+ __entry->pfn = pte->pfn;
+ __entry->eaddr = pte->pte.eaddr;
+ __entry->vpage = pte->pte.vpage;
+ __entry->raddr = pte->pte.raddr;
+ __entry->flags = (pte->pte.may_read ? 0x4 : 0) |
+ (pte->pte.may_write ? 0x2 : 0) |
+ (pte->pte.may_execute ? 0x1 : 0);
+ ),
+
+ TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
+ __entry->host_va, __entry->pfn, __entry->eaddr,
+ __entry->vpage, __entry->raddr, __entry->flags)
+);
+
+/*
+ * Tracepoint recording a flush described by the string @type and the two
+ * values @p1 and @p2, plus the hpte_cache_count read from to_book3s(vcpu)
+ * when the event fires.
+ * Only the @type pointer is stored in the record; the "%s" in TP_printk
+ * reads the string when the event is formatted.
+ * NOTE(review): this presumably requires callers to pass a string that
+ * outlives the record (e.g. a literal) -- confirm against the callers.
+ */
+TRACE_EVENT(kvm_book3s_mmu_flush,
+ TP_PROTO(const char *type, struct kvm_vcpu *vcpu, unsigned long long p1,
+ unsigned long long p2),
+ TP_ARGS(type, vcpu, p1, p2),
+
+ TP_STRUCT__entry(
+ __field( int, count )
+ __field( unsigned long long, p1 )
+ __field( unsigned long long, p2 )
+ __field( const char *, type )
+ ),
+
+ TP_fast_assign(
+ __entry->count = to_book3s(vcpu)->hpte_cache_count;
+ __entry->p1 = p1;
+ __entry->p2 = p2;
+ __entry->type = type;
+ ),
+
+ TP_printk("Flush %d %sPTEs: %llx - %llx",
+ __entry->count, __entry->type, __entry->p1, __entry->p2)
+);
+
+/*
+ * Tracepoint recording a @gvsid / @hvsid pair, printed as
+ * "<gvsid> -> <hvsid>" in hex.
+ * NOTE(review): presumably a guest VSID and the host VSID found for it --
+ * confirm against the caller.
+ */
+TRACE_EVENT(kvm_book3s_slb_found,
+ TP_PROTO(unsigned long long gvsid, unsigned long long hvsid),
+ TP_ARGS(gvsid, hvsid),
+
+ TP_STRUCT__entry(
+ __field( unsigned long long, gvsid )
+ __field( unsigned long long, hvsid )
+ ),
+
+ TP_fast_assign(
+ __entry->gvsid = gvsid;
+ __entry->hvsid = hvsid;
+ ),
+
+ TP_printk("%llx -> %llx", __entry->gvsid, __entry->hvsid)
+);
+
+/*
+ * Tracepoint recording @sid_map_mask and @gvsid.
+ * The printed line shows sid_map_mask, then SID_MAP_MASK - sid_map_mask
+ * (computed in TP_printk, not stored in the record), then gvsid.
+ * NOTE(review): presumably emitted when no mapping exists for @gvsid --
+ * confirm against the caller.
+ */
+TRACE_EVENT(kvm_book3s_slb_fail,
+ TP_PROTO(u16 sid_map_mask, unsigned long long gvsid),
+ TP_ARGS(sid_map_mask, gvsid),
+
+ TP_STRUCT__entry(
+ __field( unsigned short, sid_map_mask )
+ __field( unsigned long long, gvsid )
+ ),
+
+ TP_fast_assign(
+ __entry->sid_map_mask = sid_map_mask;
+ __entry->gvsid = gvsid;
+ ),
+
+ TP_printk("%x/%x: %llx", __entry->sid_map_mask,
+ SID_MAP_MASK - __entry->sid_map_mask, __entry->gvsid)
+);
+
+/*
+ * Tracepoint recording @sid_map_mask together with a @gvsid / @hvsid
+ * pair (stored as guest_vsid / host_vsid), printed as
+ * "<sid_map_mask>: <guest_vsid> -> <host_vsid>" in hex.
+ */
+TRACE_EVENT(kvm_book3s_slb_map,
+ TP_PROTO(u16 sid_map_mask, unsigned long long gvsid,
+ unsigned long long hvsid),
+ TP_ARGS(sid_map_mask, gvsid, hvsid),
+
+ TP_STRUCT__entry(
+ __field( unsigned short, sid_map_mask )
+ __field( unsigned long long, guest_vsid )
+ __field( unsigned long long, host_vsid )
+ ),
+
+ TP_fast_assign(
+ __entry->sid_map_mask = sid_map_mask;
+ __entry->guest_vsid = gvsid;
+ __entry->host_vsid = hvsid;
+ ),
+
+ TP_printk("%x: %llx -> %llx", __entry->sid_map_mask,
+ __entry->guest_vsid, __entry->host_vsid)
+);
+
+/*
+ * Tracepoint recording the two 64-bit values @slb_vsid and @slb_esid,
+ * printed in hex in that order.
+ * NOTE(review): presumably the operands of an slbmte being handled --
+ * confirm against the caller.
+ */
+TRACE_EVENT(kvm_book3s_slbmte,
+ TP_PROTO(u64 slb_vsid, u64 slb_esid),
+ TP_ARGS(slb_vsid, slb_esid),
+
+ TP_STRUCT__entry(
+ __field( u64, slb_vsid )
+ __field( u64, slb_esid )
+ ),
+
+ TP_fast_assign(
+ __entry->slb_vsid = slb_vsid;
+ __entry->slb_esid = slb_esid;
+ ),
+
+ TP_printk("%llx, %llx", __entry->slb_vsid, __entry->slb_esid)
+);
+
+#endif /* CONFIG_KVM_BOOK3S_PR */
+
+
+/*************************************************************************
+ * Book3E trace points *
+ *************************************************************************/
+
+#ifdef CONFIG_BOOKE
+
+/*
+ * Tracepoint recording the five values @mas0, @mas8, @mas1, @mas2 and
+ * @mas7_3 (the first three 32-bit, the last two 64-bit), printed in hex.
+ * NOTE(review): presumably the MAS register contents used for a
+ * shadow-TLB write -- confirm against the caller.
+ */
+TRACE_EVENT(kvm_booke206_stlb_write,
+ TP_PROTO(__u32 mas0, __u32 mas8, __u32 mas1, __u64 mas2, __u64 mas7_3),
+ TP_ARGS(mas0, mas8, mas1, mas2, mas7_3),
+
+ TP_STRUCT__entry(
+ __field( __u32, mas0 )
+ __field( __u32, mas8 )
+ __field( __u32, mas1 )
+ __field( __u64, mas2 )
+ __field( __u64, mas7_3 )
+ ),
+
+ TP_fast_assign(
+ __entry->mas0 = mas0;
+ __entry->mas8 = mas8;
+ __entry->mas1 = mas1;
+ __entry->mas2 = mas2;
+ __entry->mas7_3 = mas7_3;
+ ),
+
+ TP_printk("mas0=%x mas8=%x mas1=%x mas2=%llx mas7_3=%llx",
+ __entry->mas0, __entry->mas8, __entry->mas1,
+ __entry->mas2, __entry->mas7_3)
+);
+
+/*
+ * Tracepoint recording the four values @mas0, @mas1, @mas2 and @mas7_3
+ * (the first two 32-bit, the last two 64-bit), printed in hex.
+ * NOTE(review): presumably the MAS register contents used for a
+ * guest-TLB write -- confirm against the caller.
+ */
+TRACE_EVENT(kvm_booke206_gtlb_write,
+ TP_PROTO(__u32 mas0, __u32 mas1, __u64 mas2, __u64 mas7_3),
+ TP_ARGS(mas0, mas1, mas2, mas7_3),
+
+ TP_STRUCT__entry(
+ __field( __u32, mas0 )
+ __field( __u32, mas1 )
+ __field( __u64, mas2 )
+ __field( __u64, mas7_3 )
+ ),
+
+ TP_fast_assign(
+ __entry->mas0 = mas0;
+ __entry->mas1 = mas1;
+ __entry->mas2 = mas2;
+ __entry->mas7_3 = mas7_3;
+ ),
+
+ TP_printk("mas0=%x mas1=%x mas2=%llx mas7_3=%llx",
+ __entry->mas0, __entry->mas1,
+ __entry->mas2, __entry->mas7_3)
+);
+
+#endif
+
+#endif /* _TRACE_KVM_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>