summary | refs | log | tree | commit | diff
path: root/ANDROID_3.4.5/arch/x86/xen
diff options
context:
space:
mode:
Diffstat (limited to 'ANDROID_3.4.5/arch/x86/xen')
-rw-r--r-- ANDROID_3.4.5/arch/x86/xen/Kconfig | 52
-rw-r--r-- ANDROID_3.4.5/arch/x86/xen/Makefile | 24
-rw-r--r-- ANDROID_3.4.5/arch/x86/xen/debugfs.c | 125
-rw-r--r-- ANDROID_3.4.5/arch/x86/xen/debugfs.h | 10
-rw-r--r-- ANDROID_3.4.5/arch/x86/xen/enlighten.c | 1557
-rw-r--r-- ANDROID_3.4.5/arch/x86/xen/grant-table.c | 127
-rw-r--r-- ANDROID_3.4.5/arch/x86/xen/irq.c | 133
-rw-r--r-- ANDROID_3.4.5/arch/x86/xen/mmu.c | 2371
-rw-r--r-- ANDROID_3.4.5/arch/x86/xen/mmu.h | 26
-rw-r--r-- ANDROID_3.4.5/arch/x86/xen/multicalls.c | 208
-rw-r--r-- ANDROID_3.4.5/arch/x86/xen/multicalls.h | 68
-rw-r--r-- ANDROID_3.4.5/arch/x86/xen/p2m.c | 949
-rw-r--r-- ANDROID_3.4.5/arch/x86/xen/pci-swiotlb-xen.c | 67
-rw-r--r-- ANDROID_3.4.5/arch/x86/xen/platform-pci-unplug.c | 143
-rw-r--r-- ANDROID_3.4.5/arch/x86/xen/setup.c | 427
-rw-r--r-- ANDROID_3.4.5/arch/x86/xen/smp.c | 592
-rw-r--r-- ANDROID_3.4.5/arch/x86/xen/spinlock.c | 454
-rw-r--r-- ANDROID_3.4.5/arch/x86/xen/suspend.c | 80
-rw-r--r-- ANDROID_3.4.5/arch/x86/xen/time.c | 525
-rw-r--r-- ANDROID_3.4.5/arch/x86/xen/trace.c | 62
-rw-r--r-- ANDROID_3.4.5/arch/x86/xen/vdso.h | 4
-rw-r--r-- ANDROID_3.4.5/arch/x86/xen/vga.c | 67
-rw-r--r-- ANDROID_3.4.5/arch/x86/xen/xen-asm.S | 142
-rw-r--r-- ANDROID_3.4.5/arch/x86/xen/xen-asm.h | 12
-rw-r--r-- ANDROID_3.4.5/arch/x86/xen/xen-asm_32.S | 230
-rw-r--r-- ANDROID_3.4.5/arch/x86/xen/xen-asm_64.S | 159
-rw-r--r-- ANDROID_3.4.5/arch/x86/xen/xen-head.S | 55
-rw-r--r-- ANDROID_3.4.5/arch/x86/xen/xen-ops.h | 123
28 files changed, 0 insertions, 8792 deletions
diff --git a/ANDROID_3.4.5/arch/x86/xen/Kconfig b/ANDROID_3.4.5/arch/x86/xen/Kconfig
deleted file mode 100644
index fdce49c7..00000000
--- a/ANDROID_3.4.5/arch/x86/xen/Kconfig
+++ /dev/null
@@ -1,52 +0,0 @@
-#
-# This Kconfig describes xen options
-#
-
-# User-visible switch for Xen guest support; selects the paravirt
-# infrastructure symbols PARAVIRT and PARAVIRT_CLOCK.
-config XEN
- bool "Xen guest support"
- select PARAVIRT
- select PARAVIRT_CLOCK
- depends on X86_64 || (X86_32 && X86_PAE && !X86_VISWS)
- depends on X86_CMPXCHG && X86_TSC
- help
- This is the Linux Xen port. Enabling this will allow the
- kernel to boot in a paravirtualized environment under the
- Xen hypervisor.
-
-# No prompt: enabled automatically once all listed dependencies are set.
-config XEN_DOM0
- def_bool y
- depends on XEN && PCI_XEN && SWIOTLB_XEN
- depends on X86_LOCAL_APIC && X86_IO_APIC && ACPI && PCI
-
-# Dummy symbol since people have come to rely on the PRIVILEGED_GUEST
-# name in tools.
-config XEN_PRIVILEGED_GUEST
- def_bool XEN_DOM0
-
-# No prompt: enabled automatically once all listed dependencies are set.
-config XEN_PVHVM
- def_bool y
- depends on XEN && PCI && X86_LOCAL_APIC
-
-# No prompt: the value is fixed per architecture (500 on X86_64, 64 on X86_32).
-config XEN_MAX_DOMAIN_MEMORY
- int
- default 500 if X86_64
- default 64 if X86_32
- depends on XEN
- help
- This only affects the sizing of some bss arrays, the unused
- portions of which are freed.
-
-# No prompt: defaults to y with XEN and selects HIBERNATE_CALLBACKS.
-config XEN_SAVE_RESTORE
- bool
- depends on XEN
- select HIBERNATE_CALLBACKS
- default y
-
-# Optional and off by default; requires DEBUG_FS.
-config XEN_DEBUG_FS
- bool "Enable Xen debug and tuning parameters in debugfs"
- depends on XEN && DEBUG_FS
- default n
- help
- Enable statistics output and various tuning options in debugfs.
- Enabling this option may incur a significant performance overhead.
-
diff --git a/ANDROID_3.4.5/arch/x86/xen/Makefile b/ANDROID_3.4.5/arch/x86/xen/Makefile
deleted file mode 100644
index add2c2d7..00000000
--- a/ANDROID_3.4.5/arch/x86/xen/Makefile
+++ /dev/null
@@ -1,24 +0,0 @@
-ifdef CONFIG_FUNCTION_TRACER
-# Do not profile debug and lowlevel utilities
-CFLAGS_REMOVE_spinlock.o = -pg
-CFLAGS_REMOVE_time.o = -pg
-CFLAGS_REMOVE_irq.o = -pg
-endif
-
-# Make sure early boot has no stackprotector
-nostackp := $(call cc-option, -fno-stack-protector)
-CFLAGS_enlighten.o := $(nostackp)
-CFLAGS_mmu.o := $(nostackp)
-
-# Objects built unconditionally for this directory.  xen-asm_$(BITS).o
-# picks one of the xen-asm_32/xen-asm_64 sources depending on $(BITS).
-obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
- time.o xen-asm.o xen-asm_$(BITS).o \
- grant-table.o suspend.o platform-pci-unplug.o \
- p2m.o
-
-# Objects below are each built only when their config symbol is enabled.
-obj-$(CONFIG_EVENT_TRACING) += trace.o
-
-obj-$(CONFIG_SMP) += smp.o
-obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
-obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o
-obj-$(CONFIG_XEN_DOM0) += vga.o
-obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o
diff --git a/ANDROID_3.4.5/arch/x86/xen/debugfs.c b/ANDROID_3.4.5/arch/x86/xen/debugfs.c
deleted file mode 100644
index ef1db190..00000000
--- a/ANDROID_3.4.5/arch/x86/xen/debugfs.c
+++ /dev/null
@@ -1,125 +0,0 @@
-#include <linux/init.h>
-#include <linux/debugfs.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-
-#include "debugfs.h"
-
-/* Cached dentry of the top-level "xen" debugfs directory. */
-static struct dentry *d_xen_debug;
-
-/*
- * Return the "xen" debugfs directory, creating it on first use.
- *
- * If creation fails a warning is logged and NULL is returned; because
- * nothing is cached in that case, a later call tries again.
- */
-struct dentry * __init xen_init_debugfs(void)
-{
-	if (d_xen_debug)
-		return d_xen_debug;
-
-	d_xen_debug = debugfs_create_dir("xen", NULL);
-	if (!d_xen_debug)
-		pr_warning("Could not create 'xen' debugfs directory\n");
-
-	return d_xen_debug;
-}
-
-/* Describes the array exposed by one debugfs file (stored in i_private). */
-struct array_data {
-	void *array;		/* first element; read as u32 values */
-	unsigned elements;	/* number of elements in @array */
-};
-
-/*
- * open() handler: start with no formatted text attached to the file and
- * mark the file as non-seekable.
- */
-static int u32_array_open(struct inode *inode, struct file *file)
-{
-	file->private_data = NULL;
-
-	return nonseekable_open(inode, file);
-}
-
-/*
- * Format @array_size values from @array into @buf using @fmt.  Values are
- * separated by ' ', the last one is followed by '\n', and the result is
- * NUL-terminated.
- *
- * With @buf == NULL nothing is written and only the required size is
- * computed.  Returns the number of bytes needed for the complete string,
- * including the trailing NUL.
- *
- * Nothing here stops the array from changing between a sizing call and the
- * formatting call, so a value may need more characters than were budgeted.
- * When an entry no longer fits, formatting stops after the previous entry
- * rather than advancing @buf beyond the buffer and wrapping @bufsize.
- */
-static size_t format_array(char *buf, size_t bufsize, const char *fmt,
-			   u32 *array, unsigned array_size)
-{
-	size_t ret = 0;
-	unsigned i;
-
-	for (i = 0; i < array_size; i++) {
-		size_t len;
-
-		len = snprintf(buf, bufsize, fmt, array[i]);
-		len++;	/* ' ' or '\n' */
-		ret += len;
-
-		if (!buf)
-			continue;
-
-		/* Need room for the value, its separator and the final '\0'. */
-		if (len >= bufsize)
-			break;
-
-		buf += len;
-		bufsize -= len;
-		buf[-1] = (i == array_size-1) ? '\n' : ' ';
-	}
-
-	ret++;	/* \0 */
-	if (buf && bufsize)
-		*buf = '\0';
-
-	return ret;
-}
-
-/*
- * Allocate a buffer sized by a first format_array() pass and fill it with
- * a second pass.  Returns the kmalloc()ed string, or NULL if the allocation
- * fails; the caller owns the result.
- */
-static char *format_array_alloc(const char *fmt, u32 *array, unsigned array_size)
-{
-	size_t needed = format_array(NULL, 0, fmt, array, array_size);
-	char *text = kmalloc(needed, GFP_KERNEL);
-
-	if (text != NULL)
-		format_array(text, needed, fmt, array, array_size);
-
-	return text;
-}
-
-/*
- * read() handler.  A read at offset 0 discards any previously formatted
- * text and formats the array afresh; the text is kept in
- * file->private_data and later reads are served from it.
- *
- * Returns the result of simple_read_from_buffer(), or -ENOMEM if the text
- * buffer cannot be allocated (previously this was reported as end-of-file).
- */
-static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len,
-			      loff_t *ppos)
-{
-	struct inode *inode = file->f_path.dentry->d_inode;
-	struct array_data *data = inode->i_private;
-	size_t size;
-
-	if (*ppos == 0) {
-		/* kfree(NULL) is a no-op, so no NULL test is needed. */
-		kfree(file->private_data);
-		file->private_data = format_array_alloc("%u", data->array,
-							data->elements);
-		if (!file->private_data)
-			return -ENOMEM;
-	}
-
-	size = 0;
-	if (file->private_data)
-		size = strlen(file->private_data);
-
-	return simple_read_from_buffer(buf, len, ppos, file->private_data, size);
-}
-
-/* release() handler: free the formatted text attached to the file, if any. */
-static int xen_array_release(struct inode *inode, struct file *file)
-{
-	kfree(file->private_data);
-	return 0;
-}
-
-/* File operations for the read-only, non-seekable u32 array files. */
-static const struct file_operations u32_array_fops = {
-	.owner		= THIS_MODULE,
-	.open		= u32_array_open,
-	.read		= u32_array_read,
-	.llseek		= no_llseek,
-	.release	= xen_array_release,
-};
-
-/*
- * Create a debugfs file @name under @parent that prints the @elements u32
- * values starting at @array.
- *
- * Returns the new dentry, or NULL if the bookkeeping structure cannot be
- * allocated.  If debugfs_create_file() returns NULL the bookkeeping
- * structure is freed again instead of being leaked.
- */
-struct dentry *xen_debugfs_create_u32_array(const char *name, umode_t mode,
-					    struct dentry *parent,
-					    u32 *array, unsigned elements)
-{
-	struct array_data *data = kmalloc(sizeof(*data), GFP_KERNEL);
-	struct dentry *dentry;
-
-	if (data == NULL)
-		return NULL;
-
-	data->array = array;
-	data->elements = elements;
-
-	dentry = debugfs_create_file(name, mode, parent, data, &u32_array_fops);
-	if (!dentry)
-		kfree(data);	/* no file was created, so nothing owns it */
-
-	return dentry;
-}
diff --git a/ANDROID_3.4.5/arch/x86/xen/debugfs.h b/ANDROID_3.4.5/arch/x86/xen/debugfs.h
deleted file mode 100644
index 78d25499..00000000
--- a/ANDROID_3.4.5/arch/x86/xen/debugfs.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef _XEN_DEBUGFS_H
-#define _XEN_DEBUGFS_H
-
-/* Make the header self-contained: __init, u32 and umode_t are used below. */
-#include <linux/init.h>
-#include <linux/types.h>
-
-struct dentry;
-
-/* Return the top-level "xen" debugfs directory, creating it if needed. */
-struct dentry * __init xen_init_debugfs(void);
-
-/* Create a debugfs file exposing @elements u32 values starting at @array. */
-struct dentry *xen_debugfs_create_u32_array(const char *name, umode_t mode,
-					    struct dentry *parent,
-					    u32 *array, unsigned elements);
-
-#endif /* _XEN_DEBUGFS_H */
diff --git a/ANDROID_3.4.5/arch/x86/xen/enlighten.c b/ANDROID_3.4.5/arch/x86/xen/enlighten.c
deleted file mode 100644
index 40edfc37..00000000
--- a/ANDROID_3.4.5/arch/x86/xen/enlighten.c
+++ /dev/null
@@ -1,1557 +0,0 @@
-/*
- * Core of Xen paravirt_ops implementation.
- *
- * This file contains the xen_paravirt_ops structure itself, and the
- * implementations for:
- * - privileged instructions
- * - interrupt flags
- * - segment operations
- * - booting and setup
- *
- * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
- */
-
-#include <linux/cpu.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/preempt.h>
-#include <linux/hardirq.h>
-#include <linux/percpu.h>
-#include <linux/delay.h>
-#include <linux/start_kernel.h>
-#include <linux/sched.h>
-#include <linux/kprobes.h>
-#include <linux/bootmem.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/page-flags.h>
-#include <linux/highmem.h>
-#include <linux/console.h>
-#include <linux/pci.h>
-#include <linux/gfp.h>
-#include <linux/memblock.h>
-
-#include <xen/xen.h>
-#include <xen/interface/xen.h>
-#include <xen/interface/version.h>
-#include <xen/interface/physdev.h>
-#include <xen/interface/vcpu.h>
-#include <xen/interface/memory.h>
-#include <xen/features.h>
-#include <xen/page.h>
-#include <xen/hvm.h>
-#include <xen/hvc-console.h>
-
-#include <asm/paravirt.h>
-#include <asm/apic.h>
-#include <asm/page.h>
-#include <asm/xen/pci.h>
-#include <asm/xen/hypercall.h>
-#include <asm/xen/hypervisor.h>
-#include <asm/fixmap.h>
-#include <asm/processor.h>
-#include <asm/proto.h>
-#include <asm/msr-index.h>
-#include <asm/traps.h>
-#include <asm/setup.h>
-#include <asm/desc.h>
-#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
-#include <asm/reboot.h>
-#include <asm/stackprotector.h>
-#include <asm/hypervisor.h>
-#include <asm/mwait.h>
-#include <asm/pci_x86.h>
-
-#ifdef CONFIG_ACPI
-#include <linux/acpi.h>
-#include <asm/acpi.h>
-#include <acpi/pdc_intel.h>
-#include <acpi/processor.h>
-#include <xen/interface/platform.h>
-#endif
-
-#include "xen-ops.h"
-#include "mmu.h"
-#include "multicalls.h"
-
-EXPORT_SYMBOL_GPL(hypercall_page);
-
-/*
- * xen_vcpu is the per-cpu pointer to the vcpu_info in use; xen_vcpu_setup()
- * points it either into the shared info page or at this cpu's xen_vcpu_info.
- */
-DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
-DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
-
-/* Starts out as XEN_NATIVE. */
-enum xen_domain_type xen_domain_type = XEN_NATIVE;
-EXPORT_SYMBOL_GPL(xen_domain_type);
-
-/* Initially points at MACH2PHYS_VIRT_START. */
-unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
-EXPORT_SYMBOL(machine_to_phys_mapping);
-unsigned long machine_to_phys_nr;
-EXPORT_SYMBOL(machine_to_phys_nr);
-
-struct start_info *xen_start_info;
-EXPORT_SYMBOL_GPL(xen_start_info);
-
-/* Placeholder that HYPERVISOR_shared_info points at until the real page is mapped. */
-struct shared_info xen_dummy_shared_info;
-
-void *xen_initial_gdt;
-
-RESERVE_BRK(shared_info_page_brk, PAGE_SIZE);
-__read_mostly int xen_have_vector_callback;
-EXPORT_SYMBOL_GPL(xen_have_vector_callback);
-
-/*
- * Point at some empty memory to start with. We map the real shared_info
- * page as soon as fixmap is up and running.
- */
-struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
-
-/*
- * Flag to determine whether vcpu info placement is available on all
- * VCPUs. We assume it is to start with, and then set it to zero on
- * the first failure. This is because it can succeed on some VCPUs
- * and not others, since it can involve hypervisor memory allocation,
- * or because the guest failed to guarantee all the appropriate
- * constraints on all VCPUs (ie buffer can't cross a page boundary).
- *
- * Note that any particular CPU may be using a placed vcpu structure,
- * but we can only optimise if they all are.
- *
- * 0: not available, 1: available
- */
-static int have_vcpu_info_placement = 1;
-
-/* On SMP builds, cap setup_max_cpus at MAX_VIRT_CPUS; otherwise a no-op. */
-static void clamp_max_cpus(void)
-{
-#ifdef CONFIG_SMP
-	if (setup_max_cpus > MAX_VIRT_CPUS) {
-		setup_max_cpus = MAX_VIRT_CPUS;
-	}
-#endif
-}
-
-/*
- * Choose where @cpu's vcpu_info lives.
- *
- * The cpu first gets the slot in the shared info page (only possible for
- * cpu < MAX_VIRT_CPUS).  If placement is still believed to work, the
- * hypervisor is then asked to register the per-cpu xen_vcpu_info instead.
- * The first registration failure clears have_vcpu_info_placement and
- * clamps the cpu count.
- */
-static void xen_vcpu_setup(int cpu)
-{
-	struct vcpu_register_vcpu_info reg;
-	struct vcpu_info *placed;
-	int rc;
-
-	BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
-
-	if (cpu < MAX_VIRT_CPUS)
-		per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
-
-	if (!have_vcpu_info_placement) {
-		if (cpu >= MAX_VIRT_CPUS)
-			clamp_max_cpus();
-		return;
-	}
-
-	placed = &per_cpu(xen_vcpu_info, cpu);
-	reg.mfn = arbitrary_virt_to_mfn(placed);
-	reg.offset = offset_in_page(placed);
-
-	/*
-	 * Ask the hypervisor to keep the vcpu_info where we want it, which
-	 * allows direct access through a percpu variable.
-	 */
-	rc = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &reg);
-	if (rc == 0) {
-		/* This cpu uses the registered copy even if later ones fail. */
-		per_cpu(xen_vcpu, cpu) = placed;
-		return;
-	}
-
-	printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", rc);
-	have_vcpu_info_placement = 0;
-	clamp_max_cpus();
-}
-
-/*
- * Set the vcpu placement up again for every online cpu after a restore.
- * There is no way to back out of using placement, so a failing down/up
- * hypercall is treated as fatal.
- */
-void xen_vcpu_restore(void)
-{
-	int cpu;
-
-	for_each_online_cpu(cpu) {
-		const bool remote = cpu != smp_processor_id();
-
-		/* Every cpu but the current one is taken down first... */
-		if (remote && HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
-			BUG();
-
-		xen_setup_runstate_info(cpu);
-
-		if (have_vcpu_info_placement)
-			xen_vcpu_setup(cpu);
-
-		/* ...and brought back up afterwards. */
-		if (remote && HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
-			BUG();
-	}
-}
-
-/*
- * Log the paravirt backend name and the Xen version at boot.  The version
- * word is printed as its upper 16 bits, a dot, and its lower 16 bits,
- * followed by the extraversion string.
- */
-static void __init xen_banner(void)
-{
-	unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL);
-	struct xen_extraversion extra;
-	HYPERVISOR_xen_version(XENVER_extraversion, &extra);
-
-	printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
-	       pv_info.name);
-	/* Both version fields are unsigned, so use %u rather than %d. */
-	printk(KERN_INFO "Xen version: %u.%u%s%s\n",
-	       version >> 16, version & 0xffff, extra.extraversion,
-	       xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
-}
-
-/* CPUID leaf 6; xen_cpuid() clears bit APERFMPERF_PRESENT of its ecx result. */
-#define CPUID_THERM_POWER_LEAF 6
-#define APERFMPERF_PRESENT 0
-
-/* Masks ANDed into the leaf 1 edx/ecx results by xen_cpuid(). */
-static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0;
-static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0;
-
-/*
- * Bits ORed into the leaf 1 ecx result, and the ecx/edx values that
- * xen_cpuid() returns for CPUID_MWAIT_LEAF.
- */
-static __read_mostly unsigned int cpuid_leaf1_ecx_set_mask;
-static __read_mostly unsigned int cpuid_leaf5_ecx_val;
-static __read_mostly unsigned int cpuid_leaf5_edx_val;
-
-/*
- * cpuid with Xen-specific filtering.  *ax (and *cx) select the leaf on
- * entry; all four registers hold the result on return.
- *
- *  - leaf 1: ecx and edx are ANDed with the cpuid_leaf1_* masks and
- *    cpuid_leaf1_ecx_set_mask is ORed into ecx.
- *  - CPUID_MWAIT_LEAF: no cpuid is executed; eax/ebx are 0 and ecx/edx
- *    come from cpuid_leaf5_*_val.
- *  - CPUID_THERM_POWER_LEAF: bit APERFMPERF_PRESENT is cleared in ecx.
- *  - leaf 0xb: ebx is zeroed.
- */
-static void xen_cpuid(unsigned int *ax, unsigned int *bx,
- unsigned int *cx, unsigned int *dx)
-{
- unsigned maskebx = ~0;
- unsigned maskecx = ~0;
- unsigned maskedx = ~0;
- unsigned setecx = 0;
- /*
- * Mask out inconvenient features, to try and disable as many
- * unsupported kernel subsystems as possible.
- */
- switch (*ax) {
- case 1:
- maskecx = cpuid_leaf1_ecx_mask;
- setecx = cpuid_leaf1_ecx_set_mask;
- maskedx = cpuid_leaf1_edx_mask;
- break;
-
- case CPUID_MWAIT_LEAF:
- /* Synthesize the values.. */
- *ax = 0;
- *bx = 0;
- *cx = cpuid_leaf5_ecx_val;
- *dx = cpuid_leaf5_edx_val;
- return;
-
- case CPUID_THERM_POWER_LEAF:
- /* Disabling APERFMPERF for kernel usage */
- maskecx = ~(1 << APERFMPERF_PRESENT);
- break;
-
- case 0xb:
- /* Suppress extended topology stuff */
- maskebx = 0;
- break;
- }
-
- /* The cpuid instruction is issued with XEN_EMULATE_PREFIX in front. */
- asm(XEN_EMULATE_PREFIX "cpuid"
- : "=a" (*ax),
- "=b" (*bx),
- "=c" (*cx),
- "=d" (*dx)
- : "0" (*ax), "2" (*cx));
-
- *bx &= maskebx;
- *cx &= maskecx;
- *cx |= setecx;
- *dx &= maskedx;
-
-}
-
-/*
- * Decide whether MWAIT may be advertised to the kernel.
- *
- * Always false unless CONFIG_ACPI is set and the ACPI processor aggregator
- * is neither built in nor a module.  Otherwise false when not running as
- * the initial domain or when native cpuid leaf 1 lacks either the EST or
- * the MWAIT bit in ecx, and true in every other case.
- *
- * As a side effect the native CPUID_MWAIT_LEAF ecx/edx values are saved in
- * cpuid_leaf5_*_val, but only if the XENPF_set_processor_pminfo hypercall
- * succeeds and leaves a C1 or C2/C3 FFH bit set in buf[2].
- */
-static bool __init xen_check_mwait(void)
-{
-#if defined(CONFIG_ACPI) && !defined(CONFIG_ACPI_PROCESSOR_AGGREGATOR) && \
- !defined(CONFIG_ACPI_PROCESSOR_AGGREGATOR_MODULE)
- struct xen_platform_op op = {
- .cmd = XENPF_set_processor_pminfo,
- .u.set_pminfo.id = -1,
- .u.set_pminfo.type = XEN_PM_PDC,
- };
- uint32_t buf[3];
- unsigned int ax, bx, cx, dx;
- unsigned int mwait_mask;
-
- /* We need to determine whether it is OK to expose the MWAIT
- * capability to the kernel to harvest deeper than C3 states from ACPI
- * _CST using the processor_harvest_xen.c module. For this to work, we
- * need to gather the MWAIT_LEAF values (which the cstate.c code
- * checks against). The hypervisor won't expose the MWAIT flag because
- * it would break backwards compatibility; so we will find out directly
- * from the hardware and hypercall.
- */
- if (!xen_initial_domain())
- return false;
-
- ax = 1;
- cx = 0;
-
- native_cpuid(&ax, &bx, &cx, &dx);
-
- mwait_mask = (1 << (X86_FEATURE_EST % 32)) |
- (1 << (X86_FEATURE_MWAIT % 32));
-
- if ((cx & mwait_mask) != mwait_mask)
- return false;
-
- /* We need to emulate the MWAIT_LEAF and for that we need both
- * ecx and edx. The hypercall provides only partial information.
- */
-
- ax = CPUID_MWAIT_LEAF;
- bx = 0;
- cx = 0;
- dx = 0;
-
- native_cpuid(&ax, &bx, &cx, &dx);
-
- /* Ask the Hypervisor whether to clear ACPI_PDC_C_C2C3_FFH. If so,
- * don't expose MWAIT_LEAF and let ACPI pick the IOPORT version of C3.
- */
- buf[0] = ACPI_PDC_REVISION_ID;
- buf[1] = 1;
- buf[2] = (ACPI_PDC_C_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_SWSMP);
-
- set_xen_guest_handle(op.u.set_pminfo.pdc, buf);
-
- if ((HYPERVISOR_dom0_op(&op) == 0) &&
- (buf[2] & (ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH))) {
- cpuid_leaf5_ecx_val = cx;
- cpuid_leaf5_edx_val = dx;
- }
- return true;
-#else
- return false;
-#endif
-}
-/*
- * Compute the leaf 1 masks that xen_cpuid() applies afterwards: hide MCE,
- * MCA, MTRR and thermal monitoring everywhere, hide the local APIC and
- * ACPI outside the initial domain, hide XSAVE/OSXSAVE unless both are
- * reported, and force MWAIT on when xen_check_mwait() allows it.
- */
-static void __init xen_init_cpuid_mask(void)
-{
-	unsigned int ax, bx, cx, dx;
-	unsigned int xsave_bits;
-	unsigned int hidden_edx;
-
-	hidden_edx = (1 << X86_FEATURE_MCE) |	/* disable MCE */
-		     (1 << X86_FEATURE_MCA) |	/* disable MCA */
-		     (1 << X86_FEATURE_MTRR) |	/* disable MTRR */
-		     (1 << X86_FEATURE_ACC);	/* thermal monitoring */
-
-	if (!xen_initial_domain())
-		hidden_edx |= (1 << X86_FEATURE_APIC) |	/* disable local APIC */
-			      (1 << X86_FEATURE_ACPI);	/* disable ACPI */
-
-	cpuid_leaf1_edx_mask = ~hidden_edx;
-
-	ax = 1;
-	cx = 0;
-	xen_cpuid(&ax, &bx, &cx, &dx);
-
-	xsave_bits = (1 << (X86_FEATURE_XSAVE % 32)) |
-		     (1 << (X86_FEATURE_OSXSAVE % 32));
-
-	/* Xen will set CR4.OSXSAVE if supported and not disabled by force */
-	if ((cx & xsave_bits) != xsave_bits)
-		cpuid_leaf1_ecx_mask &= ~xsave_bits; /* disable XSAVE & OSXSAVE */
-
-	if (xen_check_mwait())
-		cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32));
-}
-
-/* Write debug register @reg through the hypervisor. */
-static void xen_set_debugreg(int reg, unsigned long val)
-{
-	HYPERVISOR_set_debugreg(reg, val);
-}
-
-/* Read debug register @reg through the hypervisor. */
-static unsigned long xen_get_debugreg(int reg)
-{
-	return HYPERVISOR_get_debugreg(reg);
-}
-
-/* Flush any batched multicalls before finishing the context switch to @next. */
-static void xen_end_context_switch(struct task_struct *next)
-{
-	xen_mc_flush();
-
-	paravirt_end_context_switch(next);
-}
-
-/* Always reports a task register value of 0. */
-static unsigned long xen_store_tr(void)
-{
-	return 0;
-}
-
-/*
- * Apply @prot to the page mapped at virtual address @v.  When that page
- * is not highmem and @v is not already its linear-map address (e.g. @v is
- * a vmalloc mapping), the linear mapping is updated to match as well.
- * For highmem pages unused kmaps are flushed instead.
- */
-static void set_aliased_prot(void *v, pgprot_t prot)
-{
-	const unsigned long vaddr = (unsigned long)v;
-	unsigned long pfn;
-	pte_t *ptep;
-	pte_t pte;
-	void *linear;
-	int level;
-
-	ptep = lookup_address(vaddr, &level);
-	BUG_ON(ptep == NULL);
-
-	pfn = pte_pfn(*ptep);
-	pte = pfn_pte(pfn, prot);
-
-	if (HYPERVISOR_update_va_mapping(vaddr, pte, 0))
-		BUG();
-
-	if (PageHighMem(pfn_to_page(pfn))) {
-		kmap_flush_unused();
-		return;
-	}
-
-	linear = __va(PFN_PHYS(pfn));
-	if (linear != v &&
-	    HYPERVISOR_update_va_mapping((unsigned long)linear, pte, 0))
-		BUG();
-}
-
-/*
- * Make every page backing the @entries-entry LDT at @ldt read-only
- * (PAGE_KERNEL_RO), one set_aliased_prot() call per page of entries.
- */
-static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries)
-{
-	const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
-	unsigned i;	/* unsigned to match @entries in the loop comparison */
-
-	for (i = 0; i < entries; i += entries_per_page)
-		set_aliased_prot(ldt + i, PAGE_KERNEL_RO);
-}
-
-/*
- * Restore normal kernel protection (PAGE_KERNEL) on every page backing
- * the @entries-entry LDT at @ldt, one set_aliased_prot() call per page.
- */
-static void xen_free_ldt(struct desc_struct *ldt, unsigned entries)
-{
-	const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
-	unsigned i;	/* unsigned to match @entries in the loop comparison */
-
-	for (i = 0; i < entries; i += entries_per_page)
-		set_aliased_prot(ldt + i, PAGE_KERNEL);
-}
-
-/*
- * Queue an MMUEXT_SET_LDT operation that installs the @entries-entry LDT
- * at @addr, then issue it (subject to PARAVIRT_LAZY_CPU batching).
- */
-static void xen_set_ldt(const void *addr, unsigned entries)
-{
-	struct mmuext_op *ldt_op;
-	struct multicall_space mcs = xen_mc_entry(sizeof(*ldt_op));
-
-	trace_xen_cpu_set_ldt(addr, entries);
-
-	ldt_op = mcs.args;
-	ldt_op->cmd = MMUEXT_SET_LDT;
-	ldt_op->arg1.linear_addr = (unsigned long)addr;
-	ldt_op->arg2.nr_ents = entries;
-
-	MULTI_mmuext_op(mcs.mc, ldt_op, 1, NULL, DOMID_SELF);
-
-	xen_mc_issue(PARAVIRT_LAZY_CPU);
-}
-
-/*
- * Hand the GDT described by @dtr to Xen.  Each page of the table is looked
- * up in the page tables to find its machine frame, and both the mapping at
- * the given address and the linear-map alias are made read-only before the
- * frame list is passed to HYPERVISOR_set_gdt().
- */
-static void xen_load_gdt(const struct desc_ptr *dtr)
-{
-	const unsigned long base = dtr->address;
-	unsigned int size = dtr->size + 1;
-	unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
-	unsigned long frames[pages];
-	unsigned long va;
-	int f = 0;
-
-	/*
-	 * A GDT can be up to 64k in size, which corresponds to 8192
-	 * 8-byte entries, or 16 4k pages..
-	 */
-	BUG_ON(size > 65536);
-	BUG_ON(base & ~PAGE_MASK);
-
-	for (va = base; va < base + size; va += PAGE_SIZE) {
-		unsigned long pfn;
-		pte_t *ptep;
-		int level;
-
-		/*
-		 * The GDT is per-cpu and is in the percpu data area.
-		 * That can be virtually mapped, so we need to do a
-		 * page-walk to get the underlying MFN for the
-		 * hypercall.  The page can also be in the kernel's
-		 * linear range, so we need to RO that mapping too.
-		 */
-		ptep = lookup_address(va, &level);
-		BUG_ON(ptep == NULL);
-
-		pfn = pte_pfn(*ptep);
-		frames[f++] = pfn_to_mfn(pfn);
-
-		make_lowmem_page_readonly((void *)va);
-		make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
-	}
-
-	if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct)))
-		BUG();
-}
-
-/*
- * load_gdt variant for early boot, when the GDT is only mapped once:
- * each page is remapped PAGE_KERNEL_RO at its own address and its machine
- * frame collected for HYPERVISOR_set_gdt().
- */
-static void __init xen_load_gdt_boot(const struct desc_ptr *dtr)
-{
-	const unsigned long base = dtr->address;
-	unsigned int size = dtr->size + 1;
-	unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
-	unsigned long frames[pages];
-	unsigned long va;
-	int f = 0;
-
-	/*
-	 * A GDT can be up to 64k in size, which corresponds to 8192
-	 * 8-byte entries, or 16 4k pages..
-	 */
-	BUG_ON(size > 65536);
-	BUG_ON(base & ~PAGE_MASK);
-
-	for (va = base; va < base + size; va += PAGE_SIZE) {
-		unsigned long pfn = virt_to_pfn(va);
-		unsigned long mfn = pfn_to_mfn(pfn);
-		pte_t pte = pfn_pte(pfn, PAGE_KERNEL_RO);
-
-		if (HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0))
-			BUG();
-
-		frames[f++] = mfn;
-	}
-
-	if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct)))
-		BUG();
-}
-
-/*
- * Queue a multicall that copies TLS slot @i of thread @t into the
- * matching GDT entry (GDT_ENTRY_TLS_MIN + @i) of @cpu.
- */
-static void load_TLS_descriptor(struct thread_struct *t,
-				unsigned int cpu, unsigned int i)
-{
-	struct desc_struct *cpu_gdt = get_cpu_gdt_table(cpu);
-	xmaddr_t slot = arbitrary_virt_to_machine(&cpu_gdt[GDT_ENTRY_TLS_MIN+i]);
-	struct multicall_space mcs = __xen_mc_entry(0);
-
-	MULTI_update_descriptor(mcs.mc, slot.maddr, t->tls_array[i]);
-}
-
-/*
- * Load the three TLS descriptors of thread @t into @cpu's GDT as one
- * multicall batch.
- */
-static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
-{
-	unsigned int slot;
-
-	/*
-	 * XXX sleazy hack: being in a lazy-cpu zone means we are in a
-	 * context switch.
-	 *
-	 * 32-bit: with lazy gs handling %gs has just been saved, so it
-	 * can be zeroed to prevent faults on exit from the hypervisor if
-	 * the next process has no %gs; the new value gets loaded properly
-	 * later.  This goes away once Xen no longer saves/restores %gs
-	 * for normal hypercalls.
-	 *
-	 * 64-bit: %gs must not be zeroed (it points to KERNEL_GS_BASE and
-	 * is used for PDA references).  %fs is zeroed instead, otherwise
-	 * an exception could hit between the new %fs descriptor being
-	 * loaded and %fs being cleared at __switch_to().
-	 */
-	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
-#ifdef CONFIG_X86_32
-		lazy_load_gs(0);
-#else
-		loadsegment(fs, 0);
-#endif
-	}
-
-	xen_mc_batch();
-
-	for (slot = 0; slot < 3; slot++)
-		load_TLS_descriptor(t, cpu, slot);
-
-	xen_mc_issue(PARAVIRT_LAZY_CPU);
-}
-
-#ifdef CONFIG_X86_64
-/* Set the user %gs selector to @idx via hypercall; failure is fatal. */
-static void xen_load_gs_index(unsigned int idx)
-{
-	if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx) != 0)
-		BUG();
-}
-#endif
-
-/*
- * Write the 8-byte descriptor at @ptr into slot @entrynum of LDT @dt via
- * HYPERVISOR_update_descriptor().  Pending multicalls are flushed first;
- * a failing hypercall is fatal.
- */
-static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
-				const void *ptr)
-{
-	xmaddr_t slot = arbitrary_virt_to_machine(&dt[entrynum]);
-	u64 descriptor = *(u64 *)ptr;
-
-	trace_xen_cpu_write_ldt_entry(dt, entrynum, descriptor);
-
-	preempt_disable();
-
-	xen_mc_flush();
-	if (HYPERVISOR_update_descriptor(slot.maddr, descriptor))
-		BUG();
-
-	preempt_enable();
-}
-
-/*
- * Convert IDT gate @val for @vector into a Xen trap_info entry in @info.
- *
- * Returns 1 when @info has been filled in and 0 when the gate is to be
- * skipped: it is neither a trap nor an interrupt gate, or (64-bit only)
- * it is the double_fault, nmi or machine_check handler, or some other
- * handler with a non-zero IST (which also triggers a warning).
- * Note that info->vector is written before the skip decisions are made.
- */
-static int cvt_gate_to_trap(int vector, const gate_desc *val,
- struct trap_info *info)
-{
- unsigned long addr;
-
- if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT)
- return 0;
-
- info->vector = vector;
-
- addr = gate_offset(*val);
-#ifdef CONFIG_X86_64
- /*
- * Look for known traps using IST, and substitute them
- * appropriately. The debugger ones are the only ones we care
- * about. Xen will handle faults like double_fault and
- * machine_check, so we should never see them. Warn if
- * there's an unexpected IST-using fault handler.
- */
- if (addr == (unsigned long)debug)
- addr = (unsigned long)xen_debug;
- else if (addr == (unsigned long)int3)
- addr = (unsigned long)xen_int3;
- else if (addr == (unsigned long)stack_segment)
- addr = (unsigned long)xen_stack_segment;
- else if (addr == (unsigned long)double_fault ||
- addr == (unsigned long)nmi) {
- /* Don't need to handle these */
- return 0;
-#ifdef CONFIG_X86_MCE
- } else if (addr == (unsigned long)machine_check) {
- return 0;
-#endif
- } else {
- /* Some other trap using IST? */
- if (WARN_ON(val->ist != 0))
- return 0;
- }
-#endif /* CONFIG_X86_64 */
- info->address = addr;
-
- info->cs = gate_segment(*val);
- /* flags: the gate's DPL in the low bits, bit 2 set for interrupt gates */
- info->flags = val->dpl;
- /* interrupt gates clear IF */
- if (val->type == GATE_INTERRUPT)
- info->flags |= 1 << 2;
-
- return 1;
-}
-
-/* Where each CPU's IDT lives, as last recorded by xen_load_idt(). */
-static DEFINE_PER_CPU(struct desc_ptr, idt_desc);
-
-/*
- * Store gate @g in slot @entrynum of IDT @dt.  If that slot lies inside
- * this cpu's current IDT, the corresponding trap is also sent to Xen.
- */
-static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g)
-{
-	const unsigned long slot = (unsigned long)&dt[entrynum];
-	unsigned long idt_start, idt_end;
-
-	trace_xen_cpu_write_idt_entry(dt, entrynum, g);
-
-	preempt_disable();
-
-	idt_start = __this_cpu_read(idt_desc.address);
-	idt_end = idt_start + __this_cpu_read(idt_desc.size) + 1;
-
-	xen_mc_flush();
-
-	native_write_idt_entry(dt, entrynum, g);
-
-	if (slot >= idt_start && (slot + 8) <= idt_end) {
-		struct trap_info info[2];
-
-		/* An entry with address 0 terminates the table. */
-		info[1].address = 0;
-
-		if (cvt_gate_to_trap(entrynum, g, &info[0]) &&
-		    HYPERVISOR_set_trap_table(info))
-			BUG();
-	}
-
-	preempt_enable();
-}
-
-/*
- * Translate every gate of the IDT described by @desc into @traps,
- * skipping gates cvt_gate_to_trap() rejects, and terminate the list with
- * an entry whose address is 0.  @traps needs room for one entry per gate
- * plus the terminator.
- */
-static void xen_convert_trap_info(const struct desc_ptr *desc,
-				  struct trap_info *traps)
-{
-	gate_desc *idt = (gate_desc *)(desc->address);
-	unsigned nr_gates = (desc->size + 1) / sizeof(gate_desc);
-	unsigned in;
-	unsigned out = 0;
-
-	BUG_ON(nr_gates > 256);
-
-	for (in = 0; in < nr_gates; in++) {
-		if (cvt_gate_to_trap(in, &idt[in], &traps[out]))
-			out++;
-	}
-
-	traps[out].address = 0;
-}
-
-/* Fill @traps from the IDT recorded for the current cpu. */
-void xen_copy_trap_info(struct trap_info *traps)
-{
-	xen_convert_trap_info(&__get_cpu_var(idt_desc), traps);
-}
-
-/*
- * Load a new IDT into Xen and remember it as this cpu's IDT.  Since this
- * can in principle happen per-CPU, a spinlock guards the static traps[]
- * array, which is static to avoid an allocation and to save stack space.
- */
-static void xen_load_idt(const struct desc_ptr *desc)
-{
-	static DEFINE_SPINLOCK(traps_lock);
-	static struct trap_info traps[257];	/* 256 gates + terminator */
-
-	trace_xen_cpu_load_idt(desc);
-
-	spin_lock(&traps_lock);
-
-	__get_cpu_var(idt_desc) = *desc;
-	xen_convert_trap_info(desc, traps);
-
-	xen_mc_flush();
-	if (HYPERVISOR_set_trap_table(traps))
-		BUG();
-
-	spin_unlock(&traps_lock);
-}
-
-/*
- * Write descriptor @desc into slot @entry of GDT @dt through the
- * hypervisor.  LDT and TSS descriptors are ignored, since they are
- * handled differently.  A failing hypercall is fatal.
- */
-static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
-				const void *desc, int type)
-{
-	trace_xen_cpu_write_gdt_entry(dt, entry, desc, type);
-
-	preempt_disable();
-
-	if (type != DESC_LDT && type != DESC_TSS) {
-		xmaddr_t slot = arbitrary_virt_to_machine(&dt[entry]);
-
-		xen_mc_flush();
-		if (HYPERVISOR_update_descriptor(slot.maddr, *(u64 *)desc))
-			BUG();
-	}
-
-	preempt_enable();
-}
-
-/*
- * Early-boot version of write_gdt_entry that updates an entry as simply
- * as possible.  LDT and TSS descriptors are ignored.  If the hypercall
- * fails, the descriptor is stored into the table directly instead.
- */
-static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
-					    const void *desc, int type)
-{
-	xmaddr_t slot;
-
-	trace_xen_cpu_write_gdt_entry(dt, entry, desc, type);
-
-	if (type == DESC_LDT || type == DESC_TSS)
-		return;
-
-	slot = virt_to_machine(&dt[entry]);
-
-	if (HYPERVISOR_update_descriptor(slot.maddr, *(u64 *)desc))
-		dt[entry] = *(struct desc_struct *)desc;
-}
-
-/*
- * Queue a stack-switch multicall selecting __KERNEL_DS and @thread's sp0,
- * then issue it (subject to PARAVIRT_LAZY_CPU batching).  @tss is unused.
- */
-static void xen_load_sp0(struct tss_struct *tss,
-			 struct thread_struct *thread)
-{
-	struct multicall_space mcs = xen_mc_entry(0);
-
-	MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
-	xen_mc_issue(PARAVIRT_LAZY_CPU);
-}
-
-/*
- * Tell Xen the I/O privilege level encoded in bits 12-13 of @mask.  A
- * @mask of 0 is sent as level 1.  The hypercall result is not checked.
- */
-static void xen_set_iopl_mask(unsigned mask)
-{
-	struct physdev_set_iopl set_iopl;
-
-	/* Force the change at ring 0. */
-	if (mask == 0)
-		set_iopl.iopl = 1;
-	else
-		set_iopl.iopl = (mask >> 12) & 3;
-
-	HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
-}
-
-/* Does nothing: no I/O delay is performed. */
-static void xen_io_delay(void)
-{
-}
-
-#ifdef CONFIG_X86_LOCAL_APIC
-/* Warns when called; returns @x unchanged. */
-static unsigned long xen_set_apic_id(unsigned int x)
-{
-	WARN_ON(1);
-	return x;
-}
-
-/* Extract the 8-bit APIC id held in bits 24-31 of @x. */
-static unsigned int xen_get_apic_id(unsigned long x)
-{
-	return (x >> 24) & 0xFFu;
-}
-
-/*
- * Emulated APIC register read.  Only the initial domain on cpu 0 gets
- * real answers: APIC_LVR reads as 0x10 and APIC_ID is fetched from the
- * hypervisor (XENPF_get_cpuinfo for Xen cpu 0).  Everything else, and any
- * hypercall failure, reads as 0.
- */
-static u32 xen_apic_read(u32 reg)
-{
-	struct xen_platform_op op = {
-		.cmd = XENPF_get_cpuinfo,
-		.interface_version = XENPF_INTERFACE_VERSION,
-		.u.pcpu_info.xen_cpuid = 0,
-	};
-
-	/* Shouldn't need this as APIC is turned off for PV, and we only
-	 * get called on the bootup processor. But just in case. */
-	if (!xen_initial_domain() || smp_processor_id())
-		return 0;
-
-	switch (reg) {
-	case APIC_LVR:
-		return 0x10;
-	case APIC_ID:
-		break;
-	default:
-		return 0;
-	}
-
-	if (HYPERVISOR_dom0_op(&op))
-		return 0;
-
-	return op.u.pcpu_info.apic_id << 24;
-}
-
-/* APIC writes are discarded; warn so stray callers can be found. */
-static void xen_apic_write(u32 reg, u32 val)
-{
-	WARN_ON(1);
-}
-
-/* The ICR always reads as 0. */
-static u64 xen_apic_icr_read(void)
-{
-	return 0;
-}
-
-/* ICR writes are discarded; warn so stray callers can be found. */
-static void xen_apic_icr_write(u32 low, u32 id)
-{
-	WARN_ON(1);
-}
-
-/* Nothing to wait for. */
-static void xen_apic_wait_icr_idle(void)
-{
-}
-
-/* Nothing to wait for; always returns 0. */
-static u32 xen_safe_apic_wait_icr_idle(void)
-{
-	return 0;
-}
-
-/* Point the current apic driver's basic accessors at the stubs above. */
-static void set_xen_basic_apic_ops(void)
-{
-	apic->read = xen_apic_read;
-	apic->write = xen_apic_write;
-
-	apic->icr_read = xen_apic_icr_read;
-	apic->icr_write = xen_apic_icr_write;
-
-	apic->wait_icr_idle = xen_apic_wait_icr_idle;
-	apic->safe_wait_icr_idle = xen_safe_apic_wait_icr_idle;
-
-	apic->set_apic_id = xen_set_apic_id;
-	apic->get_apic_id = xen_get_apic_id;
-}
-
-#endif
-
-/*
- * Queue an fpu_taskswitch multicall with argument 0, then issue it
- * (subject to PARAVIRT_LAZY_CPU batching).
- */
-static void xen_clts(void)
-{
-	struct multicall_space mcs = xen_mc_entry(0);
-
-	MULTI_fpu_taskswitch(mcs.mc, 0);
-	xen_mc_issue(PARAVIRT_LAZY_CPU);
-}
-
-/* Per-cpu cached copy of cr0; 0 means "not read yet". */
-static DEFINE_PER_CPU(unsigned long, xen_cr0_value);
-
-/*
- * Return this cpu's cr0, reading the real register only while the cached
- * value is still 0.
- */
-static unsigned long xen_read_cr0(void)
-{
-	unsigned long cr0 = this_cpu_read(xen_cr0_value);
-
-	if (likely(cr0 != 0))
-		return cr0;
-
-	cr0 = native_read_cr0();
-	this_cpu_write(xen_cr0_value, cr0);
-
-	return cr0;
-}
-
-/*
- * Record @cr0 in the per-cpu cache and pass its TS bit to Xen through an
- * fpu_taskswitch multicall.  Only cr0.TS is acted upon; everything else
- * is ignored.
- */
-static void xen_write_cr0(unsigned long cr0)
-{
-	struct multicall_space mcs;
-
-	this_cpu_write(xen_cr0_value, cr0);
-
-	mcs = xen_mc_entry(0);
-	MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0);
-
-	xen_mc_issue(PARAVIRT_LAZY_CPU);
-}
-
-/* Write @cr4 with the PGE and PSE bits always cleared. */
-static void xen_write_cr4(unsigned long cr4)
-{
-	native_write_cr4(cr4 & ~(X86_CR4_PGE | X86_CR4_PSE));
-}
-
-/*
- * MSR write hook.  @low/@high are the two 32-bit halves of the value.
- *
- *  - 64-bit only: the FS/GS base MSRs become HYPERVISOR_set_segment_base()
- *    calls; -EIO is returned if the hypercall fails.
- *  - The syscall/sysenter setup MSRs are silently ignored.
- *  - MSR_IA32_CR_PAT is forwarded to xen_set_pat(), on cpu 0 only.
- *  - Anything else goes to native_write_msr_safe().
- *
- * Returns 0 on success or the error from the paths above.
- */
-static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
-{
- int ret;
-
- ret = 0;
-
- switch (msr) {
-#ifdef CONFIG_X86_64
- unsigned which;
- u64 base;
-
- /* Note: MSR_KERNEL_GS_BASE maps to the *user* GS base and vice versa. */
- case MSR_FS_BASE: which = SEGBASE_FS; goto set;
- case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set;
- case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set;
-
- set:
- base = ((u64)high << 32) | low;
- if (HYPERVISOR_set_segment_base(which, base) != 0)
- ret = -EIO;
- break;
-#endif
-
- case MSR_STAR:
- case MSR_CSTAR:
- case MSR_LSTAR:
- case MSR_SYSCALL_MASK:
- case MSR_IA32_SYSENTER_CS:
- case MSR_IA32_SYSENTER_ESP:
- case MSR_IA32_SYSENTER_EIP:
- /* Fast syscall setup is all done in hypercalls, so
- these are all ignored. Stub them out here to stop
- Xen console noise. */
- break;
-
- case MSR_IA32_CR_PAT:
- if (smp_processor_id() == 0)
- xen_set_pat(((u64)high << 32) | low);
- break;
-
- default:
- ret = native_write_msr_safe(msr, low, high);
- }
-
- return ret;
-}
-
-/*
- * Point HYPERVISOR_shared_info at the real shared info page named in
- * xen_start_info: through __va() for auto-translated guests, otherwise
- * through the FIX_PARAVIRT_BOOTMAP fixmap slot.  On UP builds the vcpu
- * info placement is set up here too; finally the mfn list list is set up.
- */
-void xen_setup_shared_info(void)
-{
-	if (xen_feature(XENFEAT_auto_translated_physmap)) {
-		HYPERVISOR_shared_info =
-			(struct shared_info *)__va(xen_start_info->shared_info);
-	} else {
-		set_fixmap(FIX_PARAVIRT_BOOTMAP,
-			   xen_start_info->shared_info);
-
-		HYPERVISOR_shared_info =
-			(struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
-	}
-
-#ifndef CONFIG_SMP
-	/* In UP this is as good a place as any to set up shared info */
-	xen_setup_vcpu_info_placement();
-#endif
-
-	xen_setup_mfn_list_list();
-}
-
-/* This is called once we have the cpu_possible_mask */
-void xen_setup_vcpu_info_placement(void)
-{
- int cpu;
-
- for_each_possible_cpu(cpu)
- xen_vcpu_setup(cpu);
-
- /* xen_vcpu_setup managed to place the vcpu_info within the
- percpu area for all cpus, so make use of it */
- if (have_vcpu_info_placement) {
- pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
- pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
- pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
- pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
- pv_mmu_ops.read_cr2 = xen_read_cr2_direct;
- }
-}
-
-static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
- unsigned long addr, unsigned len)
-{
- char *start, *end, *reloc;
- unsigned ret;
-
- start = end = reloc = NULL;
-
-#define SITE(op, x) \
- case PARAVIRT_PATCH(op.x): \
- if (have_vcpu_info_placement) { \
- start = (char *)xen_##x##_direct; \
- end = xen_##x##_direct_end; \
- reloc = xen_##x##_direct_reloc; \
- } \
- goto patch_site
-
- switch (type) {
- SITE(pv_irq_ops, irq_enable);
- SITE(pv_irq_ops, irq_disable);
- SITE(pv_irq_ops, save_fl);
- SITE(pv_irq_ops, restore_fl);
-#undef SITE
-
- patch_site:
- if (start == NULL || (end-start) > len)
- goto default_patch;
-
- ret = paravirt_patch_insns(insnbuf, len, start, end);
-
- /* Note: because reloc is assigned from something that
- appears to be an array, gcc assumes it's non-null,
- but doesn't know its relationship with start and
- end. */
- if (reloc > start && reloc < end) {
- int reloc_off = reloc - start;
- long *relocp = (long *)(insnbuf + reloc_off);
- long delta = start - (char *)addr;
-
- *relocp += delta;
- }
- break;
-
- default_patch:
- default:
- ret = paravirt_patch_default(type, clobbers, insnbuf,
- addr, len);
- break;
- }
-
- return ret;
-}
-
-static const struct pv_info xen_info __initconst = {
- .paravirt_enabled = 1,
- .shared_kernel_pmd = 0,
-
-#ifdef CONFIG_X86_64
- .extra_user_64bit_cs = FLAT_USER_CS64,
-#endif
-
- .name = "Xen",
-};
-
-static const struct pv_init_ops xen_init_ops __initconst = {
- .patch = xen_patch,
-};
-
-static const struct pv_cpu_ops xen_cpu_ops __initconst = {
- .cpuid = xen_cpuid,
-
- .set_debugreg = xen_set_debugreg,
- .get_debugreg = xen_get_debugreg,
-
- .clts = xen_clts,
-
- .read_cr0 = xen_read_cr0,
- .write_cr0 = xen_write_cr0,
-
- .read_cr4 = native_read_cr4,
- .read_cr4_safe = native_read_cr4_safe,
- .write_cr4 = xen_write_cr4,
-
- .wbinvd = native_wbinvd,
-
- .read_msr = native_read_msr_safe,
- .rdmsr_regs = native_rdmsr_safe_regs,
- .write_msr = xen_write_msr_safe,
- .wrmsr_regs = native_wrmsr_safe_regs,
-
- .read_tsc = native_read_tsc,
- .read_pmc = native_read_pmc,
-
- .iret = xen_iret,
- .irq_enable_sysexit = xen_sysexit,
-#ifdef CONFIG_X86_64
- .usergs_sysret32 = xen_sysret32,
- .usergs_sysret64 = xen_sysret64,
-#endif
-
- .load_tr_desc = paravirt_nop,
- .set_ldt = xen_set_ldt,
- .load_gdt = xen_load_gdt,
- .load_idt = xen_load_idt,
- .load_tls = xen_load_tls,
-#ifdef CONFIG_X86_64
- .load_gs_index = xen_load_gs_index,
-#endif
-
- .alloc_ldt = xen_alloc_ldt,
- .free_ldt = xen_free_ldt,
-
- .store_gdt = native_store_gdt,
- .store_idt = native_store_idt,
- .store_tr = xen_store_tr,
-
- .write_ldt_entry = xen_write_ldt_entry,
- .write_gdt_entry = xen_write_gdt_entry,
- .write_idt_entry = xen_write_idt_entry,
- .load_sp0 = xen_load_sp0,
-
- .set_iopl_mask = xen_set_iopl_mask,
- .io_delay = xen_io_delay,
-
- /* Xen takes care of %gs when switching to usermode for us */
- .swapgs = paravirt_nop,
-
- .start_context_switch = paravirt_start_context_switch,
- .end_context_switch = xen_end_context_switch,
-};
-
-static const struct pv_apic_ops xen_apic_ops __initconst = {
-#ifdef CONFIG_X86_LOCAL_APIC
- .startup_ipi_hook = paravirt_nop,
-#endif
-};
-
-static void xen_reboot(int reason)
-{
- struct sched_shutdown r = { .reason = reason };
-
- if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
- BUG();
-}
-
-static void xen_restart(char *msg)
-{
- xen_reboot(SHUTDOWN_reboot);
-}
-
-static void xen_emergency_restart(void)
-{
- xen_reboot(SHUTDOWN_reboot);
-}
-
-static void xen_machine_halt(void)
-{
- xen_reboot(SHUTDOWN_poweroff);
-}
-
-static void xen_machine_power_off(void)
-{
- if (pm_power_off)
- pm_power_off();
- xen_reboot(SHUTDOWN_poweroff);
-}
-
-static void xen_crash_shutdown(struct pt_regs *regs)
-{
- xen_reboot(SHUTDOWN_crash);
-}
-
-static int
-xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
-{
- xen_reboot(SHUTDOWN_crash);
- return NOTIFY_DONE;
-}
-
-static struct notifier_block xen_panic_block = {
- .notifier_call= xen_panic_event,
-};
-
-int xen_panic_handler_init(void)
-{
- atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
- return 0;
-}
-
-static const struct machine_ops xen_machine_ops __initconst = {
- .restart = xen_restart,
- .halt = xen_machine_halt,
- .power_off = xen_machine_power_off,
- .shutdown = xen_machine_halt,
- .crash_shutdown = xen_crash_shutdown,
- .emergency_restart = xen_emergency_restart,
-};
-
-/*
- * Set up the GDT and segment registers for -fstack-protector. Until
- * we do this, we have to be careful not to call any stack-protected
- * function, which is most of the kernel.
- */
-static void __init xen_setup_stackprotector(void)
-{
- pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot;
- pv_cpu_ops.load_gdt = xen_load_gdt_boot;
-
- setup_stack_canary_segment(0);
- switch_to_new_gdt(0);
-
- pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry;
- pv_cpu_ops.load_gdt = xen_load_gdt;
-}
-
-/* First C function to be called on Xen boot */
-asmlinkage void __init xen_start_kernel(void)
-{
- struct physdev_set_iopl set_iopl;
- int rc;
- pgd_t *pgd;
-
- if (!xen_start_info)
- return;
-
- xen_domain_type = XEN_PV_DOMAIN;
-
- xen_setup_machphys_mapping();
-
- /* Install Xen paravirt ops */
- pv_info = xen_info;
- pv_init_ops = xen_init_ops;
- pv_cpu_ops = xen_cpu_ops;
- pv_apic_ops = xen_apic_ops;
-
- x86_init.resources.memory_setup = xen_memory_setup;
- x86_init.oem.arch_setup = xen_arch_setup;
- x86_init.oem.banner = xen_banner;
-
- xen_init_time_ops();
-
- /*
- * Set up some pagetable state before starting to set any ptes.
- */
-
- xen_init_mmu_ops();
-
- /* Prevent unwanted bits from being set in PTEs. */
- __supported_pte_mask &= ~_PAGE_GLOBAL;
-#if 0
- if (!xen_initial_domain())
-#endif
- __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
-
- __supported_pte_mask |= _PAGE_IOMAP;
-
- /*
- * Prevent page tables from being allocated in highmem, even
- * if CONFIG_HIGHPTE is enabled.
- */
- __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
-
- /* Work out if we support NX */
- x86_configure_nx();
-
- xen_setup_features();
-
- /* Get mfn list */
- if (!xen_feature(XENFEAT_auto_translated_physmap))
- xen_build_dynamic_phys_to_machine();
-
- /*
- * Set up kernel GDT and segment registers, mainly so that
- * -fstack-protector code can be executed.
- */
- xen_setup_stackprotector();
-
- xen_init_irq_ops();
- xen_init_cpuid_mask();
-
-#ifdef CONFIG_X86_LOCAL_APIC
- /*
- * set up the basic apic ops.
- */
- set_xen_basic_apic_ops();
-#endif
-
- if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
- pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
- pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
- }
-
- machine_ops = xen_machine_ops;
-
- /*
- * The only reliable way to retain the initial address of the
- * percpu gdt_page is to remember it here, so we can go and
- * mark it RW later, when the initial percpu area is freed.
- */
- xen_initial_gdt = &per_cpu(gdt_page, 0);
-
- xen_smp_init();
-
-#ifdef CONFIG_ACPI_NUMA
- /*
- * The pages we from Xen are not related to machine pages, so
- * any NUMA information the kernel tries to get from ACPI will
- * be meaningless. Prevent it from trying.
- */
- acpi_numa = -1;
-#endif
-
- pgd = (pgd_t *)xen_start_info->pt_base;
-
- /* Don't do the full vcpu_info placement stuff until we have a
- possible map and a non-dummy shared_info. */
- per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
-
- local_irq_disable();
- early_boot_irqs_disabled = true;
-
- xen_raw_console_write("mapping kernel into physical memory\n");
- pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
- xen_ident_map_ISA();
-
- /* Allocate and initialize top and mid mfn levels for p2m structure */
- xen_build_mfn_list_list();
-
- /* keep using Xen gdt for now; no urgent need to change it */
-
-#ifdef CONFIG_X86_32
- pv_info.kernel_rpl = 1;
- if (xen_feature(XENFEAT_supervisor_mode_kernel))
- pv_info.kernel_rpl = 0;
-#else
- pv_info.kernel_rpl = 0;
-#endif
- /* set the limit of our address space */
- xen_reserve_top();
-
- /* We used to do this in xen_arch_setup, but that is too late on AMD
- * were early_cpu_init (run before ->arch_setup()) calls early_amd_init
- * which pokes 0xcf8 port.
- */
- set_iopl.iopl = 1;
- rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
- if (rc != 0)
- xen_raw_printk("physdev_op failed %d\n", rc);
-
-#ifdef CONFIG_X86_32
- /* set up basic CPUID stuff */
- cpu_detect(&new_cpu_data);
- new_cpu_data.hard_math = 1;
- new_cpu_data.wp_works_ok = 1;
- new_cpu_data.x86_capability[0] = cpuid_edx(1);
-#endif
-
- /* Poke various useful things into boot_params */
- boot_params.hdr.type_of_loader = (9 << 4) | 0;
- boot_params.hdr.ramdisk_image = xen_start_info->mod_start
- ? __pa(xen_start_info->mod_start) : 0;
- boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
- boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
-
- if (!xen_initial_domain()) {
- add_preferred_console("xenboot", 0, NULL);
- add_preferred_console("tty", 0, NULL);
- add_preferred_console("hvc", 0, NULL);
- if (pci_xen)
- x86_init.pci.arch_init = pci_xen_init;
- } else {
- const struct dom0_vga_console_info *info =
- (void *)((char *)xen_start_info +
- xen_start_info->console.dom0.info_off);
-
- xen_init_vga(info, xen_start_info->console.dom0.info_size);
- xen_start_info->console.domU.mfn = 0;
- xen_start_info->console.domU.evtchn = 0;
-
- /* Make sure ACS will be enabled */
- pci_request_acs();
- }
-#ifdef CONFIG_PCI
- /* PCI BIOS service won't work from a PV guest. */
- pci_probe &= ~PCI_PROBE_BIOS;
-#endif
- xen_raw_console_write("about to get started...\n");
-
- xen_setup_runstate_info(0);
-
- /* Start the world */
-#ifdef CONFIG_X86_32
- i386_start_kernel();
-#else
- x86_64_start_reservations((char *)__pa_symbol(&boot_params));
-#endif
-}
-
-static int init_hvm_pv_info(int *major, int *minor)
-{
- uint32_t eax, ebx, ecx, edx, pages, msr, base;
- u64 pfn;
-
- base = xen_cpuid_base();
- cpuid(base + 1, &eax, &ebx, &ecx, &edx);
-
- *major = eax >> 16;
- *minor = eax & 0xffff;
- printk(KERN_INFO "Xen version %d.%d.\n", *major, *minor);
-
- cpuid(base + 2, &pages, &msr, &ecx, &edx);
-
- pfn = __pa(hypercall_page);
- wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
-
- xen_setup_features();
-
- pv_info.name = "Xen HVM";
-
- xen_domain_type = XEN_HVM_DOMAIN;
-
- return 0;
-}
-
-void __ref xen_hvm_init_shared_info(void)
-{
- int cpu;
- struct xen_add_to_physmap xatp;
- static struct shared_info *shared_info_page = 0;
-
- if (!shared_info_page)
- shared_info_page = (struct shared_info *)
- extend_brk(PAGE_SIZE, PAGE_SIZE);
- xatp.domid = DOMID_SELF;
- xatp.idx = 0;
- xatp.space = XENMAPSPACE_shared_info;
- xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
- if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
- BUG();
-
- HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;
-
- /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
- * page, we use it in the event channel upcall and in some pvclock
- * related functions. We don't need the vcpu_info placement
- * optimizations because we don't use any pv_mmu or pv_irq op on
- * HVM.
- * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is
- * online but xen_hvm_init_shared_info is run at resume time too and
- * in that case multiple vcpus might be online. */
- for_each_online_cpu(cpu) {
- per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
- }
-}
-
-#ifdef CONFIG_XEN_PVHVM
-static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
-{
- int cpu = (long)hcpu;
- switch (action) {
- case CPU_UP_PREPARE:
- xen_vcpu_setup(cpu);
- if (xen_have_vector_callback)
- xen_init_lock_cpu(cpu);
- break;
- default:
- break;
- }
- return NOTIFY_OK;
-}
-
-static struct notifier_block xen_hvm_cpu_notifier __cpuinitdata = {
- .notifier_call = xen_hvm_cpu_notify,
-};
-
-static void __init xen_hvm_guest_init(void)
-{
- int r;
- int major, minor;
-
- r = init_hvm_pv_info(&major, &minor);
- if (r < 0)
- return;
-
- xen_hvm_init_shared_info();
-
- if (xen_feature(XENFEAT_hvm_callback_vector))
- xen_have_vector_callback = 1;
- xen_hvm_smp_init();
- register_cpu_notifier(&xen_hvm_cpu_notifier);
- xen_unplug_emulated_devices();
- x86_init.irqs.intr_init = xen_init_IRQ;
- xen_hvm_init_time_ops();
- xen_hvm_init_mmu_ops();
-}
-
-static bool __init xen_hvm_platform(void)
-{
- if (xen_pv_domain())
- return false;
-
- if (!xen_cpuid_base())
- return false;
-
- return true;
-}
-
-bool xen_hvm_need_lapic(void)
-{
- if (xen_pv_domain())
- return false;
- if (!xen_hvm_domain())
- return false;
- if (xen_feature(XENFEAT_hvm_pirqs) && xen_have_vector_callback)
- return false;
- return true;
-}
-EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
-
-const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = {
- .name = "Xen HVM",
- .detect = xen_hvm_platform,
- .init_platform = xen_hvm_guest_init,
-};
-EXPORT_SYMBOL(x86_hyper_xen_hvm);
-#endif
diff --git a/ANDROID_3.4.5/arch/x86/xen/grant-table.c b/ANDROID_3.4.5/arch/x86/xen/grant-table.c
deleted file mode 100644
index 3a5f55d5..00000000
--- a/ANDROID_3.4.5/arch/x86/xen/grant-table.c
+++ /dev/null
@@ -1,127 +0,0 @@
-/******************************************************************************
- * grant_table.c
- * x86 specific part
- *
- * Granting foreign access to our memory reservation.
- *
- * Copyright (c) 2005-2006, Christopher Clark
- * Copyright (c) 2004-2005, K A Fraser
- * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
- * VA Linux Systems Japan. Split out x86 specific part.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/vmalloc.h>
-
-#include <xen/interface/xen.h>
-#include <xen/page.h>
-#include <xen/grant_table.h>
-
-#include <asm/pgtable.h>
-
-static int map_pte_fn(pte_t *pte, struct page *pmd_page,
- unsigned long addr, void *data)
-{
- unsigned long **frames = (unsigned long **)data;
-
- set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
- (*frames)++;
- return 0;
-}
-
-/*
- * This function is used to map shared frames to store grant status. It is
- * different from map_pte_fn above, the frames type here is uint64_t.
- */
-static int map_pte_fn_status(pte_t *pte, struct page *pmd_page,
- unsigned long addr, void *data)
-{
- uint64_t **frames = (uint64_t **)data;
-
- set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
- (*frames)++;
- return 0;
-}
-
-static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
- unsigned long addr, void *data)
-{
-
- set_pte_at(&init_mm, addr, pte, __pte(0));
- return 0;
-}
-
-int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
- unsigned long max_nr_gframes,
- void **__shared)
-{
- int rc;
- void *shared = *__shared;
-
- if (shared == NULL) {
- struct vm_struct *area =
- alloc_vm_area(PAGE_SIZE * max_nr_gframes, NULL);
- BUG_ON(area == NULL);
- shared = area->addr;
- *__shared = shared;
- }
-
- rc = apply_to_page_range(&init_mm, (unsigned long)shared,
- PAGE_SIZE * nr_gframes,
- map_pte_fn, &frames);
- return rc;
-}
-
-int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes,
- unsigned long max_nr_gframes,
- grant_status_t **__shared)
-{
- int rc;
- grant_status_t *shared = *__shared;
-
- if (shared == NULL) {
- /* No need to pass in PTE as we are going to do it
- * in apply_to_page_range anyhow. */
- struct vm_struct *area =
- alloc_vm_area(PAGE_SIZE * max_nr_gframes, NULL);
- BUG_ON(area == NULL);
- shared = area->addr;
- *__shared = shared;
- }
-
- rc = apply_to_page_range(&init_mm, (unsigned long)shared,
- PAGE_SIZE * nr_gframes,
- map_pte_fn_status, &frames);
- return rc;
-}
-
-void arch_gnttab_unmap(void *shared, unsigned long nr_gframes)
-{
- apply_to_page_range(&init_mm, (unsigned long)shared,
- PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL);
-}
diff --git a/ANDROID_3.4.5/arch/x86/xen/irq.c b/ANDROID_3.4.5/arch/x86/xen/irq.c
deleted file mode 100644
index 15733765..00000000
--- a/ANDROID_3.4.5/arch/x86/xen/irq.c
+++ /dev/null
@@ -1,133 +0,0 @@
-#include <linux/hardirq.h>
-
-#include <asm/x86_init.h>
-
-#include <xen/interface/xen.h>
-#include <xen/interface/sched.h>
-#include <xen/interface/vcpu.h>
-
-#include <asm/xen/hypercall.h>
-#include <asm/xen/hypervisor.h>
-
-#include "xen-ops.h"
-
-/*
- * Force a proper event-channel callback from Xen after clearing the
- * callback mask. We do this in a very simple manner, by making a call
- * down into Xen. The pending flag will be checked by Xen on return.
- */
-void xen_force_evtchn_callback(void)
-{
- (void)HYPERVISOR_xen_version(0, NULL);
-}
-
-static unsigned long xen_save_fl(void)
-{
- struct vcpu_info *vcpu;
- unsigned long flags;
-
- vcpu = this_cpu_read(xen_vcpu);
-
- /* flag has opposite sense of mask */
- flags = !vcpu->evtchn_upcall_mask;
-
- /* convert to IF type flag
- -0 -> 0x00000000
- -1 -> 0xffffffff
- */
- return (-flags) & X86_EFLAGS_IF;
-}
-PV_CALLEE_SAVE_REGS_THUNK(xen_save_fl);
-
-static void xen_restore_fl(unsigned long flags)
-{
- struct vcpu_info *vcpu;
-
- /* convert from IF type flag */
- flags = !(flags & X86_EFLAGS_IF);
-
- /* There's a one instruction preempt window here. We need to
- make sure we're don't switch CPUs between getting the vcpu
- pointer and updating the mask. */
- preempt_disable();
- vcpu = this_cpu_read(xen_vcpu);
- vcpu->evtchn_upcall_mask = flags;
- preempt_enable_no_resched();
-
- /* Doesn't matter if we get preempted here, because any
- pending event will get dealt with anyway. */
-
- if (flags == 0) {
- preempt_check_resched();
- barrier(); /* unmask then check (avoid races) */
- if (unlikely(vcpu->evtchn_upcall_pending))
- xen_force_evtchn_callback();
- }
-}
-PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl);
-
-static void xen_irq_disable(void)
-{
- /* There's a one instruction preempt window here. We need to
- make sure we're don't switch CPUs between getting the vcpu
- pointer and updating the mask. */
- preempt_disable();
- this_cpu_read(xen_vcpu)->evtchn_upcall_mask = 1;
- preempt_enable_no_resched();
-}
-PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable);
-
-static void xen_irq_enable(void)
-{
- struct vcpu_info *vcpu;
-
- /* We don't need to worry about being preempted here, since
- either a) interrupts are disabled, so no preemption, or b)
- the caller is confused and is trying to re-enable interrupts
- on an indeterminate processor. */
-
- vcpu = this_cpu_read(xen_vcpu);
- vcpu->evtchn_upcall_mask = 0;
-
- /* Doesn't matter if we get preempted here, because any
- pending event will get dealt with anyway. */
-
- barrier(); /* unmask then check (avoid races) */
- if (unlikely(vcpu->evtchn_upcall_pending))
- xen_force_evtchn_callback();
-}
-PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable);
-
-static void xen_safe_halt(void)
-{
- /* Blocking includes an implicit local_irq_enable(). */
- if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0)
- BUG();
-}
-
-static void xen_halt(void)
-{
- if (irqs_disabled())
- HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
- else
- xen_safe_halt();
-}
-
-static const struct pv_irq_ops xen_irq_ops __initconst = {
- .save_fl = PV_CALLEE_SAVE(xen_save_fl),
- .restore_fl = PV_CALLEE_SAVE(xen_restore_fl),
- .irq_disable = PV_CALLEE_SAVE(xen_irq_disable),
- .irq_enable = PV_CALLEE_SAVE(xen_irq_enable),
-
- .safe_halt = xen_safe_halt,
- .halt = xen_halt,
-#ifdef CONFIG_X86_64
- .adjust_exception_frame = xen_adjust_exception_frame,
-#endif
-};
-
-void __init xen_init_irq_ops(void)
-{
- pv_irq_ops = xen_irq_ops;
- x86_init.irqs.intr_init = xen_init_IRQ;
-}
diff --git a/ANDROID_3.4.5/arch/x86/xen/mmu.c b/ANDROID_3.4.5/arch/x86/xen/mmu.c
deleted file mode 100644
index 69f58576..00000000
--- a/ANDROID_3.4.5/arch/x86/xen/mmu.c
+++ /dev/null
@@ -1,2371 +0,0 @@
-/*
- * Xen mmu operations
- *
- * This file contains the various mmu fetch and update operations.
- * The most important job they must perform is the mapping between the
- * domain's pfn and the overall machine mfns.
- *
- * Xen allows guests to directly update the pagetable, in a controlled
- * fashion. In other words, the guest modifies the same pagetable
- * that the CPU actually uses, which eliminates the overhead of having
- * a separate shadow pagetable.
- *
- * In order to allow this, it falls on the guest domain to map its
- * notion of a "physical" pfn - which is just a domain-local linear
- * address - into a real "machine address" which the CPU's MMU can
- * use.
- *
- * A pgd_t/pmd_t/pte_t will typically contain an mfn, and so can be
- * inserted directly into the pagetable. When creating a new
- * pte/pmd/pgd, it converts the passed pfn into an mfn. Conversely,
- * when reading the content back with __(pgd|pmd|pte)_val, it converts
- * the mfn back into a pfn.
- *
- * The other constraint is that all pages which make up a pagetable
- * must be mapped read-only in the guest. This prevents uncontrolled
- * guest updates to the pagetable. Xen strictly enforces this, and
- * will disallow any pagetable update which will end up mapping a
- * pagetable page RW, and will disallow using any writable page as a
- * pagetable.
- *
- * Naively, when loading %cr3 with the base of a new pagetable, Xen
- * would need to validate the whole pagetable before going on.
- * Naturally, this is quite slow. The solution is to "pin" a
- * pagetable, which enforces all the constraints on the pagetable even
- * when it is not actively in use. This menas that Xen can be assured
- * that it is still valid when you do load it into %cr3, and doesn't
- * need to revalidate it.
- *
- * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
- */
-#include <linux/sched.h>
-#include <linux/highmem.h>
-#include <linux/debugfs.h>
-#include <linux/bug.h>
-#include <linux/vmalloc.h>
-#include <linux/module.h>
-#include <linux/gfp.h>
-#include <linux/memblock.h>
-#include <linux/seq_file.h>
-
-#include <trace/events/xen.h>
-
-#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
-#include <asm/fixmap.h>
-#include <asm/mmu_context.h>
-#include <asm/setup.h>
-#include <asm/paravirt.h>
-#include <asm/e820.h>
-#include <asm/linkage.h>
-#include <asm/page.h>
-#include <asm/init.h>
-#include <asm/pat.h>
-#include <asm/smp.h>
-
-#include <asm/xen/hypercall.h>
-#include <asm/xen/hypervisor.h>
-
-#include <xen/xen.h>
-#include <xen/page.h>
-#include <xen/interface/xen.h>
-#include <xen/interface/hvm/hvm_op.h>
-#include <xen/interface/version.h>
-#include <xen/interface/memory.h>
-#include <xen/hvc-console.h>
-
-#include "multicalls.h"
-#include "mmu.h"
-#include "debugfs.h"
-
-/*
- * Protects atomic reservation decrease/increase against concurrent increases.
- * Also protects non-atomic updates of current_pages and balloon lists.
- */
-DEFINE_SPINLOCK(xen_reservation_lock);
-
-/*
- * Identity map, in addition to plain kernel map. This needs to be
- * large enough to allocate page table pages to allocate the rest.
- * Each page can map 2MB.
- */
-#define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4)
-static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES);
-
-#ifdef CONFIG_X86_64
-/* l3 pud for userspace vsyscall mapping */
-static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
-#endif /* CONFIG_X86_64 */
-
-/*
- * Note about cr3 (pagetable base) values:
- *
- * xen_cr3 contains the current logical cr3 value; it contains the
- * last set cr3. This may not be the current effective cr3, because
- * its update may be being lazily deferred. However, a vcpu looking
- * at its own cr3 can use this value knowing that it everything will
- * be self-consistent.
- *
- * xen_current_cr3 contains the actual vcpu cr3; it is set once the
- * hypercall to set the vcpu cr3 is complete (so it may be a little
- * out of date, but it will never be set early). If one vcpu is
- * looking at another vcpu's cr3 value, it should use this variable.
- */
-DEFINE_PER_CPU(unsigned long, xen_cr3); /* cr3 stored as physaddr */
-DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */
-
-
-/*
- * Just beyond the highest usermode address. STACK_TOP_MAX has a
- * redzone above it, so round it up to a PGD boundary.
- */
-#define USER_LIMIT ((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK)
-
-unsigned long arbitrary_virt_to_mfn(void *vaddr)
-{
- xmaddr_t maddr = arbitrary_virt_to_machine(vaddr);
-
- return PFN_DOWN(maddr.maddr);
-}
-
-xmaddr_t arbitrary_virt_to_machine(void *vaddr)
-{
- unsigned long address = (unsigned long)vaddr;
- unsigned int level;
- pte_t *pte;
- unsigned offset;
-
- /*
- * if the PFN is in the linear mapped vaddr range, we can just use
- * the (quick) virt_to_machine() p2m lookup
- */
- if (virt_addr_valid(vaddr))
- return virt_to_machine(vaddr);
-
- /* otherwise we have to do a (slower) full page-table walk */
-
- pte = lookup_address(address, &level);
- BUG_ON(pte == NULL);
- offset = address & ~PAGE_MASK;
- return XMADDR(((phys_addr_t)pte_mfn(*pte) << PAGE_SHIFT) + offset);
-}
-EXPORT_SYMBOL_GPL(arbitrary_virt_to_machine);
-
-void make_lowmem_page_readonly(void *vaddr)
-{
- pte_t *pte, ptev;
- unsigned long address = (unsigned long)vaddr;
- unsigned int level;
-
- pte = lookup_address(address, &level);
- if (pte == NULL)
- return; /* vaddr missing */
-
- ptev = pte_wrprotect(*pte);
-
- if (HYPERVISOR_update_va_mapping(address, ptev, 0))
- BUG();
-}
-
-void make_lowmem_page_readwrite(void *vaddr)
-{
- pte_t *pte, ptev;
- unsigned long address = (unsigned long)vaddr;
- unsigned int level;
-
- pte = lookup_address(address, &level);
- if (pte == NULL)
- return; /* vaddr missing */
-
- ptev = pte_mkwrite(*pte);
-
- if (HYPERVISOR_update_va_mapping(address, ptev, 0))
- BUG();
-}
-
-
-static bool xen_page_pinned(void *ptr)
-{
- struct page *page = virt_to_page(ptr);
-
- return PagePinned(page);
-}
-
-void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid)
-{
- struct multicall_space mcs;
- struct mmu_update *u;
-
- trace_xen_mmu_set_domain_pte(ptep, pteval, domid);
-
- mcs = xen_mc_entry(sizeof(*u));
- u = mcs.args;
-
- /* ptep might be kmapped when using 32-bit HIGHPTE */
- u->ptr = virt_to_machine(ptep).maddr;
- u->val = pte_val_ma(pteval);
-
- MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, domid);
-
- xen_mc_issue(PARAVIRT_LAZY_MMU);
-}
-EXPORT_SYMBOL_GPL(xen_set_domain_pte);
-
-static void xen_extend_mmu_update(const struct mmu_update *update)
-{
- struct multicall_space mcs;
- struct mmu_update *u;
-
- mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u));
-
- if (mcs.mc != NULL) {
- mcs.mc->args[1]++;
- } else {
- mcs = __xen_mc_entry(sizeof(*u));
- MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
- }
-
- u = mcs.args;
- *u = *update;
-}
-
-static void xen_extend_mmuext_op(const struct mmuext_op *op)
-{
- struct multicall_space mcs;
- struct mmuext_op *u;
-
- mcs = xen_mc_extend_args(__HYPERVISOR_mmuext_op, sizeof(*u));
-
- if (mcs.mc != NULL) {
- mcs.mc->args[1]++;
- } else {
- mcs = __xen_mc_entry(sizeof(*u));
- MULTI_mmuext_op(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
- }
-
- u = mcs.args;
- *u = *op;
-}
-
-static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
-{
- struct mmu_update u;
-
- preempt_disable();
-
- xen_mc_batch();
-
- /* ptr may be ioremapped for 64-bit pagetable setup */
- u.ptr = arbitrary_virt_to_machine(ptr).maddr;
- u.val = pmd_val_ma(val);
- xen_extend_mmu_update(&u);
-
- xen_mc_issue(PARAVIRT_LAZY_MMU);
-
- preempt_enable();
-}
-
-static void xen_set_pmd(pmd_t *ptr, pmd_t val)
-{
- trace_xen_mmu_set_pmd(ptr, val);
-
- /* If page is not pinned, we can just update the entry
- directly */
- if (!xen_page_pinned(ptr)) {
- *ptr = val;
- return;
- }
-
- xen_set_pmd_hyper(ptr, val);
-}
-
-/*
- * Associate a virtual page frame with a given physical page frame
- * and protection flags for that frame.
- */
-void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
-{
- set_pte_vaddr(vaddr, mfn_pte(mfn, flags));
-}
-
-static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval)
-{
- struct mmu_update u;
-
- if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU)
- return false;
-
- xen_mc_batch();
-
- u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE;
- u.val = pte_val_ma(pteval);
- xen_extend_mmu_update(&u);
-
- xen_mc_issue(PARAVIRT_LAZY_MMU);
-
- return true;
-}
-
-static inline void __xen_set_pte(pte_t *ptep, pte_t pteval)
-{
- if (!xen_batched_set_pte(ptep, pteval))
- native_set_pte(ptep, pteval);
-}
-
-static void xen_set_pte(pte_t *ptep, pte_t pteval)
-{
- trace_xen_mmu_set_pte(ptep, pteval);
- __xen_set_pte(ptep, pteval);
-}
-
-static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pteval)
-{
- trace_xen_mmu_set_pte_at(mm, addr, ptep, pteval);
- __xen_set_pte(ptep, pteval);
-}
-
-pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
-{
- /* Just return the pte as-is. We preserve the bits on commit */
- trace_xen_mmu_ptep_modify_prot_start(mm, addr, ptep, *ptep);
- return *ptep;
-}
-
-void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte)
-{
- struct mmu_update u;
-
- trace_xen_mmu_ptep_modify_prot_commit(mm, addr, ptep, pte);
- xen_mc_batch();
-
- u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
- u.val = pte_val_ma(pte);
- xen_extend_mmu_update(&u);
-
- xen_mc_issue(PARAVIRT_LAZY_MMU);
-}
-
-/* Assume pteval_t is equivalent to all the other *val_t types. */
-static pteval_t pte_mfn_to_pfn(pteval_t val)
-{
- if (val & _PAGE_PRESENT) {
- unsigned long mfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
- unsigned long pfn = mfn_to_pfn(mfn);
-
- pteval_t flags = val & PTE_FLAGS_MASK;
- if (unlikely(pfn == ~0))
- val = flags & ~_PAGE_PRESENT;
- else
- val = ((pteval_t)pfn << PAGE_SHIFT) | flags;
- }
-
- return val;
-}
-
-/*
- * Translate the frame number of a present entry from pfn to mfn.
- * Non-present values are returned untouched.
- */
-static pteval_t pte_pfn_to_mfn(pteval_t val)
-{
-	unsigned long pfn, mfn;
-	pteval_t flags;
-
-	if (!(val & _PAGE_PRESENT))
-		return val;
-
-	pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
-	flags = val & PTE_FLAGS_MASK;
-
-	if (xen_feature(XENFEAT_auto_translated_physmap))
-		mfn = pfn;
-	else
-		mfn = get_phys_to_machine(pfn);
-
-	if (unlikely(mfn == INVALID_P2M_ENTRY)) {
-		/*
-		 * No mfn backs this pfn, so produce an empty non-present
-		 * pte.  The original pfn is lost, which is why
-		 * pte_mfn_to_pfn is not an exact inverse.
-		 */
-		mfn = 0;
-		flags = 0;
-	} else {
-		/*
-		 * The identity test must come _after_ the
-		 * INVALID_P2M_ENTRY test, because INVALID_P2M_ENTRY &
-		 * IDENTITY_FRAME_BIT is non-zero.
-		 */
-		mfn &= ~FOREIGN_FRAME_BIT;
-		if (mfn & IDENTITY_FRAME_BIT) {
-			mfn &= ~IDENTITY_FRAME_BIT;
-			flags |= _PAGE_IOMAP;
-		}
-	}
-
-	return ((pteval_t)mfn << PAGE_SHIFT) | flags;
-}
-
-/*
- * Build the value for an I/O mapping.  The frame number in @val is
- * assumed to already be an MFN, so a present entry is reassembled from
- * its frame number and flags without any translation.
- */
-static pteval_t iomap_pte(pteval_t val)
-{
-	unsigned long frame;
-	pteval_t flags;
-
-	if (!(val & _PAGE_PRESENT))
-		return val;
-
-	frame = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
-	flags = val & PTE_FLAGS_MASK;
-
-	return ((pteval_t)frame << PAGE_SHIFT) | flags;
-}
-
-/*
- * Return the pfn-based value of @pte.  In the initial domain an entry
- * flagged _PAGE_IOMAP is returned raw; everything else goes through
- * pte_mfn_to_pfn().
- */
-static pteval_t xen_pte_val(pte_t pte)
-{
-	pteval_t pteval = pte.pte;
-#if 0
-	/* If this is a WC pte, convert back from Xen WC to Linux WC */
-	if ((pteval & (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)) == _PAGE_PAT) {
-		WARN_ON(!pat_enabled);
-		pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT;
-	}
-#endif
-	if (!xen_initial_domain() || !(pteval & _PAGE_IOMAP))
-		return pte_mfn_to_pfn(pteval);
-
-	return pteval;
-}
-PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
-
-/* Return the value of @pgd with its frame number converted mfn -> pfn. */
-static pgdval_t xen_pgd_val(pgd_t pgd)
-{
-	pgdval_t raw = pgd.pgd;
-
-	return pte_mfn_to_pfn(raw);
-}
-PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val);
-
-/*
- * Xen's PAT setup is part of its ABI, though I assume entries 6 & 7
- * are reserved for now, to correspond to the Intel-reserved PAT
- * types.
- *
- * We expect Linux's PAT set as follows:
- *
- * Idx PTE flags Linux Xen Default
- * 0 WB WB WB
- * 1 PWT WC WT WT
- * 2 PCD UC- UC- UC-
- * 3 PCD PWT UC UC UC
- * 4 PAT WB WC WB
- * 5 PAT PWT WC WP WT
- * 6 PAT PCD UC- UC UC-
- * 7 PAT PCD PWT UC UC UC
- */
-
-void xen_set_pat(u64 pat)
-{
-	/*
-	 * Nothing is programmed here.  Linux is expected to ask for a PAT
-	 * layout of UC UC- WC WB (ignoring the PAT flag); warn about
-	 * anything else.
-	 */
-	const u64 expected = 0x0007010600070106ull;
-
-	WARN_ON(pat != expected);
-}
-
-/*
- * Build a pte from a pfn-based value.  I/O mappings that are allowed
- * to reach real hardware keep their frame number as-is; every other
- * entry loses _PAGE_IOMAP and has its pfn translated to an mfn.
- */
-static pte_t xen_make_pte(pteval_t pte)
-{
-	phys_addr_t addr = (pte & PTE_PFN_MASK);
-#if 0
-	/* If Linux is trying to set a WC pte, then map to the Xen WC.
-	 * If _PAGE_PAT is set, then it probably means it is really
-	 * _PAGE_PSE, so avoid fiddling with the PAT mapping and hope
-	 * things work out OK...
-	 *
-	 * (We should never see kernel mappings with _PAGE_PSE set,
-	 * but we could see hugetlbfs mappings, I think.).
-	 */
-	if (pat_enabled && !WARN_ON(pte & _PAGE_PAT)) {
-		if ((pte & (_PAGE_PCD | _PAGE_PWT)) == _PAGE_PWT)
-			pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT;
-	}
-#endif
-	/*
-	 * Unprivileged domains may create IOMAP mappings for PCI
-	 * passthrough, but may not map ISA space.  Their ISA mappings are
-	 * only dummy local mappings that keep other parts of the kernel
-	 * happy, so they take the ordinary pfn -> mfn path.
-	 */
-	if (unlikely(pte & _PAGE_IOMAP) &&
-	    (xen_initial_domain() || addr >= ISA_END_ADDRESS))
-		return native_make_pte(iomap_pte(pte));
-
-	return native_make_pte(pte_pfn_to_mfn(pte & ~_PAGE_IOMAP));
-}
-PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
-
-/* Build a pgd entry from a pfn-based value, converting pfn -> mfn. */
-static pgd_t xen_make_pgd(pgdval_t pgd)
-{
-	return native_make_pgd(pte_pfn_to_mfn(pgd));
-}
-PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd);
-
-/* Return the value of @pmd with its frame number converted mfn -> pfn. */
-static pmdval_t xen_pmd_val(pmd_t pmd)
-{
-	pmdval_t raw = pmd.pmd;
-
-	return pte_mfn_to_pfn(raw);
-}
-PV_CALLEE_SAVE_REGS_THUNK(xen_pmd_val);
-
-/*
- * Write a pud entry through a queued mmu_update request, with
- * preemption disabled around the batch.
- */
-static void xen_set_pud_hyper(pud_t *ptr, pud_t val)
-{
-	struct mmu_update req;
-
-	preempt_disable();
-
-	xen_mc_batch();
-
-	/*
-	 * @ptr may be ioremapped during 64-bit pagetable setup, hence the
-	 * arbitrary_virt_to_machine() lookup.
-	 */
-	req.ptr = arbitrary_virt_to_machine(ptr).maddr;
-	req.val = pud_val_ma(val);
-	xen_extend_mmu_update(&req);
-
-	xen_mc_issue(PARAVIRT_LAZY_MMU);
-
-	preempt_enable();
-}
-
-/*
- * Set a pud entry.  A pinned page has to be updated through
- * xen_set_pud_hyper(); an unpinned page can simply be written.
- */
-static void xen_set_pud(pud_t *ptr, pud_t val)
-{
-	trace_xen_mmu_set_pud(ptr, val);
-
-	if (xen_page_pinned(ptr))
-		xen_set_pud_hyper(ptr, val);
-	else
-		*ptr = val;
-}
-
-#ifdef CONFIG_X86_PAE
-/* Store the 64-bit pte value at @ptep with a single set_64bit(). */
-static void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
-{
-	u64 *slot = (u64 *)ptep;
-
-	trace_xen_mmu_set_pte_atomic(ptep, pte);
-	set_64bit(slot, native_pte_val(pte));
-}
-
-/*
- * Clear the pte at @ptep: queue a zero pte in the current batch when
- * possible, otherwise fall back to native_pte_clear().
- */
-static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
-	trace_xen_mmu_pte_clear(mm, addr, ptep);
-
-	if (xen_batched_set_pte(ptep, native_make_pte(0)))
-		return;
-
-	native_pte_clear(mm, addr, ptep);
-}
-
-/* Clear a pmd entry by writing an all-zero pmd through set_pmd(). */
-static void xen_pmd_clear(pmd_t *pmdp)
-{
-	trace_xen_mmu_pmd_clear(pmdp);
-
-	set_pmd(pmdp, __pmd(0));
-}
-#endif /* CONFIG_X86_PAE */
-
-/* Build a pmd entry from a pfn-based value, converting pfn -> mfn. */
-static pmd_t xen_make_pmd(pmdval_t pmd)
-{
-	return native_make_pmd(pte_pfn_to_mfn(pmd));
-}
-PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);
-
-#if PAGETABLE_LEVELS == 4
-/* Return the value of @pud with its frame number converted mfn -> pfn. */
-static pudval_t xen_pud_val(pud_t pud)
-{
-	pudval_t raw = pud.pud;
-
-	return pte_mfn_to_pfn(raw);
-}
-PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val);
-
-/* Build a pud entry from a pfn-based value, converting pfn -> mfn. */
-static pud_t xen_make_pud(pudval_t pud)
-{
-	return native_make_pud(pte_pfn_to_mfn(pud));
-}
-PV_CALLEE_SAVE_REGS_THUNK(xen_make_pud);
-
-/*
- * Find the user-pagetable counterpart of a kernel pgd entry.
- *
- * The user pgd page is read from page->private of the page holding
- * @pgd.  Returns the entry at the same index within that page, or NULL
- * if the index is at or above pgd_index(USER_LIMIT) or no user pgd is
- * attached.
- */
-static pgd_t *xen_get_user_pgd(pgd_t *pgd)
-{
-	pgd_t *base = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK);
-	unsigned idx = pgd - base;
-	struct page *page;
-	pgd_t *user;
-
-	if (idx >= pgd_index(USER_LIMIT))
-		return NULL;
-
-	page = virt_to_page(base);
-	user = (pgd_t *)page->private;
-	if (!user)
-		return NULL;
-
-	return user + idx;
-}
-
-/*
- * Queue one pgd update in the multicall batch.  The caller is
- * responsible for starting and issuing the batch.
- */
-static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
-{
-	struct mmu_update req;
-
-	req.ptr = virt_to_machine(ptr).maddr;
-	req.val = pgd_val_ma(val);
-
-	xen_extend_mmu_update(&req);
-}
-
-/*
- * Raw hypercall-based set_pgd, intended for use in early boot before
- * there's a page structure. This implies:
- * 1. The only existing pagetable is the kernel's
- * 2. It is always pinned
- * 3. It has no user pagetable attached to it
- */
-static void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
-{
-	/* Keep the whole batch on one cpu. */
-	preempt_disable();
-
-	xen_mc_batch();
-	__xen_set_pgd_hyper(ptr, val);
-	xen_mc_issue(PARAVIRT_LAZY_MMU);
-
-	preempt_enable();
-}
-
-/*
- * Set a pgd entry, mirroring the write into the attached user pgd
- * (if xen_get_user_pgd() finds one).
- */
-static void xen_set_pgd(pgd_t *ptr, pgd_t val)
-{
-	pgd_t *user_ptr = xen_get_user_pgd(ptr);
-
-	trace_xen_mmu_set_pgd(ptr, user_ptr, val);
-
-	if (xen_page_pinned(ptr)) {
-		/*
-		 * Pinned: go through the hypervisor, but at least batch
-		 * the kernel and user updates together.
-		 */
-		xen_mc_batch();
-
-		__xen_set_pgd_hyper(ptr, val);
-		if (user_ptr)
-			__xen_set_pgd_hyper(user_ptr, val);
-
-		xen_mc_issue(PARAVIRT_LAZY_MMU);
-		return;
-	}
-
-	/* Not pinned: the entries can be written directly. */
-	*ptr = val;
-	if (user_ptr) {
-		WARN_ON(xen_page_pinned(user_ptr));
-		*user_ptr = val;
-	}
-}
-#endif /* PAGETABLE_LEVELS == 4 */
-
-/*
- * (Yet another) pagetable walker. This one is intended for pinning a
- * pagetable. This means that it walks a pagetable and calls the
- * callback function on each page it finds making up the page table,
- * at every level. It walks the entire pagetable, but it only bothers
- * pinning pte pages which are below limit. In the normal case this
- * will be STACK_TOP_MAX, but at boot we need to pin up to
- * FIXADDR_TOP.
- *
- * For 32-bit the important bit is that we don't pin beyond there,
- * because then we start getting into Xen's ptes.
- *
- * For 64-bit, we must skip the Xen hole in the middle of the address
- * space, just after the big x86-64 virtual hole.
- *
- * @mm:    passed through unchanged to @func
- * @pgd:   top-level table to walk
- * @func:  callback invoked with each pagetable page found and its level;
- *         the page holding @pgd itself is always passed last (PT_PGD)
- * @limit: end of the range to walk, exclusive (it is decremented below
- *         to the last byte touched); BUG if limit - 1 >= FIXADDR_TOP
- *
- * Returns the bitwise OR of every @func return value, or 0 without
- * calling @func at all when XENFEAT_auto_translated_physmap is set.
- */
-static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
- int (*func)(struct mm_struct *mm, struct page *,
- enum pt_level),
- unsigned long limit)
-{
- int flush = 0;
- unsigned hole_low, hole_high;
- unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
- unsigned pgdidx, pudidx, pmdidx;
-
- /* The limit is the last byte to be touched */
- limit--;
- BUG_ON(limit >= FIXADDR_TOP);
-
- if (xen_feature(XENFEAT_auto_translated_physmap))
- return 0;
-
- /*
- * 64-bit has a great big hole in the middle of the address
- * space, which contains the Xen mappings. On 32-bit these
- * will end up making a zero-sized hole and so is a no-op.
- */
- hole_low = pgd_index(USER_LIMIT);
- hole_high = pgd_index(PAGE_OFFSET);
-
- pgdidx_limit = pgd_index(limit);
-#if PTRS_PER_PUD > 1
- pudidx_limit = pud_index(limit);
-#else
- pudidx_limit = 0;
-#endif
-#if PTRS_PER_PMD > 1
- pmdidx_limit = pmd_index(limit);
-#else
- pmdidx_limit = 0;
-#endif
-
- for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) {
- pud_t *pud;
-
- if (pgdidx >= hole_low && pgdidx < hole_high)
- continue; /* slot lies inside the hole */
-
- if (!pgd_val(pgd[pgdidx]))
- continue; /* empty pgd slot */
-
- pud = pud_offset(&pgd[pgdidx], 0);
-
- if (PTRS_PER_PUD > 1) /* not folded */
- flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
-
- for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) {
- pmd_t *pmd;
-
- if (pgdidx == pgdidx_limit &&
- pudidx > pudidx_limit)
- goto out; /* walked past limit */
-
- if (pud_none(pud[pudidx]))
- continue;
-
- pmd = pmd_offset(&pud[pudidx], 0);
-
- if (PTRS_PER_PMD > 1) /* not folded */
- flush |= (*func)(mm, virt_to_page(pmd), PT_PMD);
-
- for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) {
- struct page *pte;
-
- if (pgdidx == pgdidx_limit &&
- pudidx == pudidx_limit &&
- pmdidx > pmdidx_limit)
- goto out; /* walked past limit */
-
- if (pmd_none(pmd[pmdidx]))
- continue;
-
- pte = pmd_page(pmd[pmdidx]);
- flush |= (*func)(mm, pte, PT_PTE);
- }
- }
- }
-
-out:
- /* Do the top level last, so that the callbacks can use it as
- a cue to do final things like tlb flushes. */
- flush |= (*func)(mm, virt_to_page(pgd), PT_PGD);
-
- return flush;
-}
-
-/* Walk the pagetable rooted at mm->pgd; see __xen_pgd_walk(). */
-static int xen_pgd_walk(struct mm_struct *mm,
-			int (*func)(struct mm_struct *mm, struct page *,
-				    enum pt_level),
-			unsigned long limit)
-{
-	pgd_t *root = mm->pgd;
-
-	return __xen_pgd_walk(mm, root, func, limit);
-}
-
-/*
- * With split pte locks, take the lock belonging to @page (nested under
- * mm->page_table_lock) and return it.  Without them, nothing is locked
- * and NULL is returned.
- */
-static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm)
-{
-#if USE_SPLIT_PTLOCKS
-	spinlock_t *ptl = __pte_lockptr(page);
-
-	spin_lock_nest_lock(ptl, &mm->page_table_lock);
-
-	return ptl;
-#else
-	return NULL;
-#endif
-}
-
-/*
- * Multicall completion callback: @v is the spinlock returned by
- * xen_pte_lock(), released here.
- */
-static void xen_pte_unlock(void *v)
-{
-	spinlock_t *lock = v;
-
-	spin_unlock(lock);
-}
-
-/*
- * Queue an mmuext operation @level (one of the MMUEXT_* pin/unpin
- * commands) against the machine frame backing @pfn.
- */
-static void xen_do_pin(unsigned level, unsigned long pfn)
-{
-	struct mmuext_op req;
-
-	req.cmd = level;
-	req.arg1.mfn = pfn_to_mfn(pfn);
-
-	xen_extend_mmuext_op(&req);
-}
-
-/*
- * Pagetable-walk callback used when pinning: mark @page pinned and
- * queue the update that remaps it read-only.
- *
- * Returns 1 if an unpinned highmem page was found (so kmaps need
- * flushing), 0 otherwise.
- */
-static int xen_pin_page(struct mm_struct *mm, struct page *page,
-			enum pt_level level)
-{
-	struct multicall_space mcs;
-	spinlock_t *ptl = NULL;
-	unsigned long pfn;
-	void *pt;
-
-	/* Already pinned: nothing to do. */
-	if (TestSetPagePinned(page))
-		return 0;
-
-	/* kmaps need flushing if we found an unpinned highpage. */
-	if (PageHighMem(page))
-		return 1;
-
-	pt = lowmem_page_address(page);
-	pfn = page_to_pfn(page);
-	mcs = __xen_mc_entry(0);
-
-	/*
-	 * The pagetable lock must be held from the moment the page is
-	 * made RO until it is actually pinned.  Otherwise another user
-	 * could try to update the pagetable by writing to it, and that
-	 * write would fail: the memory is RO but not yet pinned, so Xen
-	 * will not trap'n'emulate it.
-	 *
-	 * With split pte locks we cannot hold every lock of the
-	 * pagetable for the whole traversal, because that might wrap
-	 * the preempt count (8 bits).  Instead each PTE page is marked
-	 * RO and pinned while its own lock is held, so the number of
-	 * locks held never exceeds a batch size (~32 entries, at
-	 * present).
-	 *
-	 * Without split pte locks the PTE pages need no individual
-	 * pinning, since the overall pagetable lock protects us.
-	 */
-	if (level == PT_PTE)
-		ptl = xen_pte_lock(page, mm);
-
-	MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
-				pfn_pte(pfn, PAGE_KERNEL_RO),
-				level == PT_PGD ? UVMF_TLB_FLUSH : 0);
-
-	if (ptl) {
-		xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn);
-
-		/* Drop the lock once this batch has completed. */
-		xen_mc_callback(xen_pte_unlock, ptl);
-	}
-
-	return 0;
-}
-
-/* This is called just after a mm has been created, but it has not
- been used yet. We need to make sure that its pagetable is all
- read-only, and can be pinned. */
-static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
-{
-	trace_xen_mmu_pgd_pin(mm, pgd);
-
-	xen_mc_batch();
-
-	if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
-		/*
-		 * The walk asked for a kmap flush: issue what we have
-		 * (re-enabling interrupts for the flush), then open a
-		 * new batch for the rest.
-		 */
-		xen_mc_issue(0);
-
-		kmap_flush_unused();
-
-		xen_mc_batch();
-	}
-
-#ifdef CONFIG_X86_64
-	{
-		pgd_t *upgd = xen_get_user_pgd(pgd);
-
-		xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));
-
-		/* The attached user pgd, if any, is pinned as an L4 too. */
-		if (upgd) {
-			xen_pin_page(mm, virt_to_page(upgd), PT_PGD);
-			xen_do_pin(MMUEXT_PIN_L4_TABLE,
-				   PFN_DOWN(__pa(upgd)));
-		}
-	}
-#else /* CONFIG_X86_32 */
-#ifdef CONFIG_X86_PAE
-	/* Need to make sure unshared kernel PMD is pinnable */
-	xen_pin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]),
-		     PT_PMD);
-#endif
-	xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
-#endif /* CONFIG_X86_64 */
-	xen_mc_issue(0);
-}
-
-/* Pin the pagetable rooted at mm->pgd. */
-static void xen_pgd_pin(struct mm_struct *mm)
-{
-	pgd_t *root = mm->pgd;
-
-	__xen_pgd_pin(mm, root);
-}
-
-/*
- * On save, we need to pin all pagetables to make sure they get their
- * mfns turned into pfns. Search the list for any unpinned pgds and pin
- * them (unpinned pgds are not currently in use, probably because the
- * process is under construction or destruction).
- *
- * Expected to be called in stop_machine() ("equivalent to taking
- * every spinlock in the system"), so the locking doesn't really
- * matter all that much.
- */
-void xen_mm_pin_all(void)
-{
-	struct page *page;
-
-	spin_lock(&pgd_lock);
-
-	list_for_each_entry(page, &pgd_list, lru) {
-		if (PagePinned(page))
-			continue;
-
-		/*
-		 * Pin it now and flag it SavePinned so that
-		 * xen_mm_unpin_all() can find it again.
-		 */
-		__xen_pgd_pin(&init_mm, (pgd_t *)page_address(page));
-		SetPageSavePinned(page);
-	}
-
-	spin_unlock(&pgd_lock);
-}
-
-/*
- * The init_mm pagetable is really pinned as soon as it's created, but
- * that's before we have page structures to store the bits. So do all
- * the book-keeping now.
- */
-static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
-				  enum pt_level level)
-{
-	/* Book-keeping only: set the flag, never request a flush. */
-	SetPagePinned(page);
-
-	return 0;
-}
-
-/* Flag every pagetable page of init_mm, up to FIXADDR_TOP, as pinned. */
-static void __init xen_mark_init_mm_pinned(void)
-{
-	xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
-}
-
-/*
- * Pagetable-walk callback used when unpinning: clear the pinned flag of
- * @page and queue the update that remaps it read-write.  Always returns
- * 0, since unpinning never needs a flush.
- */
-static int xen_unpin_page(struct mm_struct *mm, struct page *page,
-			  enum pt_level level)
-{
-	struct multicall_space mcs;
-	spinlock_t *ptl = NULL;
-	unsigned long pfn;
-	void *pt;
-
-	/* Nothing to undo for pages that were not pinned, or for highmem. */
-	if (!TestClearPagePinned(page) || PageHighMem(page))
-		return 0;
-
-	pt = lowmem_page_address(page);
-	pfn = page_to_pfn(page);
-
-	/*
-	 * The converse of xen_pin_page().  With split pte locks the
-	 * lock must be held while the pte page is unpinned but still RO,
-	 * so that concurrent updates never see it in this
-	 * partially-pinned state.
-	 */
-	if (level == PT_PTE) {
-		ptl = xen_pte_lock(page, mm);
-
-		if (ptl)
-			xen_do_pin(MMUEXT_UNPIN_TABLE, pfn);
-	}
-
-	mcs = __xen_mc_entry(0);
-
-	MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
-				pfn_pte(pfn, PAGE_KERNEL),
-				level == PT_PGD ? UVMF_TLB_FLUSH : 0);
-
-	/* Drop the lock once this batch has completed. */
-	if (ptl)
-		xen_mc_callback(xen_pte_unlock, ptl);
-
-	return 0;
-}
-
-/* Release a pagetable's pages back to normal RW mappings. */
-static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
-{
-	trace_xen_mmu_pgd_unpin(mm, pgd);
-
-	xen_mc_batch();
-
-	/* Unpin the top level first, then the pages hanging off it. */
-	xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
-
-#ifdef CONFIG_X86_64
-	{
-		pgd_t *upgd = xen_get_user_pgd(pgd);
-
-		if (upgd) {
-			xen_do_pin(MMUEXT_UNPIN_TABLE,
-				   PFN_DOWN(__pa(upgd)));
-			xen_unpin_page(mm, virt_to_page(upgd), PT_PGD);
-		}
-	}
-#endif
-
-#ifdef CONFIG_X86_PAE
-	/* Need to make sure unshared kernel PMD is unpinned */
-	xen_unpin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]),
-		       PT_PMD);
-#endif
-
-	__xen_pgd_walk(mm, pgd, xen_unpin_page, USER_LIMIT);
-
-	xen_mc_issue(0);
-}
-
-/* Unpin the pagetable rooted at mm->pgd. */
-static void xen_pgd_unpin(struct mm_struct *mm)
-{
-	pgd_t *root = mm->pgd;
-
-	__xen_pgd_unpin(mm, root);
-}
-
-/*
- * On resume, undo any pinning done at save, so that the rest of the
- * kernel doesn't see any unexpected pinned pagetables.
- */
-void xen_mm_unpin_all(void)
-{
-	struct page *page;
-
-	spin_lock(&pgd_lock);
-
-	list_for_each_entry(page, &pgd_list, lru) {
-		/* Only touch pgds that xen_mm_pin_all() pinned. */
-		if (!PageSavePinned(page))
-			continue;
-
-		BUG_ON(!PagePinned(page));
-		__xen_pgd_unpin(&init_mm, (pgd_t *)page_address(page));
-		ClearPageSavePinned(page);
-	}
-
-	spin_unlock(&pgd_lock);
-}
-
-/* Pin the pagetable of @next under its page_table_lock; @prev is unused. */
-static void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
-{
-	spin_lock(&next->page_table_lock);
-	__xen_pgd_pin(next, next->pgd);
-	spin_unlock(&next->page_table_lock);
-}
-
-/* Pin the pagetable of the new @mm under its page_table_lock; @oldmm is unused. */
-static void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
-{
-	spin_lock(&mm->page_table_lock);
-	__xen_pgd_pin(mm, mm->pgd);
-	spin_unlock(&mm->page_table_lock);
-}
-
-
-#ifdef CONFIG_SMP
-/* Another cpu may still have their %cr3 pointing at the pagetable, so
- we need to repoint it somewhere else before we can unpin it. */
-static void drop_other_mm_ref(void *info)
-{
-	struct mm_struct *mm = info;
-
-	/* This cpu only holds @mm lazily: leave it. */
-	if (this_cpu_read(cpu_tlbstate.active_mm) == mm &&
-	    this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK)
-		leave_mm(smp_processor_id());
-
-	/*
-	 * If this cpu still has a stale cr3 reference to @mm's pgd, make
-	 * sure it is flushed by switching to swapper_pg_dir.
-	 */
-	if (this_cpu_read(xen_current_cr3) == __pa(mm->pgd))
-		load_cr3(swapper_pg_dir);
-}
-
-/*
- * Make sure no cpu is still using @mm's pagetable: switch this cpu away
- * from it, then have every other cpu that references it run
- * drop_other_mm_ref().
- */
-static void xen_drop_mm_ref(struct mm_struct *mm)
-{
-	cpumask_var_t targets;
-	unsigned cpu;
-
-	if (current->active_mm == mm) {
-		if (current->mm != mm)
-			leave_mm(smp_processor_id());
-		else
-			load_cr3(swapper_pg_dir);
-	}
-
-	/* Get the "official" set of cpus referring to our pagetable. */
-	if (!alloc_cpumask_var(&targets, GFP_ATOMIC)) {
-		/* No memory for a mask: call the cpus one at a time. */
-		for_each_online_cpu(cpu) {
-			if (cpumask_test_cpu(cpu, mm_cpumask(mm)) ||
-			    per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
-				smp_call_function_single(cpu, drop_other_mm_ref,
-							 mm, 1);
-		}
-		return;
-	}
-	cpumask_copy(targets, mm_cpumask(mm));
-
-	/*
-	 * A vcpu may hold a stale reference to our cr3 because it is in
-	 * lazy mode and has not flushed its pending hypercalls yet.  Its
-	 * actual current cr3 value tells us so; add such cpus to the set
-	 * to force them to flush.
-	 */
-	for_each_online_cpu(cpu) {
-		if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
-			cpumask_set_cpu(cpu, targets);
-	}
-
-	if (!cpumask_empty(targets))
-		smp_call_function_many(targets, drop_other_mm_ref, mm, 1);
-	free_cpumask_var(targets);
-}
-#else
-/* Non-SMP variant: only the current cpu can reference @mm. */
-static void xen_drop_mm_ref(struct mm_struct *mm)
-{
-	if (current->active_mm != mm)
-		return;
-
-	load_cr3(swapper_pg_dir);
-}
-#endif
-
-/*
- * While a process runs, Xen pins its pagetables, which means that the
- * hypervisor forces it to be read-only, and it controls all updates
- * to it. This means that all pagetable updates have to go via the
- * hypervisor, which is moderately expensive.
- *
- * Since we're pulling the pagetable down, we switch to use init_mm,
- * unpin old process pagetable and mark it all read-write, which
- * allows further operations on it to be simple memory accesses.
- *
- * The only subtle point is that another CPU may be still using the
- * pagetable because of lazy tlb flushing. This means we need to
- * switch all CPUs off this pagetable before we can unpin it.
- */
-static void xen_exit_mmap(struct mm_struct *mm)
-{
-	/* Stay on this cpu while the references are dropped. */
-	get_cpu();
-	xen_drop_mm_ref(mm);
-	put_cpu();
-
-	spin_lock(&mm->page_table_lock);
-
-	/* pgd may not be pinned in the error exit path of execve */
-	if (xen_page_pinned(mm->pgd))
-		__xen_pgd_unpin(mm, mm->pgd);
-
-	spin_unlock(&mm->page_table_lock);
-}
-
-static void __init xen_pagetable_setup_start(pgd_t *base)
-{
-	/* No work is done at the start of pagetable setup. */
-}
-
-/*
- * Reserve [start, end) through native_pagetable_reserve(), then make
- * every page from @end up to PFN_PHYS(pgt_buf_top) read-write again.
- */
-static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
-{
-	u64 addr;
-
-	/* reserve the range used */
-	native_pagetable_reserve(start, end);
-
-	/* set as RW the rest */
-	printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end,
-	       PFN_PHYS(pgt_buf_top));
-	for (addr = end; addr < PFN_PHYS(pgt_buf_top); addr += PAGE_SIZE)
-		make_lowmem_page_readwrite(__va(addr));
-}
-
-static void xen_post_allocator_init(void);
-
-/*
- * End-of-pagetable-setup hook: set up the shared info, then run the
- * post-allocator initialisation.  @base is unused.
- */
-static void __init xen_pagetable_setup_done(pgd_t *base)
-{
-	xen_setup_shared_info();
-
-	xen_post_allocator_init();
-}
-
-/* Store @cr2 in the arch.cr2 field reached through this cpu's xen_vcpu. */
-static void xen_write_cr2(unsigned long cr2)
-{
-	this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
-}
-
-/* Read cr2 from the arch.cr2 field reached through this cpu's xen_vcpu. */
-static unsigned long xen_read_cr2(void)
-{
-	return this_cpu_read(xen_vcpu)->arch.cr2;
-}
-
-/*
- * Read cr2 straight from the per-cpu xen_vcpu_info, without going
- * through the xen_vcpu pointer.
- */
-unsigned long xen_read_cr2_direct(void)
-{
-	return this_cpu_read(xen_vcpu_info.arch.cr2);
-}
-
-/* Queue an MMUEXT_TLB_FLUSH_LOCAL operation for this cpu. */
-static void xen_flush_tlb(void)
-{
-	struct multicall_space mcs;
-	struct mmuext_op *req;
-
-	trace_xen_mmu_flush_tlb(0);
-
-	preempt_disable();
-
-	mcs = xen_mc_entry(sizeof(*req));
-
-	req = mcs.args;
-	req->cmd = MMUEXT_TLB_FLUSH_LOCAL;
-	MULTI_mmuext_op(mcs.mc, req, 1, NULL, DOMID_SELF);
-
-	xen_mc_issue(PARAVIRT_LAZY_MMU);
-
-	preempt_enable();
-}
-
-/*
- * Queue an MMUEXT_INVLPG_LOCAL operation for the page containing
- * @addr.
- */
-static void xen_flush_tlb_single(unsigned long addr)
-{
-	struct multicall_space mcs;
-	struct mmuext_op *req;
-
-	trace_xen_mmu_flush_tlb_single(addr);
-
-	preempt_disable();
-
-	mcs = xen_mc_entry(sizeof(*req));
-	req = mcs.args;
-	req->cmd = MMUEXT_INVLPG_LOCAL;
-	req->arg1.linear_addr = addr & PAGE_MASK;
-	MULTI_mmuext_op(mcs.mc, req, 1, NULL, DOMID_SELF);
-
-	xen_mc_issue(PARAVIRT_LAZY_MMU);
-
-	preempt_enable();
-}
-
-/*
- * Queue a TLB flush on the online cpus in @cpus, excluding the calling
- * cpu.  @va selects either a full flush (TLB_FLUSH_ALL) or the
- * invalidation of one linear address.
- */
-static void xen_flush_tlb_others(const struct cpumask *cpus,
-				 struct mm_struct *mm, unsigned long va)
-{
-	struct {
-		struct mmuext_op op;
-#ifdef CONFIG_SMP
-		DECLARE_BITMAP(mask, num_processors);
-#else
-		DECLARE_BITMAP(mask, NR_CPUS);
-#endif
-	} *req;
-	struct multicall_space mcs;
-
-	trace_xen_mmu_flush_tlb_others(cpus, mm, va);
-
-	if (cpumask_empty(cpus))
-		return;		/* nothing to do */
-
-	/* The request and its vcpu mask share the multicall argument space. */
-	mcs = xen_mc_entry(sizeof(*req));
-	req = mcs.args;
-	req->op.arg2.vcpumask = to_cpumask(req->mask);
-
-	/* Remove us, and any offline CPUS. */
-	cpumask_and(to_cpumask(req->mask), cpus, cpu_online_mask);
-	cpumask_clear_cpu(smp_processor_id(), to_cpumask(req->mask));
-
-	if (va != TLB_FLUSH_ALL) {
-		req->op.cmd = MMUEXT_INVLPG_MULTI;
-		req->op.arg1.linear_addr = va;
-	} else {
-		req->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
-	}
-
-	MULTI_mmuext_op(mcs.mc, &req->op, 1, NULL, DOMID_SELF);
-
-	xen_mc_issue(PARAVIRT_LAZY_MMU);
-}
-
-/* Return this cpu's xen_cr3 value, as last stored by the cr3 write path. */
-static unsigned long xen_read_cr3(void)
-{
-	return this_cpu_read(xen_cr3);
-}
-
-/*
- * Multicall completion callback: record @v (a cr3 value smuggled
- * through the void pointer) as this cpu's xen_current_cr3.
- */
-static void set_current_cr3(void *v)
-{
-	unsigned long cr3 = (unsigned long)v;
-
-	this_cpu_write(xen_current_cr3, cr3);
-}
-
-/*
- * Queue a base-pointer switch.  @kernel selects MMUEXT_NEW_BASEPTR
- * over MMUEXT_NEW_USER_BASEPTR.  A @cr3 of 0 is passed on as mfn 0,
- * which triggers a warning for the kernel base pointer.
- */
-static void __xen_write_cr3(bool kernel, unsigned long cr3)
-{
-	struct mmuext_op req;
-	unsigned long mfn;
-
-	trace_xen_mmu_write_cr3(kernel, cr3);
-
-	mfn = cr3 ? pfn_to_mfn(PFN_DOWN(cr3)) : 0;
-
-	WARN_ON(mfn == 0 && kernel);
-
-	req.cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
-	req.arg1.mfn = mfn;
-
-	xen_extend_mmuext_op(&req);
-
-	if (!kernel)
-		return;
-
-	this_cpu_write(xen_cr3, cr3);
-
-	/*
-	 * xen_current_cr3 is only updated once the batch has actually
-	 * been submitted.
-	 */
-	xen_mc_callback(set_current_cr3, (void *)cr3);
-}
-
-/*
- * Switch to the pagetable at @cr3.  On 64-bit the user base pointer is
- * switched in the same batch, to the attached user pgd or to 0 if
- * there is none.  Must be called with preemption disabled.
- */
-static void xen_write_cr3(unsigned long cr3)
-{
-	BUG_ON(preemptible());
-
-	xen_mc_batch();  /* disables interrupts */
-
-	/*
-	 * Update while interrupts are disabled, so it's atomic with
-	 * respect to ipis.
-	 */
-	this_cpu_write(xen_cr3, cr3);
-
-	__xen_write_cr3(true, cr3);
-
-#ifdef CONFIG_X86_64
-	{
-		pgd_t *upgd = xen_get_user_pgd(__va(cr3));
-
-		__xen_write_cr3(false, upgd ? __pa(upgd) : 0);
-	}
-#endif
-
-	xen_mc_issue(PARAVIRT_LAZY_CPU);  /* interrupts restored */
-}
-
-/*
- * Per-mm pgd setup.  The pgd of a new mm must not be pinned yet.
- *
- * On 64-bit a zeroed page is allocated for the user pagetable, recorded
- * in page->private of the kernel pgd page, and given the vsyscall
- * mapping.
- *
- * Returns 0 on success, or -ENOMEM if the user pgd page could not be
- * allocated (page->private is then left NULL).
- */
-static int xen_pgd_alloc(struct mm_struct *mm)
-{
-	pgd_t *pgd = mm->pgd;
-	int ret = 0;
-
-	BUG_ON(PagePinned(virt_to_page(pgd)));
-
-#ifdef CONFIG_X86_64
-	{
-		struct page *page = virt_to_page(pgd);
-		pgd_t *user_pgd;
-
-		BUG_ON(page->private != 0);
-
-		user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
-		page->private = (unsigned long)user_pgd;
-
-		/*
-		 * Bail out before the pinned check below: with no user
-		 * pgd, xen_get_user_pgd() returns NULL and
-		 * virt_to_page(NULL) does not name a valid struct page.
-		 */
-		if (user_pgd == NULL)
-			return -ENOMEM;
-
-		user_pgd[pgd_index(VSYSCALL_START)] =
-			__pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
-
-		BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
-	}
-#endif
-
-	return ret;
-}
-
-/*
- * Per-mm pgd teardown: on 64-bit, free the user pgd page attached to
- * @pgd, if any.  Nothing to do otherwise.
- */
-static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-#ifdef CONFIG_X86_64
-	pgd_t *upgd = xen_get_user_pgd(pgd);
-
-	if (upgd != NULL)
-		free_page((unsigned long)upgd);
-#endif
-}
-
-#ifdef CONFIG_X86_32
-/*
- * 32-bit: when a present pte is being replaced, the new pte may only
- * keep _PAGE_RW if the existing pte already has it.
- */
-static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
-{
-	pteval_t old = pte_val_ma(*ptep);
-
-	if (old & _PAGE_PRESENT) {
-		/* All bits allowed, except RW which follows the old pte. */
-		pteval_t allowed = (old & _PAGE_RW) | ~_PAGE_RW;
-
-		pte = __pte_ma(allowed & pte_val_ma(pte));
-	}
-
-	return pte;
-}
-#else /* CONFIG_X86_64 */
-/*
- * 64-bit: write-protect the new pte if it maps a page of the newly
- * allocated kernel pagetable, unless it is an early_ioremap fixmap
- * slot mapping the freshly allocated page (pgt_buf_end - 1).
- */
-static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
-{
-	unsigned long pfn = pte_pfn(pte);
-
-	if (is_early_ioremap_ptep(ptep)) {
-		/* Only the most recently allocated page may stay RW. */
-		if (pfn != (pgt_buf_end - 1))
-			pte = pte_wrprotect(pte);
-	} else if (pfn >= pgt_buf_start && pfn < pgt_buf_top) {
-		/* Ordinary mapping of a pagetable page: must be RO. */
-		pte = pte_wrprotect(pte);
-	}
-
-	return pte;
-}
-#endif /* CONFIG_X86_64 */
-
-/* Init-time set_pte while constructing initial pagetables, which
- doesn't allow RO pagetable pages to be remapped RW */
-static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
-{
-	/* Filter the RW bit first, then do an ordinary set_pte. */
-	xen_set_pte(ptep, mask_rw_pte(ptep, pte));
-}
-
-/*
- * Perform the mmuext operation @cmd on the frame backing @pfn with an
- * immediate (unbatched) hypercall.  Failure is fatal.
- */
-static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
-{
-	struct mmuext_op req;
-
-	req.cmd = cmd;
-	req.arg1.mfn = pfn_to_mfn(pfn);
-
-	if (HYPERVISOR_mmuext_op(&req, 1, NULL, DOMID_SELF) != 0)
-		BUG();
-}
-
-/* Early in boot, while setting up the initial pagetable, assume
- everything is pinned. */
-static void __init xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
-{
-	void *va = __va(PFN_PHYS(pfn));
-
-#ifdef CONFIG_FLATMEM
-	BUG_ON(mem_map);	/* should only be used early */
-#endif
-	/* Make the page read-only, then pin it as an L1 table. */
-	make_lowmem_page_readonly(va);
-	pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
-}
-
-/* Used for pmd and pud */
-static void __init xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
-{
-	void *va = __va(PFN_PHYS(pfn));
-
-#ifdef CONFIG_FLATMEM
-	BUG_ON(mem_map);	/* should only be used early */
-#endif
-	/* Only made read-only here; no pin operation is issued. */
-	make_lowmem_page_readonly(va);
-}
-
-/* Early release_pte assumes that all pts are pinned, since there's
- only init_mm and anything attached to that is pinned. */
-static void __init xen_release_pte_init(unsigned long pfn)
-{
-	void *va = __va(PFN_PHYS(pfn));
-
-	/* Unpin first, then the page can become writable again. */
-	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
-	make_lowmem_page_readwrite(va);
-}
-
-/* Early release of a pmd/pud page: just make it writable again. */
-static void __init xen_release_pmd_init(unsigned long pfn)
-{
-	void *va = __va(PFN_PHYS(pfn));
-
-	make_lowmem_page_readwrite(va);
-}
-
-/*
- * Batched counterpart of pin_pagetable_pfn(): add the mmuext operation
- * @cmd for @pfn to the current multicall batch.
- */
-static inline void __pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
-{
-	struct multicall_space mcs = __xen_mc_entry(sizeof(struct mmuext_op));
-	struct mmuext_op *req = mcs.args;
-
-	req->cmd = cmd;
-	req->arg1.mfn = pfn_to_mfn(pfn);
-
-	MULTI_mmuext_op(mcs.mc, req, 1, NULL, DOMID_SELF);
-}
-
-/*
- * Add to the current multicall batch an update of the __va() mapping
- * of @pfn so that it uses protection @prot.
- */
-static inline void __set_pfn_prot(unsigned long pfn, pgprot_t prot)
-{
-	unsigned long va = (unsigned long)__va(pfn << PAGE_SHIFT);
-	struct multicall_space mcs;
-
-	mcs = __xen_mc_entry(0);
-	MULTI_update_va_mapping(mcs.mc, va, pfn_pte(pfn, prot), 0);
-}
-
-/* This needs to make sure the new pte page is pinned iff its being
- attached to a pinned pagetable. */
-static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn,
-				    unsigned level)
-{
-	bool pinned = PagePinned(virt_to_page(mm->pgd));
-	struct page *page;
-
-	trace_xen_mmu_alloc_ptpage(mm, pfn, level, pinned);
-
-	/* An unpinned pagetable takes new pages without further ado. */
-	if (!pinned)
-		return;
-
-	page = pfn_to_page(pfn);
-	SetPagePinned(page);
-
-	if (PageHighMem(page)) {
-		/* make sure there are no stray mappings of this page */
-		kmap_flush_unused();
-		return;
-	}
-
-	xen_mc_batch();
-
-	__set_pfn_prot(pfn, PAGE_KERNEL_RO);
-
-	/* pte pages are pinned individually only with split pte locks */
-	if (level == PT_PTE && USE_SPLIT_PTLOCKS)
-		__pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
-
-	xen_mc_issue(PARAVIRT_LAZY_MMU);
-}
-
-/* A new pte page at @pfn is being attached to @mm. */
-static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn)
-{
-	xen_alloc_ptpage(mm, pfn, PT_PTE);
-}
-
-/* A new pmd page at @pfn is being attached to @mm. */
-static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
-{
-	xen_alloc_ptpage(mm, pfn, PT_PMD);
-}
-
-/* This should never happen until we're OK to use struct page */
-static inline void xen_release_ptpage(unsigned long pfn, unsigned level)
-{
-	struct page *page = pfn_to_page(pfn);
-	bool pinned = PagePinned(page);
-
-	trace_xen_mmu_release_ptpage(pfn, level, pinned);
-
-	if (!pinned)
-		return;
-
-	if (!PageHighMem(page)) {
-		xen_mc_batch();
-
-		/* Reverse of the alloc path: unpin, then make it RW. */
-		if (level == PT_PTE && USE_SPLIT_PTLOCKS)
-			__pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
-
-		__set_pfn_prot(pfn, PAGE_KERNEL);
-
-		xen_mc_issue(PARAVIRT_LAZY_MMU);
-	}
-
-	ClearPagePinned(page);
-}
-
-/* The pte page at @pfn is being released. */
-static void xen_release_pte(unsigned long pfn)
-{
-	xen_release_ptpage(pfn, PT_PTE);
-}
-
-/* The pmd page at @pfn is being released. */
-static void xen_release_pmd(unsigned long pfn)
-{
-	xen_release_ptpage(pfn, PT_PMD);
-}
-
-#if PAGETABLE_LEVELS == 4
-/* A new pud page at @pfn is being attached to @mm. */
-static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
-{
-	xen_alloc_ptpage(mm, pfn, PT_PUD);
-}
-
-/* The pud page at @pfn is being released. */
-static void xen_release_pud(unsigned long pfn)
-{
-	xen_release_ptpage(pfn, PT_PUD);
-}
-#endif
-
-/*
- * 32-bit only: reserve the top of the address space, starting at the
- * virt_start reported by XENVER_platform_parameters, or at
- * HYPERVISOR_VIRT_START if that query fails.  A no-op on 64-bit.
- */
-void __init xen_reserve_top(void)
-{
-#ifdef CONFIG_X86_32
-	struct xen_platform_parameters pp;
-	unsigned long top;
-
-	if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
-		top = pp.virt_start;
-	else
-		top = HYPERVISOR_VIRT_START;
-
-	reserve_top_address(-top);
-#endif	/* CONFIG_X86_32 */
-}
-
-/*
- * Like __va(), but returns address in the kernel mapping (which is
- * all we have until the physical memory mapping has been set up).
- */
-static void *__ka(phys_addr_t paddr)
-{
-#ifndef CONFIG_X86_64
-	return __va(paddr);
-#else
-	/* 64-bit: offset into the kernel mapping at __START_KERNEL_map. */
-	return (void *)(paddr + __START_KERNEL_map);
-#endif
-}
-
-/* Convert a machine address to physical address */
-static unsigned long m2p(phys_addr_t maddr)
-{
-	/* Strip the flag bits, translate the frame, rebuild the address. */
-	unsigned long mfn = (maddr & PTE_PFN_MASK) >> PAGE_SHIFT;
-
-	return mfn_to_pfn(mfn) << PAGE_SHIFT;
-}
-
-/* Convert a machine address to kernel virtual */
-static void *m2v(phys_addr_t maddr)
-{
-	unsigned long paddr = m2p(maddr);
-
-	return __ka(paddr);
-}
-
-/*
- * Set the page permissions on an identity-mapped page: remap @addr
- * onto its own pfn with protection @prot through an immediate
- * hypercall.  Failure is fatal.
- */
-static void set_page_prot(void *addr, pgprot_t prot)
-{
-	unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
-	pte_t entry = pfn_pte(pfn, prot);
-
-	if (HYPERVISOR_update_va_mapping((unsigned long)addr, entry, 0) != 0)
-		BUG();
-}
-
-/*
- * Fill @pmd with mappings for pfns 0 .. @max_pfn - 1.  Pte pages that
- * are already present are reused, with only their empty slots filled
- * in; missing ones are carved out of level1_ident_pgt (allocated here
- * with extend_brk) until it runs out.  The pte pages handed out and
- * @pmd itself are made read-only at the end.
- */
-static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
-{
-	unsigned long pfn = 0;
-	unsigned used = 0;	/* level1_ident_pgt entries handed out */
-	unsigned pmd_i, pte_i;
-
-	level1_ident_pgt = extend_brk(sizeof(pte_t) * LEVEL1_IDENT_ENTRIES,
-				      PAGE_SIZE);
-
-	for (pmd_i = 0; pmd_i < PTRS_PER_PMD && pfn < max_pfn; pmd_i++) {
-		pte_t *ptes;
-
-		/* Reuse or allocate a page of ptes */
-		if (!pmd_present(pmd[pmd_i])) {
-			/* No free pte pages left: stop here. */
-			if (used == LEVEL1_IDENT_ENTRIES)
-				break;
-
-			ptes = &level1_ident_pgt[used];
-			used += PTRS_PER_PTE;
-
-			pmd[pmd_i] = __pmd(__pa(ptes) | _PAGE_TABLE);
-		} else {
-			ptes = m2v(pmd[pmd_i].pmd);
-		}
-
-		/* Install mappings, leaving existing entries alone. */
-		for (pte_i = 0; pte_i < PTRS_PER_PTE; pte_i++, pfn++) {
-#ifdef CONFIG_X86_32
-			if (pfn > max_pfn_mapped)
-				max_pfn_mapped = pfn;
-#endif
-
-			if (pte_none(ptes[pte_i]))
-				ptes[pte_i] = pfn_pte(pfn, PAGE_KERNEL_EXEC);
-		}
-	}
-
-	for (pte_i = 0; pte_i < used; pte_i += PTRS_PER_PTE)
-		set_page_prot(&level1_ident_pgt[pte_i], PAGE_KERNEL_RO);
-
-	set_page_prot(pmd, PAGE_KERNEL_RO);
-}
-
-/*
- * Initialise machine_to_phys_mapping and machine_to_phys_nr from the
- * XENMEM_machphys_mapping hypercall.  If the hypercall fails, only the
- * entry count is set, to MACH2PHYS_NR_ENTRIES.
- */
-void __init xen_setup_machphys_mapping(void)
-{
-	struct xen_machphys_mapping mapping;
-
-	if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) != 0) {
-		machine_to_phys_nr = MACH2PHYS_NR_ENTRIES;
-	} else {
-		machine_to_phys_mapping = (unsigned long *)mapping.v_start;
-		machine_to_phys_nr = mapping.max_mfn + 1;
-	}
-#ifdef CONFIG_X86_32
-	/* Warn if the end of the table wraps around the address space. */
-	WARN_ON((machine_to_phys_mapping + (machine_to_phys_nr - 1))
-		< machine_to_phys_mapping);
-#endif
-}
-
-#ifdef CONFIG_X86_64
-/* Rewrite one page of pagetable entries in place via xen_make_pte() */
-static void convert_pfn_mfn(void *v)
-{
-	pte_t *entry = v;
-	pte_t *const end = entry + PTRS_PER_PTE;
-
-	/*
-	 * The same conversion applies at every pagetable level, so each
-	 * entry is handled as though it were a pte.
-	 */
-	for (; entry < end; entry++)
-		*entry = xen_make_pte(entry->pte);
-}
-
-/*
- * Set up the initial kernel pagetable.
- *
- * We can construct this by grafting the Xen provided pagetable into
- * head_64.S's preconstructed pagetables. We copy the Xen L2's into
- * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This
- * means that only the kernel has a physical mapping to start with -
- * but that's enough to get __va working. We need to fill in the rest
- * of the physical mapping once some sort of allocator has been set
- * up.
- *
- * pgd is the Xen-provided top-level table; it is unpinned here once
- * init_level4_pgt has been pinned. max_pfn is passed through to
- * xen_map_identity_early(). The return value is init_level4_pgt.
- */
-pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
- unsigned long max_pfn)
-{
- pud_t *l3;
- pmd_t *l2;
-
- /* max_pfn_mapped is the last pfn mapped in the initial memory
- * mappings. Considering that on Xen after the kernel mappings we
- * have the mappings of some pages that don't exist in pfn space, we
- * set max_pfn_mapped to the last real pfn mapped. */
- max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list));
-
- /* Zap identity mapping */
- init_level4_pgt[0] = __pgd(0);
-
- /* Pre-constructed entries are in pfn, so convert to mfn */
- convert_pfn_mfn(init_level4_pgt);
- convert_pfn_mfn(level3_ident_pgt);
- convert_pfn_mfn(level3_kernel_pgt);
-
- /* Find Xen's L2 covering __START_KERNEL_map */
- l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
- l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
-
- /* The same Xen L2 seeds both the identity and the kernel L2 */
- memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
- memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
-
- /* Find Xen's L2 covering __START_KERNEL_map + PMD_SIZE for the fixmap */
- l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd);
- l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
- memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
-
- /* Set up identity map */
- xen_map_identity_early(level2_ident_pgt, max_pfn);
-
- /* Make pagetable pieces RO */
- set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
- set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
- set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
- set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
- set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
- set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
-
- /* Pin down new L4 */
- pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
- PFN_DOWN(__pa_symbol(init_level4_pgt)));
-
- /* Unpin Xen-provided one */
- pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
-
- /* Switch over */
- pgd = init_level4_pgt;
-
- /*
- * At this stage there can be no user pgd, and no page
- * structure to attach it to, so make sure we just set kernel
- * pgd.
- */
- xen_mc_batch();
- __xen_write_cr3(true, __pa(pgd));
- xen_mc_issue(PARAVIRT_LAZY_CPU);
-
- /* Reserve the nr_pt_frames pages starting at pt_base */
- memblock_reserve(__pa(xen_start_info->pt_base),
- xen_start_info->nr_pt_frames * PAGE_SIZE);
-
- return pgd;
-}
-#else /* !CONFIG_X86_64 */
-/*
- * Kernel pmd pages for initial_page_table and swapper_pg_dir. The
- * pointers are filled in with extend_brk() by the functions below.
- */
-static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
-static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD);
-
-/*
- * Boot-time write_cr3 hook for 32-bit (installed through xen_mmu_ops).
- *
- * It handles exactly one transition, initial_page_table ->
- * swapper_pg_dir, and BUGs on anything else. Its last act is to
- * replace itself in pv_mmu_ops with xen_write_cr3.
- */
-static void __init xen_write_cr3_init(unsigned long cr3)
-{
- unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir));
-
- BUG_ON(read_cr3() != __pa(initial_page_table));
- BUG_ON(cr3 != __pa(swapper_pg_dir));
-
- /*
- * We are switching to swapper_pg_dir for the first time (from
- * initial_page_table) and therefore need to mark that page
- * read-only and then pin it.
- *
- * Xen disallows sharing of kernel PMDs for PAE
- * guests. Therefore we must copy the kernel PMD from
- * initial_page_table into a new kernel PMD to be used in
- * swapper_pg_dir.
- */
- swapper_kernel_pmd =
- extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
- memcpy(swapper_kernel_pmd, initial_kernel_pmd,
- sizeof(pmd_t) * PTRS_PER_PMD);
- swapper_pg_dir[KERNEL_PGD_BOUNDARY] =
- __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT);
- set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO);
-
- set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
- xen_write_cr3(cr3);
- pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, pfn);
-
- /* Retire the old tables: unpin, then restore PAGE_KERNEL on them */
- pin_pagetable_pfn(MMUEXT_UNPIN_TABLE,
- PFN_DOWN(__pa(initial_page_table)));
- set_page_prot(initial_page_table, PAGE_KERNEL);
- set_page_prot(initial_kernel_pmd, PAGE_KERNEL);
-
- /* Later cr3 writes take the regular path */
- pv_mmu_ops.write_cr3 = &xen_write_cr3;
-}
-
-/*
- * 32-bit variant: build initial_page_table from the Xen-provided pgd.
- *
- * The kernel pmd referenced by pgd[KERNEL_PGD_BOUNDARY] is copied into
- * initial_kernel_pmd and extended by xen_map_identity_early(); the rest
- * of pgd is copied as-is. The Xen-provided table is then unpinned, and
- * initial_page_table is pinned and loaded. Returns initial_page_table.
- */
-pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
- unsigned long max_pfn)
-{
- pmd_t *kernel_pmd;
-
- initial_kernel_pmd =
- extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
-
- /* End of the Xen-provided pagetable frames plus 512KiB, as a pfn */
- max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) +
- xen_start_info->nr_pt_frames * PAGE_SIZE +
- 512*1024);
-
- kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
- memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
-
- xen_map_identity_early(initial_kernel_pmd, max_pfn);
-
- /* Copy every pgd slot, then point the kernel slot at our own pmd */
- memcpy(initial_page_table, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
- initial_page_table[KERNEL_PGD_BOUNDARY] =
- __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT);
-
- set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO);
- set_page_prot(initial_page_table, PAGE_KERNEL_RO);
- set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
-
- pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
-
- pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE,
- PFN_DOWN(__pa(initial_page_table)));
- xen_write_cr3(__pa(initial_page_table));
-
- /* Reserve the nr_pt_frames pages starting at pt_base */
- memblock_reserve(__pa(xen_start_info->pt_base),
- xen_start_info->nr_pt_frames * PAGE_SIZE);
-
- return initial_page_table;
-}
-#endif /* CONFIG_X86_64 */
-
-/*
- * Stand-in pages mapped at the local APIC and IO APIC fixmap slots.
- * xen_init_mmu_ops() fills them with 0xff and 0xfd respectively.
- */
-static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss;
-static unsigned char fake_ioapic_mapping[PAGE_SIZE] __page_aligned_bss;
-
-/*
- * set_fixmap hook (installed through xen_mmu_ops).
- *
- * phys is reduced to a frame number up front. Which kind of frame
- * number it is taken to be depends on idx: the slots listed in the
- * first case group build the pte with pfn_pte(), FIX_PARAVIRT_BOOTMAP
- * and any unlisted slot use mfn_pte() (the latter with _PAGE_IOMAP
- * added), and the APIC/IO APIC slots ignore phys altogether.
- */
-static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
-{
- pte_t pte;
-
- phys >>= PAGE_SHIFT;
-
- switch (idx) {
- case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
-#ifdef CONFIG_X86_F00F_BUG
- case FIX_F00F_IDT:
-#endif
-#ifdef CONFIG_X86_32
- case FIX_WP_TEST:
- case FIX_VDSO:
-# ifdef CONFIG_HIGHMEM
- case FIX_KMAP_BEGIN ... FIX_KMAP_END:
-# endif
-#else
- case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
- case VVAR_PAGE:
-#endif
- case FIX_TEXT_POKE0:
- case FIX_TEXT_POKE1:
- /* All local page mappings */
- pte = pfn_pte(phys, prot);
- break;
-
-#ifdef CONFIG_X86_LOCAL_APIC
- case FIX_APIC_BASE: /* maps dummy local APIC */
- pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL);
- break;
-#endif
-
-#ifdef CONFIG_X86_IO_APIC
- case FIX_IO_APIC_BASE_0 ... FIX_IO_APIC_BASE_END:
- /*
- * We just don't map the IO APIC - all access is via
- * hypercalls. Keep the address in the pte for reference.
- *
- * NOTE(review): the pte built below points at
- * fake_ioapic_mapping and does not contain phys, so the
- * "keep the address" remark looks stale - confirm.
- */
- pte = pfn_pte(PFN_DOWN(__pa(fake_ioapic_mapping)), PAGE_KERNEL);
- break;
-#endif
-
- case FIX_PARAVIRT_BOOTMAP:
- /* This is an MFN, but it isn't an IO mapping from the
- IO domain */
- pte = mfn_pte(phys, prot);
- break;
-
- default:
- /* By default, set_fixmap is used for hardware mappings */
- pte = mfn_pte(phys, __pgprot(pgprot_val(prot) | _PAGE_IOMAP));
- break;
- }
-
- __native_set_fixmap(idx, pte);
-
-#ifdef CONFIG_X86_64
- /* Replicate changes to map the vsyscall page into the user
- pagetable vsyscall mapping. */
- if ((idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) ||
- idx == VVAR_PAGE) {
- unsigned long vaddr = __fix_to_virt(idx);
- set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
- }
-#endif
-}
-
-/*
- * In the initial domain, map every page of the ISA range
- * [ISA_START_ADDRESS, ISA_END_ADDRESS) at PAGE_OFFSET + address, using
- * the address itself as the machine frame. Other domains do nothing.
- */
-void __init xen_ident_map_ISA(void)
-{
-	unsigned long pa = ISA_START_ADDRESS;
-
-	/* Only dom0 linear-maps the ISA machine addresses */
-	if (!xen_initial_domain())
-		return;
-
-	xen_raw_printk("Xen: setup ISA identity maps\n");
-
-	while (pa < ISA_END_ADDRESS) {
-		pte_t pte = mfn_pte(PFN_DOWN(pa), PAGE_KERNEL_IO);
-
-		if (HYPERVISOR_update_va_mapping(PAGE_OFFSET + pa, pte, 0))
-			BUG();
-
-		pa += PAGE_SIZE;
-	}
-
-	xen_flush_tlb();
-}
-
-/*
- * Swap the boot-time pv_mmu_ops hooks installed from xen_mmu_ops (the
- * *_init and *_hyper variants) for their regular counterparts, then
- * mark the already-existing pagetables as pinned.
- */
-static void __init xen_post_allocator_init(void)
-{
- pv_mmu_ops.set_pte = xen_set_pte;
- pv_mmu_ops.set_pmd = xen_set_pmd;
- pv_mmu_ops.set_pud = xen_set_pud;
-#if PAGETABLE_LEVELS == 4
- pv_mmu_ops.set_pgd = xen_set_pgd;
-#endif
-
- /* This will work as long as patching hasn't happened yet
- (which it hasn't) */
- pv_mmu_ops.alloc_pte = xen_alloc_pte;
- pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
- pv_mmu_ops.release_pte = xen_release_pte;
- pv_mmu_ops.release_pmd = xen_release_pmd;
-#if PAGETABLE_LEVELS == 4
- pv_mmu_ops.alloc_pud = xen_alloc_pud;
- pv_mmu_ops.release_pud = xen_release_pud;
-#endif
-
-#ifdef CONFIG_X86_64
- /* level3_user_vsyscall gets its Pinned page flag set here */
- SetPagePinned(virt_to_page(level3_user_vsyscall));
-#endif
- xen_mark_init_mm_pinned();
-}
-
-/*
- * lazy_mode.leave hook: flush any queued multicalls, then leave lazy
- * MMU mode, with preemption disabled across both steps.
- */
-static void xen_leave_lazy_mmu(void)
-{
- preempt_disable();
- xen_mc_flush();
- paravirt_leave_lazy_mmu();
- preempt_enable();
-}
-
-/*
- * Boot-time template for pv_mmu_ops; xen_init_mmu_ops() copies it
- * wholesale. The set_pte/set_pmd/set_pud/set_pgd and alloc/release
- * hooks given here are replaced later by xen_post_allocator_init().
- */
-static const struct pv_mmu_ops xen_mmu_ops __initconst = {
- .read_cr2 = xen_read_cr2,
- .write_cr2 = xen_write_cr2,
-
- .read_cr3 = xen_read_cr3,
-#ifdef CONFIG_X86_32
- /* One-shot hook; it installs xen_write_cr3 itself when it runs */
- .write_cr3 = xen_write_cr3_init,
-#else
- .write_cr3 = xen_write_cr3,
-#endif
-
- .flush_tlb_user = xen_flush_tlb,
- .flush_tlb_kernel = xen_flush_tlb,
- .flush_tlb_single = xen_flush_tlb_single,
- .flush_tlb_others = xen_flush_tlb_others,
-
- .pte_update = paravirt_nop,
- .pte_update_defer = paravirt_nop,
-
- .pgd_alloc = xen_pgd_alloc,
- .pgd_free = xen_pgd_free,
-
- .alloc_pte = xen_alloc_pte_init,
- .release_pte = xen_release_pte_init,
- .alloc_pmd = xen_alloc_pmd_init,
- .release_pmd = xen_release_pmd_init,
-
- .set_pte = xen_set_pte_init,
- .set_pte_at = xen_set_pte_at,
- .set_pmd = xen_set_pmd_hyper,
-
- .ptep_modify_prot_start = __ptep_modify_prot_start,
- .ptep_modify_prot_commit = __ptep_modify_prot_commit,
-
- .pte_val = PV_CALLEE_SAVE(xen_pte_val),
- .pgd_val = PV_CALLEE_SAVE(xen_pgd_val),
-
- .make_pte = PV_CALLEE_SAVE(xen_make_pte),
- .make_pgd = PV_CALLEE_SAVE(xen_make_pgd),
-
-#ifdef CONFIG_X86_PAE
- .set_pte_atomic = xen_set_pte_atomic,
- .pte_clear = xen_pte_clear,
- .pmd_clear = xen_pmd_clear,
-#endif /* CONFIG_X86_PAE */
- .set_pud = xen_set_pud_hyper,
-
- .make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
- .pmd_val = PV_CALLEE_SAVE(xen_pmd_val),
-
-#if PAGETABLE_LEVELS == 4
- .pud_val = PV_CALLEE_SAVE(xen_pud_val),
- .make_pud = PV_CALLEE_SAVE(xen_make_pud),
- .set_pgd = xen_set_pgd_hyper,
-
- /* The pud hooks start out on the pmd *_init helpers */
- .alloc_pud = xen_alloc_pmd_init,
- .release_pud = xen_release_pmd_init,
-#endif /* PAGETABLE_LEVELS == 4 */
-
- .activate_mm = xen_activate_mm,
- .dup_mmap = xen_dup_mmap,
- .exit_mmap = xen_exit_mmap,
-
- .lazy_mode = {
- .enter = paravirt_enter_lazy_mmu,
- .leave = xen_leave_lazy_mmu,
- },
-
- .set_fixmap = xen_set_fixmap,
-};
-
-/*
- * Install the Xen pagetable hooks in x86_init, copy xen_mmu_ops over
- * pv_mmu_ops, and fill the two stand-in APIC pages with their marker
- * bytes.
- */
-void __init xen_init_mmu_ops(void)
-{
- x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
- x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
- x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
- pv_mmu_ops = xen_mmu_ops;
-
- memset(dummy_mapping, 0xff, PAGE_SIZE);
- memset(fake_ioapic_mapping, 0xfd, PAGE_SIZE);
-}
-
-/* Protected by xen_reservation_lock. */
-#define MAX_CONTIG_ORDER 9 /* 2MB */
-static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
-
-#define VOID_PTE (mfn_pte(0, __pgprot(0)))
-
-/*
- * Unmap the 2^order pages starting at vaddr and invalidate their p2m
- * entries.
- *
- * If in_frames is non-NULL it receives each page's mfn as it was before
- * the page was zapped; if out_frames is non-NULL it receives each
- * page's pfn. Either array needs room for 2^order entries.
- */
-static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
-			      unsigned long *in_frames,
-			      unsigned long *out_frames)
-{
-	/* Unsigned index, so the comparison against 1UL << order is not mixed-sign */
-	unsigned long i;
-	const unsigned long nr_pages = 1UL << order;
-	struct multicall_space mcs;
-
-	xen_mc_batch();
-	for (i = 0; i < nr_pages; i++, vaddr += PAGE_SIZE) {
-		const unsigned long pfn = virt_to_pfn(vaddr);
-
-		mcs = __xen_mc_entry(0);
-
-		/*
-		 * Record the mfn first; presumably the lookup depends on the
-		 * p2m entry that is invalidated below - TODO confirm.
-		 */
-		if (in_frames)
-			in_frames[i] = virt_to_mfn(vaddr);
-
-		MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0);
-		__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
-
-		if (out_frames)
-			out_frames[i] = pfn;
-	}
-	xen_mc_issue(0);
-}
-
-/*
- * Re-establish the mappings for the 2^order pages starting at vaddr and
- * record them in the p2m. The machine frames come from mfns[] when it
- * is non-NULL, and otherwise run contiguously upwards from first_mfn.
- */
-static void xen_remap_exchanged_ptes(unsigned long vaddr, int order,
-				     unsigned long *mfns,
-				     unsigned long first_mfn)
-{
-	const unsigned nr_pages = 1u << order;
-	unsigned idx;
-
-	xen_mc_batch();
-
-	for (idx = 0; idx < nr_pages; idx++, vaddr += PAGE_SIZE) {
-		struct multicall_space mcs = __xen_mc_entry(0);
-		unsigned long mfn = mfns ? mfns[idx] : first_mfn + idx;
-		unsigned flags = 0;
-
-		/*
-		 * Only the final update carries flush flags: a single-page
-		 * invalidate for order 0, a full TLB flush otherwise.
-		 */
-		if (idx == nr_pages - 1) {
-			if (order == 0)
-				flags = UVMF_INVLPG | UVMF_ALL;
-			else
-				flags = UVMF_TLB_FLUSH | UVMF_ALL;
-		}
-
-		MULTI_update_va_mapping(mcs.mc, vaddr,
-					mfn_pte(mfn, PAGE_KERNEL), flags);
-
-		set_phys_to_machine(virt_to_pfn(vaddr), mfn);
-	}
-
-	xen_mc_issue(0);
-}
-
-/*
- * Perform the hypercall to exchange a region of our pfns to point to
- * memory with the required contiguous alignment. Takes the pfns as
- * input, and populates mfns as output.
- *
- * Returns a success code indicating whether the hypervisor was able to
- * satisfy the request or not: non-zero if all extents_in input extents
- * were exchanged, zero if none were. Any other outcome is a BUG.
- */
-static int xen_exchange_memory(unsigned long extents_in, unsigned int order_in,
- unsigned long *pfns_in,
- unsigned long extents_out,
- unsigned int order_out,
- unsigned long *mfns_out,
- unsigned int address_bits)
-{
- long rc;
- int success;
-
- struct xen_memory_exchange exchange = {
- .in = {
- .nr_extents = extents_in,
- .extent_order = order_in,
- .extent_start = pfns_in,
- .domid = DOMID_SELF
- },
- .out = {
- .nr_extents = extents_out,
- .extent_order = order_out,
- .extent_start = mfns_out,
- .address_bits = address_bits,
- .domid = DOMID_SELF
- }
- };
-
- /* Both sides of the exchange must cover the same number of pages */
- BUG_ON(extents_in << order_in != extents_out << order_out);
-
- rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
- success = (exchange.nr_exchanged == extents_in);
-
- /* A failure must be total (nothing exchanged) and must report rc != 0 */
- BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
- /* A full exchange must report rc == 0 */
- BUG_ON(success && (rc != 0));
-
- return success;
-}
-
-/*
- * Exchange the 2^order pages at vstart for a single extent of that
- * order, constrained by address_bits. The region is zeroed first.
- *
- * Returns 0 on success, and also for auto-translated guests, where
- * nothing is done. Returns -ENOMEM if order exceeds MAX_CONTIG_ORDER or
- * the exchange fails; on a failed exchange the original frames are
- * mapped back.
- */
-int xen_create_contiguous_region(unsigned long vstart, unsigned int order,
- unsigned int address_bits)
-{
- unsigned long *in_frames = discontig_frames, out_frame;
- unsigned long flags;
- int success;
-
- /*
- * Currently an auto-translated guest will not perform I/O, nor will
- * it require PAE page directories below 4GB. Therefore any calls to
- * this function are redundant and can be ignored.
- */
-
- if (xen_feature(XENFEAT_auto_translated_physmap))
- return 0;
-
- /* discontig_frames only has room for 1 << MAX_CONTIG_ORDER entries */
- if (unlikely(order > MAX_CONTIG_ORDER))
- return -ENOMEM;
-
- memset((void *) vstart, 0, PAGE_SIZE << order);
-
- spin_lock_irqsave(&xen_reservation_lock, flags);
-
- /* 1. Zap current PTEs, remembering MFNs. */
- xen_zap_pfn_range(vstart, order, in_frames, NULL);
-
- /* 2. Get a new contiguous memory extent. */
- out_frame = virt_to_pfn(vstart);
- success = xen_exchange_memory(1UL << order, 0, in_frames,
- 1, order, &out_frame,
- address_bits);
-
- /* 3. Map the new extent in place of old pages. */
- if (success)
- xen_remap_exchanged_ptes(vstart, order, NULL, out_frame);
- else
- xen_remap_exchanged_ptes(vstart, order, in_frames, 0);
-
- spin_unlock_irqrestore(&xen_reservation_lock, flags);
-
- return success ? 0 : -ENOMEM;
-}
-EXPORT_SYMBOL_GPL(xen_create_contiguous_region);
-
-/*
- * Reverse of xen_create_contiguous_region(): exchange the single
- * order-sized extent at vstart for 2^order individual frames. The
- * region is zeroed first.
- *
- * Nothing is reported to the caller. Auto-translated guests and orders
- * above MAX_CONTIG_ORDER return without doing anything, and if the
- * exchange fails the original extent is mapped back.
- */
-void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
-{
- unsigned long *out_frames = discontig_frames, in_frame;
- unsigned long flags;
- int success;
-
- if (xen_feature(XENFEAT_auto_translated_physmap))
- return;
-
- /* discontig_frames only has room for 1 << MAX_CONTIG_ORDER entries */
- if (unlikely(order > MAX_CONTIG_ORDER))
- return;
-
- memset((void *) vstart, 0, PAGE_SIZE << order);
-
- spin_lock_irqsave(&xen_reservation_lock, flags);
-
- /* 1. Find start MFN of contiguous extent. */
- in_frame = virt_to_mfn(vstart);
-
- /* 2. Zap current PTEs. */
- xen_zap_pfn_range(vstart, order, NULL, out_frames);
-
- /* 3. Do the exchange for non-contiguous MFNs. */
- success = xen_exchange_memory(1, order, &in_frame, 1UL << order,
- 0, out_frames, 0);
-
- /* 4. Map new pages in place of old pages. */
- if (success)
- xen_remap_exchanged_ptes(vstart, order, out_frames, 0);
- else
- xen_remap_exchanged_ptes(vstart, order, NULL, in_frame);
-
- spin_unlock_irqrestore(&xen_reservation_lock, flags);
-}
-EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
-
-#ifdef CONFIG_XEN_PVHVM
-/*
- * exit_mmap hook for HVM guests: issue HVMOP_pagetable_dying for the
- * physical address of mm->pgd. A negative result only triggers a
- * one-time warning.
- */
-static void xen_hvm_exit_mmap(struct mm_struct *mm)
-{
- struct xen_hvm_pagetable_dying a;
- int rc;
-
- a.domid = DOMID_SELF;
- a.gpa = __pa(mm->pgd);
- rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a);
- WARN_ON_ONCE(rc < 0);
-}
-
-/*
- * Probe for HVMOP_pagetable_dying by issuing it with a zero gpa.
- * Returns 1 when the call does not report an error, 0 (after logging a
- * debug message) when it does.
- */
-static int is_pagetable_dying_supported(void)
-{
-	struct xen_hvm_pagetable_dying probe;
-	int rc;
-
-	probe.domid = DOMID_SELF;
-	probe.gpa = 0x00;
-
-	rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &probe);
-	if (rc >= 0)
-		return 1;
-
-	printk(KERN_DEBUG "HVMOP_pagetable_dying not supported\n");
-	return 0;
-}
-
-/* Install xen_hvm_exit_mmap, but only if the hypervisor accepts the op */
-void __init xen_hvm_init_mmu_ops(void)
-{
-	if (!is_pagetable_dying_supported())
-		return;
-
-	pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap;
-}
-#endif
-
-/* Number of ptes handed to HYPERVISOR_mmu_update in one call */
-#define REMAP_BATCH_SIZE 16
-
-/* Cursor shared between xen_remap_domain_mfn_range() and its pte callback */
-struct remap_data {
- unsigned long mfn; /* next frame to map; advanced once per pte */
- pgprot_t prot; /* protection applied to every pte */
- struct mmu_update *mmu_update; /* next free slot in the batch array */
-};
-
-/*
- * apply_to_page_range() callback: queue one mmu_update request that
- * will point *ptep at the next frame in the remap_data cursor. Nothing
- * is written to the pte here. Always returns 0.
- */
-static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
- unsigned long addr, void *data)
-{
- struct remap_data *rmd = data;
- pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot));
-
- /* ptr is the machine address of the pte, val the new pte contents */
- rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
- rmd->mmu_update->val = pte_val_ma(pte);
- rmd->mmu_update++;
-
- return 0;
-}
-
-/*
- * Map nr consecutive frames, starting at mfn and belonging to domain
- * domid, into vma at addr, in batches of up to REMAP_BATCH_SIZE pages.
- * _PAGE_IOMAP is added to prot.
- *
- * The vma must have VM_PFNMAP, VM_RESERVED and VM_IO all set, else BUG.
- *
- * Returns 0 on success, the apply_to_page_range() error if building a
- * batch fails, or -EFAULT if HYPERVISOR_mmu_update() reports an error.
- * flush_tlb_all() runs on every exit path. Batches applied before a
- * failure are not undone here.
- *
- * NOTE(review): the hypercall's own error value is discarded in favour
- * of -EFAULT - confirm that no caller needs the original code.
- * NOTE(review): nr is a signed int and only tested against zero;
- * presumably callers never pass a negative count - verify.
- */
-int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
- unsigned long addr,
- unsigned long mfn, int nr,
- pgprot_t prot, unsigned domid)
-{
- struct remap_data rmd;
- struct mmu_update mmu_update[REMAP_BATCH_SIZE];
- int batch;
- unsigned long range;
- int err = 0;
-
- prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP);
-
- BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) ==
- (VM_PFNMAP | VM_RESERVED | VM_IO)));
-
- rmd.mfn = mfn;
- rmd.prot = prot;
-
- while (nr) {
- batch = min(REMAP_BATCH_SIZE, nr);
- range = (unsigned long)batch << PAGE_SHIFT;
-
- /* Rewind the cursor to the start of the on-stack batch array */
- rmd.mmu_update = mmu_update;
- err = apply_to_page_range(vma->vm_mm, addr, range,
- remap_area_mfn_pte_fn, &rmd);
- if (err)
- goto out;
-
- err = -EFAULT;
- if (HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid) < 0)
- goto out;
-
- nr -= batch;
- addr += range;
- }
-
- err = 0;
-out:
-
- flush_tlb_all();
-
- return err;
-}
-EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
diff --git a/ANDROID_3.4.5/arch/x86/xen/mmu.h b/ANDROID_3.4.5/arch/x86/xen/mmu.h
deleted file mode 100644
index 73809bb9..00000000
--- a/ANDROID_3.4.5/arch/x86/xen/mmu.h
+++ /dev/null
@@ -1,26 +0,0 @@
-#ifndef _XEN_MMU_H
-/*
- * Define the guard macro: without it the #ifndef above is always true
- * and a second inclusion redefines enum pt_level.
- */
-#define _XEN_MMU_H
-
-#include <linux/linkage.h>
-#include <asm/page.h>
-
-/* Identifies a pagetable level */
-enum pt_level {
-	PT_PGD,
-	PT_PUD,
-	PT_PMD,
-	PT_PTE
-};
-
-
-/* Set the p2m entry for pfn to mfn */
-bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
-
-void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
-
-pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
-void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
-				  pte_t *ptep, pte_t pte);
-
-unsigned long xen_read_cr2_direct(void);
-
-/* Boot-time installers for the Xen pv_mmu_ops hooks */
-extern void xen_init_mmu_ops(void);
-extern void xen_hvm_init_mmu_ops(void);
-#endif /* _XEN_MMU_H */
diff --git a/ANDROID_3.4.5/arch/x86/xen/multicalls.c b/ANDROID_3.4.5/arch/x86/xen/multicalls.c
deleted file mode 100644
index 0d82003e..00000000
--- a/ANDROID_3.4.5/arch/x86/xen/multicalls.c
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * Xen hypercall batching.
- *
- * Xen allows multiple hypercalls to be issued at once, using the
- * multicall interface. This allows the cost of trapping into the
- * hypervisor to be amortized over several calls.
- *
- * This file implements a simple interface for multicalls. There's a
- * per-cpu buffer of outstanding multicalls. When you want to queue a
- * multicall for issuing, you can allocate a multicall slot for the
- * call and its arguments, along with storage for space which is
- * pointed to by the arguments (for passing pointers to structures,
- * etc). When the multicall is actually issued, all the space for the
- * commands and allocated memory is freed for reuse.
- *
- * Multicalls are flushed whenever any of the buffers get full, or
- * when explicitly requested. There's no way to get per-multicall
- * return results back. It will WARN if any of the multicalls fail,
- * and BUG if the multicall hypercall itself fails.
- *
- * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
- */
-#include <linux/percpu.h>
-#include <linux/hardirq.h>
-#include <linux/debugfs.h>
-
-#include <asm/xen/hypercall.h>
-
-#include "multicalls.h"
-#include "debugfs.h"
-
-/* Maximum number of multicall entries (and callbacks) queued per cpu */
-#define MC_BATCH 32
-
-/* Non-zero keeps a copy of each batch and its callers for failure dumps */
-#define MC_DEBUG 0
-
-/* Size in bytes of the per-cpu argument area */
-#define MC_ARGS (MC_BATCH * 16)
-
-
-/* Per-cpu queue of pending multicalls, their argument space and callbacks */
-struct mc_buffer {
- /* Fill levels of entries[], args[] and callbacks[] respectively */
- unsigned mcidx, argidx, cbidx;
- struct multicall_entry entries[MC_BATCH];
-#if MC_DEBUG
- struct multicall_entry debug[MC_BATCH];
- void *caller[MC_BATCH];
-#endif
- unsigned char args[MC_ARGS];
- struct callback {
- void (*fn)(void *);
- void *data;
- } callbacks[MC_BATCH];
-};
-
-static DEFINE_PER_CPU(struct mc_buffer, mc_buffer);
-/* Interrupt flags saved by xen_mc_batch(), restored by xen_mc_issue() */
-DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags);
-
-/*
- * Issue every multicall queued on this cpu, reset the queue, and then
- * run the queued callbacks. Must be called with preemption disabled.
- *
- * A non-zero return from HYPERVISOR_multicall() itself is a BUG.
- * Individual calls with a negative result are only counted, and
- * reported with a single WARN_ON at the end.
- */
-void xen_mc_flush(void)
-{
- struct mc_buffer *b = &__get_cpu_var(mc_buffer);
- struct multicall_entry *mc;
- int ret = 0;
- unsigned long flags;
- int i;
-
- BUG_ON(preemptible());
-
- /* Disable interrupts in case someone comes in and queues
- something in the middle */
- local_irq_save(flags);
-
- trace_xen_mc_flush(b->mcidx, b->argidx, b->cbidx);
-
- switch (b->mcidx) {
- case 0:
- /* no-op */
- BUG_ON(b->argidx != 0);
- break;
-
- case 1:
- /* Singleton multicall - bypass multicall machinery
- and just do the call directly. */
- mc = &b->entries[0];
-
- mc->result = privcmd_call(mc->op,
- mc->args[0], mc->args[1], mc->args[2],
- mc->args[3], mc->args[4]);
- ret = mc->result < 0;
- break;
-
- default:
-#if MC_DEBUG
- memcpy(b->debug, b->entries,
- b->mcidx * sizeof(struct multicall_entry));
-#endif
-
- if (HYPERVISOR_multicall(b->entries, b->mcidx) != 0)
- BUG();
- /* Count the entries that came back with a negative result */
- for (i = 0; i < b->mcidx; i++)
- if (b->entries[i].result < 0)
- ret++;
-
-#if MC_DEBUG
- if (ret) {
- printk(KERN_ERR "%d multicall(s) failed: cpu %d\n",
- ret, smp_processor_id());
- dump_stack();
- for (i = 0; i < b->mcidx; i++) {
- printk(KERN_DEBUG " call %2d/%d: op=%lu arg=[%lx] result=%ld\t%pF\n",
- i+1, b->mcidx,
- b->debug[i].op,
- b->debug[i].args[0],
- b->entries[i].result,
- b->caller[i]);
- }
- }
-#endif
- }
-
- /* The queue is emptied before any callback runs */
- b->mcidx = 0;
- b->argidx = 0;
-
- for (i = 0; i < b->cbidx; i++) {
- struct callback *cb = &b->callbacks[i];
-
- (*cb->fn)(cb->data);
- }
- b->cbidx = 0;
-
- local_irq_restore(flags);
-
- WARN_ON(ret);
-}
-
-/*
- * Reserve the next multicall entry plus args bytes of argument space,
- * the latter starting at an offset rounded up to sizeof(u64).
- *
- * If there are no free entries, or the argument space would reach
- * MC_ARGS, the pending batch is flushed first. Must be called with
- * preemption disabled. A request that still does not fit after the
- * flush trips the final BUG_ON.
- */
-struct multicall_space __xen_mc_entry(size_t args)
-{
- struct mc_buffer *b = &__get_cpu_var(mc_buffer);
- struct multicall_space ret;
- unsigned argidx = roundup(b->argidx, sizeof(u64));
-
- trace_xen_mc_entry_alloc(args);
-
- BUG_ON(preemptible());
- BUG_ON(b->argidx >= MC_ARGS);
-
- if (unlikely(b->mcidx == MC_BATCH ||
- (argidx + args) >= MC_ARGS)) {
- trace_xen_mc_flush_reason((b->mcidx == MC_BATCH) ?
- XEN_MC_FL_BATCH : XEN_MC_FL_ARGS);
- xen_mc_flush();
- /* The flush reset b->argidx, so recompute the start offset */
- argidx = roundup(b->argidx, sizeof(u64));
- }
-
- ret.mc = &b->entries[b->mcidx];
-#if MC_DEBUG
- b->caller[b->mcidx] = __builtin_return_address(0);
-#endif
- b->mcidx++;
- ret.args = &b->args[argidx];
- b->argidx = argidx + args;
-
- BUG_ON(b->argidx >= MC_ARGS);
- return ret;
-}
-
-/*
- * Grow the argument space of the most recently queued multicall by
- * size bytes, provided that call's op matches.
- *
- * Returns { NULL, NULL } if nothing is queued, the last op differs, or
- * the argument area cannot take size more bytes; this function never
- * flushes. On success mc is the last entry and args points at the new
- * bytes, which start at the current argidx with no rounding applied.
- */
-struct multicall_space xen_mc_extend_args(unsigned long op, size_t size)
-{
- struct mc_buffer *b = &__get_cpu_var(mc_buffer);
- struct multicall_space ret = { NULL, NULL };
-
- BUG_ON(preemptible());
- BUG_ON(b->argidx >= MC_ARGS);
-
- if (unlikely(b->mcidx == 0 ||
- b->entries[b->mcidx - 1].op != op)) {
- trace_xen_mc_extend_args(op, size, XEN_MC_XE_BAD_OP);
- goto out;
- }
-
- if (unlikely((b->argidx + size) >= MC_ARGS)) {
- trace_xen_mc_extend_args(op, size, XEN_MC_XE_NO_SPACE);
- goto out;
- }
-
- ret.mc = &b->entries[b->mcidx - 1];
- ret.args = &b->args[b->argidx];
- b->argidx += size;
-
- BUG_ON(b->argidx >= MC_ARGS);
-
- trace_xen_mc_extend_args(op, size, XEN_MC_XE_OK);
-out:
- return ret;
-}
-
-/*
- * Queue fn(data) to be run by the next xen_mc_flush() on this cpu. If
- * the callback array is already full, the pending batch is flushed
- * first to make room.
- */
-void xen_mc_callback(void (*fn)(void *), void *data)
-{
- struct mc_buffer *b = &__get_cpu_var(mc_buffer);
- struct callback *cb;
-
- if (b->cbidx == MC_BATCH) {
- trace_xen_mc_flush_reason(XEN_MC_FL_CALLBACK);
- xen_mc_flush();
- }
-
- trace_xen_mc_callback(fn, data);
-
- cb = &b->callbacks[b->cbidx++];
- cb->fn = fn;
- cb->data = data;
-}
diff --git a/ANDROID_3.4.5/arch/x86/xen/multicalls.h b/ANDROID_3.4.5/arch/x86/xen/multicalls.h
deleted file mode 100644
index 9c2e74f9..00000000
--- a/ANDROID_3.4.5/arch/x86/xen/multicalls.h
+++ /dev/null
@@ -1,68 +0,0 @@
-#ifndef _XEN_MULTICALLS_H
-#define _XEN_MULTICALLS_H
-
-#include <trace/events/xen.h>
-
-#include "xen-ops.h"
-
-/* Multicalls */
-/* One reserved multicall: the entry to fill in and its argument bytes */
-struct multicall_space
-{
- struct multicall_entry *mc;
- void *args;
-};
-
-/* Allocate room for a multicall and its args */
-struct multicall_space __xen_mc_entry(size_t args);
-
-DECLARE_PER_CPU(unsigned long, xen_mc_irq_flags);
-
-/* Call to start a batch of multiple __xen_mc_entry()s. Must be
- paired with xen_mc_issue(), which restores the interrupt flags
- saved here in the per-cpu xen_mc_irq_flags. */
-static inline void xen_mc_batch(void)
-{
- unsigned long flags;
-
- /* need to disable interrupts until this entry is complete */
- local_irq_save(flags);
- trace_xen_mc_batch(paravirt_get_lazy_mode());
- __this_cpu_write(xen_mc_irq_flags, flags);
-}
-
-/* Start a batch and reserve its first entry in one step; like
- xen_mc_batch(), this must be paired with xen_mc_issue() */
-static inline struct multicall_space xen_mc_entry(size_t args)
-{
- xen_mc_batch();
- return __xen_mc_entry(args);
-}
-
-/* Flush all pending multicalls */
-void xen_mc_flush(void);
-
-/* Issue a multicall if we're not in a lazy mode: the batch is flushed
- unless the current lazy mode has a bit in common with mode. The
- interrupt flags are restored either way. */
-static inline void xen_mc_issue(unsigned mode)
-{
- trace_xen_mc_issue(mode);
-
- if ((paravirt_get_lazy_mode() & mode) == 0)
- xen_mc_flush();
-
- /* restore flags saved in xen_mc_batch */
- local_irq_restore(this_cpu_read(xen_mc_irq_flags));
-}
-
-/* Set up a callback to be called when the current batch is flushed */
-void xen_mc_callback(void (*fn)(void *), void *data);
-
-/*
- * Try to extend the arguments of the previous multicall command. The
- * previous command's op must match. If it does, then it attempts to
- * extend the argument space allocated to the multicall entry by
- * arg_size bytes.
- *
- * The returned multicall_space will return with mc pointing to the
- * command on success, or NULL on failure, and args pointing to the
- * newly allocated space.
- */
-struct multicall_space xen_mc_extend_args(unsigned long op, size_t arg_size);
-
-#endif /* _XEN_MULTICALLS_H */
diff --git a/ANDROID_3.4.5/arch/x86/xen/p2m.c b/ANDROID_3.4.5/arch/x86/xen/p2m.c
deleted file mode 100644
index 1b267e75..00000000
--- a/ANDROID_3.4.5/arch/x86/xen/p2m.c
+++ /dev/null
@@ -1,949 +0,0 @@
-/*
- * Xen leaves the responsibility for maintaining p2m mappings to the
- * guests themselves, but it must also access and update the p2m array
- * during suspend/resume when all the pages are reallocated.
- *
- * The p2m table is logically a flat array, but we implement it as a
- * three-level tree to allow the address space to be sparse.
- *
- * Xen
- * |
- * p2m_top p2m_top_mfn
- * / \ / \
- * p2m_mid p2m_mid p2m_mid_mfn p2m_mid_mfn
- * / \ / \ / /
- * p2m p2m p2m p2m p2m p2m p2m ...
- *
- * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p.
- *
- * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the
- * maximum representable pseudo-physical address space is:
- * P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages
- *
- * P2M_PER_PAGE depends on the architecture, as a mfn is always
- * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to
- * 512 and 1024 entries respectively.
- *
- * In short, these structures contain the Machine Frame Number (MFN) of the PFN.
- *
- * However not all entries are filled with MFNs. Specifically for all other
- * leaf entries, or for the top root, or middle one, for which there is a void
- * entry, we assume it is "missing". So (for example)
- * pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY.
- *
- * We also have the possibility of setting 1-1 mappings on certain regions, so
- * that:
- * pfn_to_mfn(0xc0000)=0xc0000
- *
- * The benefit of this is, that we can assume for non-RAM regions (think
- * PCI BARs, or ACPI spaces), we can create mappings easily b/c we
- * get the PFN value to match the MFN.
- *
- * For this to work efficiently we have one new page p2m_identity and
- * allocate (via reserved_brk) any other pages we need to cover the sides
- * (1GB or 4MB boundary violations). All entries in p2m_identity are set to
- * INVALID_P2M_ENTRY type (Xen toolstack only recognizes that and MFNs,
- * no other fancy value).
- *
- * On lookup we spot that the entry points to p2m_identity and return the
- * identity value instead of dereferencing and returning INVALID_P2M_ENTRY.
- * If the entry points to an allocated page, we just proceed as before and
- * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in
- * appropriate functions (pfn_to_mfn).
- *
- * The reason for having the IDENTITY_FRAME_BIT instead of just returning the
- * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a
- * non-identity pfn. To protect ourselves against that we elect to set (and get) the
- * IDENTITY_FRAME_BIT on all identity mapped PFNs.
- *
- * This simplistic diagram is used to explain the more subtle piece of code.
- * There is also a diagram of the P2M at the end that can help.
- * Imagine your E820 looking as so:
- *
- * 1GB 2GB
- * /-------------------+---------\/----\ /----------\ /---+-----\
- * | System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM |
- * \-------------------+---------/\----/ \----------/ \---+-----/
- * ^- 1029MB ^- 2001MB
- *
- * [1029MB = 263424 (0x40500), 2001MB = 512256 (0x7D100),
- * 2048MB = 524288 (0x80000)]
- *
- * And dom0_mem=max:3GB,1GB is passed in to the guest, meaning memory past 1GB
- * is actually not present (would have to kick the balloon driver to put it in).
- *
- * When we are told to set the PFNs for identity mapping (see patch: "xen/setup:
- * Set identity mapping for non-RAM E820 and E820 gaps.") we pass in the start
- * of the PFN and the end PFN (263424 and 512256 respectively). The first step
- * is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page
- * covers 512^2 of page estate (1GB) and in case the start or end PFN is not
- * aligned on 512^2*PAGE_SIZE (1GB) we loop on aligned 1GB PFNs from start pfn
- * to end pfn. We reserve_brk top leaf pages if they are missing (means they
- * point to p2m_mid_missing).
- *
- * With the E820 example above, 263424 is not 1GB aligned so we allocate a
- * reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000.
- * Each entry in the allocated page is "missing" (points to p2m_missing).
- *
- * Next stage is to determine if we need to do a more granular boundary check
- * on the 4MB (or 2MB depending on architecture) off the start and end pfn's.
- * We check if the start pfn and end pfn violate that boundary check, and if
- * so reserve_brk a middle (p2m[x][y]) leaf page. This way we have a much finer
- * granularity of setting which PFNs are missing and which ones are identity.
- * In our example 263424 and 512256 both fail the check so we reserve_brk two
- * pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing"
- * values) and assign them to p2m[1][2] and p2m[1][488] respectively.
- *
- * At this point we would at minimum reserve_brk one page, but could be up to
- * three. Each call to set_phys_range_identity has at maximum a three page
- * cost. If we were to query the P2M at this stage, all those entries from
- * start PFN through end PFN (so 1029MB -> 2001MB) would return
- * INVALID_P2M_ENTRY ("missing").
- *
- * The next step is to walk from the start pfn to the end pfn setting
- * the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity.
- * If we find that the middle leaf is pointing to p2m_missing we can swap it
- * over to p2m_identity - this way covering 4MB (or 2MB) PFN space. At this
- * point we do not need to worry about boundary alignment (so no need to
- * reserve_brk a middle page, figure out which PFNs are "missing" and which
- * ones are identity), as that has been done earlier. If we find that the
- * middle leaf is not occupied by p2m_identity or p2m_missing, we dereference
- * that page (which covers 512 PFNs) and set the appropriate PFN with
- * IDENTITY_FRAME_BIT. In our example 263424 and 512256 end up there, and we
- * set from p2m[1][2][256->511] and p2m[1][488][0->256] with
- * IDENTITY_FRAME_BIT set.
- *
- * All other regions that are void (or not filled) either point to p2m_missing
- * (considered missing) or have the default value of INVALID_P2M_ENTRY (also
- * considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511]
- * contain the INVALID_P2M_ENTRY value and are considered "missing."
- *
- * This is what the p2m ends up looking (for the E820 above) with this
- * fabulous drawing:
- *
- * p2m /--------------\
- * /-----\ | &mfn_list[0],| /-----------------\
- * | 0 |------>| &mfn_list[1],| /---------------\ | ~0, ~0, .. |
- * |-----| | ..., ~0, ~0 | | ~0, ~0, [x]---+----->| IDENTITY [@256] |
- * | 1 |---\ \--------------/ | [p2m_identity]+\ | IDENTITY [@257] |
- * |-----| \ | [p2m_identity]+\\ | .... |
- * | 2 |--\ \-------------------->| ... | \\ \----------------/
- * |-----| \ \---------------/ \\
- * | 3 |\ \ \\ p2m_identity
- * |-----| \ \-------------------->/---------------\ /-----------------\
- * | .. +->+ | [p2m_identity]+-->| ~0, ~0, ~0, ... |
- * \-----/ / | [p2m_identity]+-->| ..., ~0 |
- * / /---------------\ | .... | \-----------------/
- * / | IDENTITY[@0] | /-+-[x], ~0, ~0.. |
- * / | IDENTITY[@256]|<----/ \---------------/
- * / | ~0, ~0, .... |
- * | \---------------/
- * |
- * p2m_missing p2m_missing
- * /------------------\ /------------\
- * | [p2m_mid_missing]+---->| ~0, ~0, ~0 |
- * | [p2m_mid_missing]+---->| ..., ~0 |
- * \------------------/ \------------/
- *
- * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT)
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/list.h>
-#include <linux/hash.h>
-#include <linux/sched.h>
-#include <linux/seq_file.h>
-
-#include <asm/cache.h>
-#include <asm/setup.h>
-
-#include <asm/xen/page.h>
-#include <asm/xen/hypercall.h>
-#include <asm/xen/hypervisor.h>
-#include <xen/grant_table.h>
-
-#include "multicalls.h"
-#include "xen-ops.h"
-
-static void __init m2p_override_init(void);
-
-unsigned long xen_max_p2m_pfn __read_mostly;
-
-/*
- * Number of entries that fit in one page at each level of the tree: a leaf
- * page holds unsigned long values, a mid page holds pointers to leaves and
- * the top page holds pointers to mid pages.
- */
-#define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
-#define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *))
-#define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **))
-
-/* Number of pfns a completely populated three-level tree can describe. */
-#define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)
-
-/* Placeholders for holes in the address space */
-static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE);
-static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE);
-static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_missing_mfn, P2M_MID_PER_PAGE);
-
-static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
-static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
-static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);
-
-static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE);
-
-RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
-RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
-
-/* We might hit two boundary violations at the start and end, at max each
- * boundary violation will require three middle nodes. */
-RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3);
-
-/* Slot of @pfn in the top page; BUGs when @pfn lies outside the tree. */
-static inline unsigned p2m_top_index(unsigned long pfn)
-{
-	BUG_ON(pfn >= MAX_P2M_PFN);
-
-	/* each top slot spans one full mid page worth of leaf pages */
-	return pfn / (P2M_PER_PAGE * P2M_MID_PER_PAGE);
-}
-
-/* Slot of @pfn inside its mid-level page. */
-static inline unsigned p2m_mid_index(unsigned long pfn)
-{
-	unsigned long leaf_nr = pfn / P2M_PER_PAGE;
-
-	return leaf_nr % P2M_MID_PER_PAGE;
-}
-
-/* Slot of @pfn inside its leaf page. */
-static inline unsigned p2m_index(unsigned long pfn)
-{
-	unsigned long slot = pfn % P2M_PER_PAGE;
-
-	return slot;
-}
-
-/* Point every slot of a top-level page at the shared p2m_mid_missing page. */
-static void p2m_top_init(unsigned long ***top)
-{
-	unsigned long ***slot = top;
-	unsigned long ***end = top + P2M_TOP_PER_PAGE;
-
-	while (slot < end)
-		*slot++ = p2m_mid_missing;
-}
-
-/* Fill a top-level mfn page with the mfn of p2m_mid_missing_mfn. */
-static void p2m_top_mfn_init(unsigned long *top)
-{
-	unsigned slot = 0;
-
-	while (slot < P2M_TOP_PER_PAGE) {
-		top[slot] = virt_to_mfn(p2m_mid_missing_mfn);
-		slot++;
-	}
-}
-
-/* Point every slot of the top-level mfn pointer page at p2m_mid_missing_mfn. */
-static void p2m_top_mfn_p_init(unsigned long **top)
-{
-	unsigned long **slot;
-
-	for (slot = top; slot != top + P2M_TOP_PER_PAGE; slot++)
-		*slot = p2m_mid_missing_mfn;
-}
-
-/* Point every slot of a mid-level page at the shared p2m_missing leaf. */
-static void p2m_mid_init(unsigned long **mid)
-{
-	unsigned long **const end = mid + P2M_MID_PER_PAGE;
-
-	while (mid < end)
-		*mid++ = p2m_missing;
-}
-
-/* Fill a mid-level mfn page with the mfn of the shared p2m_missing leaf. */
-static void p2m_mid_mfn_init(unsigned long *mid)
-{
-	unsigned entry = 0;
-
-	/* a mid page always has at least one slot, so test at the bottom */
-	do {
-		mid[entry] = virt_to_mfn(p2m_missing);
-	} while (++entry < P2M_MID_PER_PAGE);
-}
-
-/*
- * Fill a leaf page with INVALID_P2M_ENTRY.
- *
- * A leaf holds P2M_PER_PAGE unsigned long entries, so that is the bound
- * to iterate to; P2M_MID_PER_PAGE counts the pointers in a mid-level page.
- */
-static void p2m_init(unsigned long *p2m)
-{
-	unsigned i;
-
-	for (i = 0; i < P2M_PER_PAGE; i++)
-		p2m[i] = INVALID_P2M_ENTRY;
-}
-
-/*
- * Build the parallel p2m_top_mfn and p2m_mid_mfn structures
- *
- * This is called both at boot time, and after resuming from suspend:
- * - At boot time we're called very early, and must use extend_brk()
- * to allocate memory.
- *
- * - After resume we're called from within stop_machine, but the mfn
- * tree should already be completely allocated.
- */
-void __ref xen_build_mfn_list_list(void)
-{
- unsigned long pfn;
-
- /* Pre-initialize p2m_top_mfn to be completely missing */
- if (p2m_top_mfn == NULL) {
- p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
- p2m_mid_mfn_init(p2m_mid_missing_mfn);
-
- p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
- p2m_top_mfn_p_init(p2m_top_mfn_p);
-
- p2m_top_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
- p2m_top_mfn_init(p2m_top_mfn);
- } else {
- /* Reinitialise, mfn's all change after migration */
- p2m_mid_mfn_init(p2m_mid_missing_mfn);
- }
-
- /* Walk the tree one leaf page (P2M_PER_PAGE pfns) per iteration */
- for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) {
- unsigned topidx = p2m_top_index(pfn);
- unsigned mididx = p2m_mid_index(pfn);
- unsigned long **mid;
- unsigned long *mid_mfn_p;
-
- mid = p2m_top[topidx];
- mid_mfn_p = p2m_top_mfn_p[topidx];
-
- /* Don't bother allocating any mfn mid levels if
- * they're just missing, just update the stored mfn,
- * since all could have changed over a migrate.
- */
- if (mid == p2m_mid_missing) {
- BUG_ON(mididx);
- BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
- p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn);
- /* skip the rest of this mid page; the loop adds the last leaf */
- pfn += (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE;
- continue;
- }
-
- if (mid_mfn_p == p2m_mid_missing_mfn) {
- /*
- * XXX boot-time only! We should never find
- * missing parts of the mfn tree after
- * runtime. extend_brk() will BUG if we call
- * it too late.
- */
- mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
- p2m_mid_mfn_init(mid_mfn_p);
-
- p2m_top_mfn_p[topidx] = mid_mfn_p;
- }
-
- /* Record the current mfns of the mid-mfn page and of this leaf */
- p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
- mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]);
- }
-}
-
-/*
- * Publish the top of the mfn tree and the highest p2m pfn in the shared
- * info page.  Must not be called while the dummy shared info is in use.
- */
-void xen_setup_mfn_list_list(void)
-{
-	unsigned long top_mfn;
-
-	BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
-
-	top_mfn = virt_to_mfn(p2m_top_mfn);
-	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = top_mfn;
-	HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn;
-}
-
-/*
- * Set up p2m_top to point to the domain-builder provided p2m pages
- *
- * Allocates (via extend_brk) and initialises the shared placeholder pages
- * p2m_missing, p2m_mid_missing, p2m_top and p2m_identity, then hooks each
- * P2M_PER_PAGE-sized slice of xen_start_info->mfn_list into the tree as a
- * leaf. Finishes by initialising the m2p override hash.
- */
-void __init xen_build_dynamic_phys_to_machine(void)
-{
- unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
- unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
- unsigned long pfn;
-
- xen_max_p2m_pfn = max_pfn;
-
- p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
- p2m_init(p2m_missing);
-
- p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
- p2m_mid_init(p2m_mid_missing);
-
- p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
- p2m_top_init(p2m_top);
-
- p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
- p2m_init(p2m_identity);
-
- /*
- * The domain builder gives us a pre-constructed p2m array in
- * mfn_list for all the pages initially given to us, so we just
- * need to graft that into our tree structure.
- */
- for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) {
- unsigned topidx = p2m_top_index(pfn);
- unsigned mididx = p2m_mid_index(pfn);
-
- /* First leaf under this top slot: give it a private mid page */
- if (p2m_top[topidx] == p2m_mid_missing) {
- unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
- p2m_mid_init(mid);
-
- p2m_top[topidx] = mid;
- }
-
- /*
- * As long as the mfn_list has enough entries to completely
- * fill a p2m page, pointing into the array is ok. But if
- * not the entries beyond the last pfn will be undefined.
- */
- /*
- * NOTE(review): the fill below writes mfn_list slots at and past
- * index max_pfn; presumably the builder's array is backed by whole
- * pages so this stays in bounds - confirm.
- */
- if (unlikely(pfn + P2M_PER_PAGE > max_pfn)) {
- unsigned long p2midx;
-
- p2midx = max_pfn % P2M_PER_PAGE;
- for ( ; p2midx < P2M_PER_PAGE; p2midx++)
- mfn_list[pfn + p2midx] = INVALID_P2M_ENTRY;
- }
- p2m_top[topidx][mididx] = &mfn_list[pfn];
- }
-
- m2p_override_init();
-}
-
-/*
- * Look up the p2m entry for @pfn.  A pfn beyond the reach of the tree
- * yields INVALID_P2M_ENTRY.
- */
-unsigned long get_phys_to_machine(unsigned long pfn)
-{
-	unsigned long *leaf;
-
-	if (unlikely(pfn >= MAX_P2M_PFN))
-		return INVALID_P2M_ENTRY;
-
-	leaf = p2m_top[p2m_top_index(pfn)][p2m_mid_index(pfn)];
-
-	/*
-	 * The shared p2m_identity page is filled with INVALID_P2M_ENTRY
-	 * exactly like p2m_missing, so its contents must not be handed
-	 * back; synthesise the identity frame instead.
-	 */
-	if (leaf == p2m_identity)
-		return IDENTITY_FRAME(pfn);
-
-	return leaf[p2m_index(pfn)];
-}
-EXPORT_SYMBOL_GPL(get_phys_to_machine);
-
-/* Get one free page for the p2m tree; callers check the result for NULL. */
-static void *alloc_p2m_page(void)
-{
-	unsigned long page = __get_free_page(GFP_KERNEL | __GFP_REPEAT);
-
-	return (void *)page;
-}
-
-/* Give back a page obtained from alloc_p2m_page(). */
-static void free_p2m_page(void *p)
-{
-	unsigned long addr = (unsigned long)p;
-
-	free_page(addr);
-}
-
-/*
- * Fully allocate the p2m structure for a given pfn.  We need to check
- * that both the top and mid levels are allocated, and make sure the
- * parallel mfn tree is kept in sync.  We may race with other cpus, so
- * the new pages are installed with cmpxchg; if we lose the race then
- * simply free the page we allocated and use the one that's there.
- *
- * "Use the one that's there" matters: after a lost race the local
- * pointer must be switched to the winner's page, otherwise the code
- * further down would keep dereferencing the page that was just freed.
- *
- * Returns true when every level covering @pfn is backed by a private
- * page, false if a page allocation failed.
- */
-static bool alloc_p2m(unsigned long pfn)
-{
-	unsigned topidx, mididx;
-	unsigned long ***top_p, **mid;
-	unsigned long *top_mfn_p, *mid_mfn;
-	unsigned long *p2m_orig;
-
-	topidx = p2m_top_index(pfn);
-	mididx = p2m_mid_index(pfn);
-
-	top_p = &p2m_top[topidx];
-	mid = *top_p;
-
-	if (mid == p2m_mid_missing) {
-		unsigned long **old_mid;
-
-		/* Mid level is missing, allocate a new one */
-		mid = alloc_p2m_page();
-		if (!mid)
-			return false;
-
-		p2m_mid_init(mid);
-
-		old_mid = cmpxchg(top_p, p2m_mid_missing, mid);
-		if (old_mid != p2m_mid_missing) {
-			/* Lost the race: drop our page, adopt the winner's */
-			free_p2m_page(mid);
-			mid = old_mid;
-		}
-	}
-
-	top_mfn_p = &p2m_top_mfn[topidx];
-	mid_mfn = p2m_top_mfn_p[topidx];
-
-	BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);
-
-	if (mid_mfn == p2m_mid_missing_mfn) {
-		/* Separately check the mid mfn level */
-		unsigned long missing_mfn;
-		unsigned long mid_mfn_mfn;
-		unsigned long old_mfn;
-
-		mid_mfn = alloc_p2m_page();
-		if (!mid_mfn)
-			return false;
-
-		p2m_mid_mfn_init(mid_mfn);
-
-		missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
-		mid_mfn_mfn = virt_to_mfn(mid_mfn);
-		old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn);
-		if (old_mfn != missing_mfn) {
-			/*
-			 * Lost the race.  The winner may not have updated
-			 * p2m_top_mfn_p[] yet, so derive its page from the
-			 * mfn that cmpxchg() reported.
-			 */
-			free_p2m_page(mid_mfn);
-			mid_mfn = mfn_to_virt(old_mfn);
-		} else {
-			p2m_top_mfn_p[topidx] = mid_mfn;
-		}
-	}
-
-	/* Read the leaf pointer once so check and cmpxchg agree on it */
-	p2m_orig = mid[mididx];
-	if (p2m_orig == p2m_identity || p2m_orig == p2m_missing) {
-		/* p2m leaf page is missing */
-		unsigned long *p2m;
-
-		p2m = alloc_p2m_page();
-		if (!p2m)
-			return false;
-
-		p2m_init(p2m);
-
-		if (cmpxchg(&mid[mididx], p2m_orig, p2m) != p2m_orig)
-			free_p2m_page(p2m);
-		else
-			mid_mfn[mididx] = virt_to_mfn(p2m);
-	}
-
-	return true;
-}
-
-/*
- * Boot-time helper for set_phys_range_identity(): when @pfn does not sit
- * on a leaf-page boundary and the leaf covering it is still the shared
- * p2m_missing page, install a private leaf (from extend_brk()) filled with
- * INVALID_P2M_ENTRY and record its mfn in the parallel mfn tree.
- *
- * Returns true if a new leaf was installed, false otherwise.
- */
-static bool __init __early_alloc_p2m(unsigned long pfn)
-{
-	unsigned topidx, mididx, idx;
-	unsigned long *p2m;
-	unsigned long *mid_mfn_p;
-
-	topidx = p2m_top_index(pfn);
-	mididx = p2m_mid_index(pfn);
-	idx = p2m_index(pfn);
-
-	/* Pfff.. No boundary cross-over, lets get out. */
-	if (!idx)
-		return false;
-
-	/* the indices are unsigned, so print them with %u */
-	WARN(p2m_top[topidx][mididx] == p2m_identity,
-		"P2M[%u][%u] == IDENTITY, should be MISSING (or alloced)!\n",
-		topidx, mididx);
-
-	/*
-	 * Could be done by xen_build_dynamic_phys_to_machine..
-	 */
-	if (p2m_top[topidx][mididx] != p2m_missing)
-		return false;
-
-	/* Boundary cross-over for the edges: */
-	p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);
-	p2m_init(p2m);
-
-	p2m_top[topidx][mididx] = p2m;
-
-	/* For save/restore we need to MFN of the P2M saved */
-	mid_mfn_p = p2m_top_mfn_p[topidx];
-	WARN(mid_mfn_p[mididx] != virt_to_mfn(p2m_missing),
-		"P2M_TOP_P[%u][%u] != MFN of p2m_missing!\n",
-		topidx, mididx);
-	mid_mfn_p[mididx] = virt_to_mfn(p2m);
-
-	return true;
-}
-/*
- * Mark the pfns in [pfn_s, pfn_e) as identity (1-1) mapped.
- *
- * Returns the number of pfns that were set. 0 is returned when either
- * bound is at or beyond MAX_P2M_PFN or when pfn_s > pfn_e. With
- * XENFEAT_auto_translated_physmap the tree is not touched and
- * pfn_e - pfn_s is returned.
- *
- * NOTE(review): the auto-translated return is taken before the
- * pfn_s > pfn_e check, so an inverted range would return a wrapped
- * difference on that path - confirm whether callers can pass one.
- */
-unsigned long __init set_phys_range_identity(unsigned long pfn_s,
- unsigned long pfn_e)
-{
- unsigned long pfn;
-
- if (unlikely(pfn_s >= MAX_P2M_PFN || pfn_e >= MAX_P2M_PFN))
- return 0;
-
- if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
- return pfn_e - pfn_s;
-
- if (pfn_s > pfn_e)
- return 0;
-
- /*
- * Step over the range one top-level slot at a time, making sure each
- * slot has a private mid page and a private mid-mfn page.
- */
- for (pfn = (pfn_s & ~(P2M_MID_PER_PAGE * P2M_PER_PAGE - 1));
- pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE));
- pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE)
- {
- unsigned topidx = p2m_top_index(pfn);
- unsigned long *mid_mfn_p;
- unsigned long **mid;
-
- mid = p2m_top[topidx];
- mid_mfn_p = p2m_top_mfn_p[topidx];
- if (mid == p2m_mid_missing) {
- mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
-
- p2m_mid_init(mid);
-
- p2m_top[topidx] = mid;
-
- BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
- }
- /* And the save/restore P2M tables.. */
- if (mid_mfn_p == p2m_mid_missing_mfn) {
- mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
- p2m_mid_mfn_init(mid_mfn_p);
-
- p2m_top_mfn_p[topidx] = mid_mfn_p;
- p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
- /* Note: we don't set mid_mfn_p[midix] here,
- * look in __early_alloc_p2m */
- }
- }
-
- /* Give unaligned start/end pfns a private leaf page */
- __early_alloc_p2m(pfn_s);
- __early_alloc_p2m(pfn_e);
-
- /* Stop at the first pfn that cannot be set; pfn then marks how far we got */
- for (pfn = pfn_s; pfn < pfn_e; pfn++)
- if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn)))
- break;
-
- /* WARN() evaluates to its condition: the debug line is printed only on full success */
- if (!WARN((pfn - pfn_s) != (pfn_e - pfn_s),
- "Identity mapping failed. We are %ld short of 1-1 mappings!\n",
- (pfn_e - pfn_s) - (pfn - pfn_s)))
- printk(KERN_DEBUG "1-1 mapping on %lx->%lx\n", pfn_s, pfn);
-
- return pfn - pfn_s;
-}
-
-/*
- * Try to install p2m mapping; fail if intermediate bits missing
- *
- * Returns true when the mapping is in place (or nothing needed to be
- * done), false when the leaf for @pfn is still the shared p2m_missing
- * page and @mfn is a real value that would have to be stored in it.
- */
-bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
-{
- unsigned topidx, mididx, idx;
-
- if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
- BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
- return true;
- }
- /* Outside the tree only "invalid" can be recorded, and it is implicit */
- if (unlikely(pfn >= MAX_P2M_PFN)) {
- BUG_ON(mfn != INVALID_P2M_ENTRY);
- return true;
- }
-
- topidx = p2m_top_index(pfn);
- mididx = p2m_mid_index(pfn);
- idx = p2m_index(pfn);
-
- /* For sparse holes where the p2m leaf has real PFN along with
- * PCI holes, stick in the PFN as the MFN value.
- */
- if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) {
- if (p2m_top[topidx][mididx] == p2m_identity)
- return true;
-
- /* Swap over from MISSING to IDENTITY if needed. */
- if (p2m_top[topidx][mididx] == p2m_missing) {
- WARN_ON(cmpxchg(&p2m_top[topidx][mididx], p2m_missing,
- p2m_identity) != p2m_missing);
- return true;
- }
- }
-
- /* The shared missing leaf must never be written; only "invalid" matches it */
- if (p2m_top[topidx][mididx] == p2m_missing)
- return mfn == INVALID_P2M_ENTRY;
-
- p2m_top[topidx][mididx][idx] = mfn;
-
- return true;
-}
-
-/*
- * Install the p2m mapping @pfn -> @mfn, allocating the intermediate pages
- * if the first attempt finds them missing.  Returns false if the
- * allocation fails or the mapping still cannot be installed afterwards.
- */
-bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
-{
-	if (likely(__set_phys_to_machine(pfn, mfn)))
-		return true;
-
-	/* intermediate pages are missing: allocate them and retry once */
-	if (!alloc_p2m(pfn))
-		return false;
-
-	return __set_phys_to_machine(pfn, mfn);
-}
-
-#define M2P_OVERRIDE_HASH_SHIFT 10
-#define M2P_OVERRIDE_HASH (1 << M2P_OVERRIDE_HASH_SHIFT)
-
-static RESERVE_BRK_ARRAY(struct list_head, m2p_overrides, M2P_OVERRIDE_HASH);
-static DEFINE_SPINLOCK(m2p_override_lock);
-
-/* Allocate the m2p override hash table from brk and empty every bucket. */
-static void __init m2p_override_init(void)
-{
-	struct list_head *bucket;
-
-	m2p_overrides = extend_brk(sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH,
-				   sizeof(unsigned long));
-
-	for (bucket = m2p_overrides;
-	     bucket < m2p_overrides + M2P_OVERRIDE_HASH; bucket++)
-		INIT_LIST_HEAD(bucket);
-}
-
-/* Hash @mfn down to M2P_OVERRIDE_HASH_SHIFT bits to pick an override bucket. */
-static unsigned long mfn_hash(unsigned long mfn)
-{
-	unsigned long bucket = hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT);
-
-	return bucket;
-}
-
-/*
- * Add an MFN override for a particular page
- *
- * @mfn:     foreign machine frame that @page stands in for
- * @page:    local page to register in the override hash
- * @kmap_op: optional grant-map operation for the kernel mapping; may be NULL
- *
- * Returns 0 on success, -EINVAL if a lowmem page is not mapped with a 4K
- * pte, or -ENOMEM if the p2m entry could not be installed.
- *
- * The p2m entry is updated before any state of @page is modified, so a
- * failure leaves the page exactly as it was handed in.
- */
-int m2p_add_override(unsigned long mfn, struct page *page,
-		struct gnttab_map_grant_ref *kmap_op)
-{
-	unsigned long flags;
-	unsigned long pfn;
-	unsigned long old_mfn;
-	unsigned long uninitialized_var(address);
-	unsigned level;
-	pte_t *ptep = NULL;
-
-	pfn = page_to_pfn(page);
-	if (!PageHighMem(page)) {
-		address = (unsigned long)__va(pfn << PAGE_SHIFT);
-		ptep = lookup_address(address, &level);
-		if (WARN(ptep == NULL || level != PG_LEVEL_4K,
-					"m2p_add_override: pfn %lx not mapped", pfn))
-			return -EINVAL;
-	}
-
-	/* snapshot the current mapping before the p2m entry is replaced */
-	old_mfn = pfn_to_mfn(pfn);
-
-	if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
-		return -ENOMEM;
-
-	/* the p2m is updated; only now start changing the page itself */
-	WARN_ON(PagePrivate(page));
-	SetPagePrivate(page);
-	set_page_private(page, mfn);
-	page->index = old_mfn;
-
-	if (kmap_op != NULL) {
-		if (!PageHighMem(page)) {
-			struct multicall_space mcs =
-				xen_mc_entry(sizeof(*kmap_op));
-
-			MULTI_grant_table_op(mcs.mc,
-					GNTTABOP_map_grant_ref, kmap_op, 1);
-
-			xen_mc_issue(PARAVIRT_LAZY_MMU);
-		}
-		/* let's use dev_bus_addr to record the old mfn instead */
-		kmap_op->dev_bus_addr = page->index;
-		page->index = (unsigned long) kmap_op;
-	}
-	spin_lock_irqsave(&m2p_override_lock, flags);
-	list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]);
-	spin_unlock_irqrestore(&m2p_override_lock, flags);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(m2p_add_override);
-/*
- * Remove the MFN override for @page and put its previous p2m entry back.
- *
- * With @clear_pte set, page->index is treated as a pointer to the
- * gnttab_map_grant_ref recorded by m2p_add_override(): the old mfn is taken
- * from its dev_bus_addr and, for lowmem pages, the grant is unmapped and
- * the kernel pte is reset to the page's own pfn. Without @clear_pte,
- * page->index is used directly as the old mfn.
- *
- * Returns 0 on success, -EINVAL if the pfn has no foreign mapping or a
- * lowmem page is not mapped with a 4K pte, -1 if the recorded map
- * operation reports GNTST_general_error.
- */
-int m2p_remove_override(struct page *page, bool clear_pte)
-{
- unsigned long flags;
- unsigned long mfn;
- unsigned long pfn;
- unsigned long uninitialized_var(address);
- unsigned level;
- pte_t *ptep = NULL;
-
- pfn = page_to_pfn(page);
- mfn = get_phys_to_machine(pfn);
- if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT))
- return -EINVAL;
-
- if (!PageHighMem(page)) {
- address = (unsigned long)__va(pfn << PAGE_SHIFT);
- ptep = lookup_address(address, &level);
-
- if (WARN(ptep == NULL || level != PG_LEVEL_4K,
- "m2p_remove_override: pfn %lx not mapped", pfn))
- return -EINVAL;
- }
-
- /* Unhook the page from the override hash before anything else */
- spin_lock_irqsave(&m2p_override_lock, flags);
- list_del(&page->lru);
- spin_unlock_irqrestore(&m2p_override_lock, flags);
- WARN_ON(!PagePrivate(page));
- ClearPagePrivate(page);
-
- if (clear_pte) {
- struct gnttab_map_grant_ref *map_op =
- (struct gnttab_map_grant_ref *) page->index;
- set_phys_to_machine(pfn, map_op->dev_bus_addr);
- if (!PageHighMem(page)) {
- struct multicall_space mcs;
- struct gnttab_unmap_grant_ref *unmap_op;
-
- /*
- * It might be that we queued all the m2p grant table
- * hypercalls in a multicall, then m2p_remove_override
- * get called before the multicall has actually been
- * issued. In this case handle is going to -1 because
- * it hasn't been modified yet.
- */
- if (map_op->handle == -1)
- xen_mc_flush();
- /*
- * Now if map_op->handle is negative it means that the
- * hypercall actually returned an error.
- */
- /*
- * NOTE(review): this return happens after the page has been
- * taken off the list and its p2m entry restored, and without
- * resetting the pte - confirm callers cope with that state.
- */
- if (map_op->handle == GNTST_general_error) {
- printk(KERN_WARNING "m2p_remove_override: "
- "pfn %lx mfn %lx, failed to modify kernel mappings",
- pfn, mfn);
- return -1;
- }
-
- mcs = xen_mc_entry(
- sizeof(struct gnttab_unmap_grant_ref));
- unmap_op = mcs.args;
- unmap_op->host_addr = map_op->host_addr;
- unmap_op->handle = map_op->handle;
- unmap_op->dev_bus_addr = 0;
-
- MULTI_grant_table_op(mcs.mc,
- GNTTABOP_unmap_grant_ref, unmap_op, 1);
-
- xen_mc_issue(PARAVIRT_LAZY_MMU);
-
- /* Point the kernel mapping back at the page's own pfn */
- set_pte_at(&init_mm, address, ptep,
- pfn_pte(pfn, PAGE_KERNEL));
- __flush_tlb_single(address);
- map_op->host_addr = 0;
- }
- } else
- set_phys_to_machine(pfn, page->index);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(m2p_remove_override);
-
-/*
- * Find the page registered as override for @mfn.  Returns NULL when the
- * hash bucket holds no page whose private value equals @mfn.
- */
-struct page *m2p_find_override(unsigned long mfn)
-{
-	struct list_head *bucket = &m2p_overrides[mfn_hash(mfn)];
-	struct page *found = NULL;
-	struct page *cur;
-	unsigned long flags;
-
-	spin_lock_irqsave(&m2p_override_lock, flags);
-
-	list_for_each_entry(cur, bucket, lru) {
-		if (page_private(cur) != mfn)
-			continue;
-
-		found = cur;
-		break;
-	}
-
-	spin_unlock_irqrestore(&m2p_override_lock, flags);
-
-	return found;
-}
-
-/*
- * Return the pfn of the page overriding @mfn, or the caller-supplied @pfn
- * when no override is registered.
- */
-unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
-{
-	struct page *override = m2p_find_override(mfn);
-
-	return override ? page_to_pfn(override) : pfn;
-}
-EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
-
-#ifdef CONFIG_XEN_DEBUG_FS
-#include <linux/debugfs.h>
-#include "debugfs.h"
-/*
- * seq_file show callback for the p2m debugfs dump.
- *
- * Walks every pfn below MAX_DOMAIN_PAGES, classifies it by the tree level
- * at which the lookup ends and by the type of value found there, and
- * prints one line each time the type or the level differs from that of
- * the previous pfn. Prints nothing if p2m_top has not been set up.
- */
-static int p2m_dump_show(struct seq_file *m, void *v)
-{
- static const char * const level_name[] = { "top", "middle",
- "entry", "abnormal", "error"};
-#define TYPE_IDENTITY 0
-#define TYPE_MISSING 1
-#define TYPE_PFN 2
-#define TYPE_UNKNOWN 3
- static const char * const type_name[] = {
- [TYPE_IDENTITY] = "identity",
- [TYPE_MISSING] = "missing",
- [TYPE_PFN] = "pfn",
- [TYPE_UNKNOWN] = "abnormal"};
- unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0;
- unsigned int uninitialized_var(prev_level);
- unsigned int uninitialized_var(prev_type);
-
- if (!p2m_top)
- return 0;
-
- for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn++) {
- unsigned topidx = p2m_top_index(pfn);
- unsigned mididx = p2m_mid_index(pfn);
- unsigned idx = p2m_index(pfn);
- unsigned lvl, type;
-
- /* lvl indexes level_name[]: 4 ("error") stays if no branch below matches */
- lvl = 4;
- type = TYPE_UNKNOWN;
- if (p2m_top[topidx] == p2m_mid_missing) {
- lvl = 0; type = TYPE_MISSING;
- } else if (p2m_top[topidx] == NULL) {
- lvl = 0; type = TYPE_UNKNOWN;
- } else if (p2m_top[topidx][mididx] == NULL) {
- lvl = 1; type = TYPE_UNKNOWN;
- } else if (p2m_top[topidx][mididx] == p2m_identity) {
- lvl = 1; type = TYPE_IDENTITY;
- } else if (p2m_top[topidx][mididx] == p2m_missing) {
- lvl = 1; type = TYPE_MISSING;
- } else if (p2m_top[topidx][mididx][idx] == 0) {
- lvl = 2; type = TYPE_UNKNOWN;
- } else if (p2m_top[topidx][mididx][idx] == IDENTITY_FRAME(pfn)) {
- lvl = 2; type = TYPE_IDENTITY;
- } else if (p2m_top[topidx][mididx][idx] == INVALID_P2M_ENTRY) {
- lvl = 2; type = TYPE_MISSING;
- } else if (p2m_top[topidx][mididx][idx] == pfn) {
- lvl = 2; type = TYPE_PFN;
- } else if (p2m_top[topidx][mididx][idx] != pfn) {
- lvl = 2; type = TYPE_PFN;
- }
- /* Seed the "previous" state from the very first pfn */
- if (pfn == 0) {
- prev_level = lvl;
- prev_type = type;
- }
- /* Override the classification of the last pfn with fixed values */
- if (pfn == MAX_DOMAIN_PAGES-1) {
- lvl = 3;
- type = TYPE_UNKNOWN;
- }
- if (prev_type != type) {
- seq_printf(m, " [0x%lx->0x%lx] %s\n",
- prev_pfn_type, pfn, type_name[prev_type]);
- prev_pfn_type = pfn;
- prev_type = type;
- }
- if (prev_level != lvl) {
- seq_printf(m, " [0x%lx->0x%lx] level %s\n",
- prev_pfn_level, pfn, level_name[prev_level]);
- prev_pfn_level = pfn;
- prev_level = lvl;
- }
- }
- return 0;
-#undef TYPE_IDENTITY
-#undef TYPE_MISSING
-#undef TYPE_PFN
-#undef TYPE_UNKNOWN
-}
-
-/* open() handler: bind the file to p2m_dump_show via single_open(). */
-static int p2m_dump_open(struct inode *inode, struct file *filp)
-{
-	int err = single_open(filp, p2m_dump_show, NULL);
-
-	return err;
-}
-
-/* File operations of the "p2m" debugfs file. */
-static const struct file_operations p2m_dump_fops = {
-	.open		= p2m_dump_open,
-	.release	= single_release,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-};
-
-static struct dentry *d_mmu_debug;
-
-/*
- * Create the "mmu" directory and its "p2m" dump file under the Xen
- * debugfs root.  Returns -ENOMEM when the root is unavailable.
- */
-static int __init xen_p2m_debugfs(void)
-{
-	struct dentry *xen_root;
-
-	xen_root = xen_init_debugfs();
-	if (!xen_root)
-		return -ENOMEM;
-
-	d_mmu_debug = debugfs_create_dir("mmu", xen_root);
-	debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops);
-
-	return 0;
-}
-fs_initcall(xen_p2m_debugfs);
-#endif /* CONFIG_XEN_DEBUG_FS */
diff --git a/ANDROID_3.4.5/arch/x86/xen/pci-swiotlb-xen.c b/ANDROID_3.4.5/arch/x86/xen/pci-swiotlb-xen.c
deleted file mode 100644
index 967633ad..00000000
--- a/ANDROID_3.4.5/arch/x86/xen/pci-swiotlb-xen.c
+++ /dev/null
@@ -1,67 +0,0 @@
-/* Glue code to lib/swiotlb-xen.c */
-
-#include <linux/dma-mapping.h>
-#include <linux/pci.h>
-#include <xen/swiotlb-xen.h>
-
-#include <asm/xen/hypervisor.h>
-#include <xen/xen.h>
-#include <asm/iommu_table.h>
-
-int xen_swiotlb __read_mostly;
-
-/* DMA operations backed by the xen_swiotlb_* implementations. */
-static struct dma_map_ops xen_swiotlb_dma_ops = {
-	/* coherent allocations */
-	.alloc = xen_swiotlb_alloc_coherent,
-	.free = xen_swiotlb_free_coherent,
-	/* streaming mappings */
-	.map_page = xen_swiotlb_map_page,
-	.unmap_page = xen_swiotlb_unmap_page,
-	.map_sg = xen_swiotlb_map_sg_attrs,
-	.unmap_sg = xen_swiotlb_unmap_sg_attrs,
-	/* synchronisation */
-	.sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu,
-	.sync_single_for_device = xen_swiotlb_sync_single_for_device,
-	.sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu,
-	.sync_sg_for_device = xen_swiotlb_sync_sg_for_device,
-	/* capability and error reporting */
-	.mapping_error = xen_swiotlb_dma_mapping_error,
-	.dma_supported = xen_swiotlb_dma_supported,
-};
-
-/*
- * pci_xen_swiotlb_detect - set xen_swiotlb to 1 if necessary
- *
- * This returns non-zero if we are forced to use xen_swiotlb (by the boot
- * option).
- */
-int __init pci_xen_swiotlb_detect(void)
-{
-	if (xen_pv_domain()) {
-		/*
-		 * As a PV guest either iommu=soft or swiotlb=force activates
-		 * this IOMMU; as the privileged PV domain it is activated
-		 * regardless.
-		 */
-		if (xen_initial_domain() || swiotlb || swiotlb_force)
-			xen_swiotlb = 1;
-
-		/*
-		 * Under Xen the native SWIOTLB MUST be disabled.  Clearing
-		 * 'swiotlb' is enough, since that flag is the only one
-		 * turning the native implementation on.
-		 */
-		swiotlb = 0;
-	}
-
-	return xen_swiotlb;
-}
-
-/*
- * Bring up the Xen SWIOTLB and make its operations the active dma_ops.
- * Does nothing unless pci_xen_swiotlb_detect() has set xen_swiotlb.
- */
-void __init pci_xen_swiotlb_init(void)
-{
-	if (!xen_swiotlb)
-		return;
-
-	xen_swiotlb_init(1);
-	dma_ops = &xen_swiotlb_dma_ops;
-
-	/* Make sure ACS will be enabled */
-	pci_request_acs();
-}
-IOMMU_INIT_FINISH(pci_xen_swiotlb_detect, 0, pci_xen_swiotlb_init, 0);
diff --git a/ANDROID_3.4.5/arch/x86/xen/platform-pci-unplug.c b/ANDROID_3.4.5/arch/x86/xen/platform-pci-unplug.c
deleted file mode 100644
index ffcf2615..00000000
--- a/ANDROID_3.4.5/arch/x86/xen/platform-pci-unplug.c
+++ /dev/null
@@ -1,143 +0,0 @@
-/******************************************************************************
- * platform-pci-unplug.c
- *
- * Xen platform PCI device driver
- * Copyright (c) 2010, Citrix
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <linux/init.h>
-#include <linux/io.h>
-#include <linux/module.h>
-
-#include <xen/platform_pci.h>
-
-#define XEN_PLATFORM_ERR_MAGIC -1
-#define XEN_PLATFORM_ERR_PROTOCOL -2
-#define XEN_PLATFORM_ERR_BLACKLIST -3
-
-/* store the value of xen_emul_unplug after the unplug is done */
-int xen_platform_pci_unplug;
-EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
-#ifdef CONFIG_XEN_PVHVM
-static int xen_emul_unplug;
-
-static int check_platform_magic(void)
-{
- short magic;
- char protocol;
-
- magic = inw(XEN_IOPORT_MAGIC);
- if (magic != XEN_IOPORT_MAGIC_VAL) {
- printk(KERN_ERR "Xen Platform PCI: unrecognised magic value\n");
- return XEN_PLATFORM_ERR_MAGIC;
- }
-
- protocol = inb(XEN_IOPORT_PROTOVER);
-
- printk(KERN_DEBUG "Xen Platform PCI: I/O protocol version %d\n",
- protocol);
-
- switch (protocol) {
- case 1:
- outw(XEN_IOPORT_LINUX_PRODNUM, XEN_IOPORT_PRODNUM);
- outl(XEN_IOPORT_LINUX_DRVVER, XEN_IOPORT_DRVVER);
- if (inw(XEN_IOPORT_MAGIC) != XEN_IOPORT_MAGIC_VAL) {
- printk(KERN_ERR "Xen Platform: blacklisted by host\n");
- return XEN_PLATFORM_ERR_BLACKLIST;
- }
- break;
- default:
- printk(KERN_WARNING "Xen Platform PCI: unknown I/O protocol version");
- return XEN_PLATFORM_ERR_PROTOCOL;
- }
-
- return 0;
-}
-
-void xen_unplug_emulated_devices(void)
-{
- int r;
-
- /* user explicitly requested no unplug */
- if (xen_emul_unplug & XEN_UNPLUG_NEVER)
- return;
- /* check the version of the xen platform PCI device */
- r = check_platform_magic();
- /* If the version matches enable the Xen platform PCI driver.
- * Also enable the Xen platform PCI driver if the host does
- * not support the unplug protocol (XEN_PLATFORM_ERR_MAGIC)
- * but the user told us that unplugging is unnecessary. */
- if (r && !(r == XEN_PLATFORM_ERR_MAGIC &&
- (xen_emul_unplug & XEN_UNPLUG_UNNECESSARY)))
- return;
- /* Set the default value of xen_emul_unplug depending on whether or
- * not the Xen PV frontends and the Xen platform PCI driver have
- * been compiled for this kernel (modules or built-in are both OK). */
- if (!xen_emul_unplug) {
- if (xen_must_unplug_nics()) {
- printk(KERN_INFO "Netfront and the Xen platform PCI driver have "
- "been compiled for this kernel: unplug emulated NICs.\n");
- xen_emul_unplug |= XEN_UNPLUG_ALL_NICS;
- }
- if (xen_must_unplug_disks()) {
- printk(KERN_INFO "Blkfront and the Xen platform PCI driver have "
- "been compiled for this kernel: unplug emulated disks.\n"
- "You might have to change the root device\n"
- "from /dev/hd[a-d] to /dev/xvd[a-d]\n"
- "in your root= kernel command line option\n");
- xen_emul_unplug |= XEN_UNPLUG_ALL_IDE_DISKS;
- }
- }
- /* Now unplug the emulated devices */
- if (!(xen_emul_unplug & XEN_UNPLUG_UNNECESSARY))
- outw(xen_emul_unplug, XEN_IOPORT_UNPLUG);
- xen_platform_pci_unplug = xen_emul_unplug;
-}
-
-static int __init parse_xen_emul_unplug(char *arg)
-{
- char *p, *q;
- int l;
-
- for (p = arg; p; p = q) {
- q = strchr(p, ',');
- if (q) {
- l = q - p;
- q++;
- } else {
- l = strlen(p);
- }
- if (!strncmp(p, "all", l))
- xen_emul_unplug |= XEN_UNPLUG_ALL;
- else if (!strncmp(p, "ide-disks", l))
- xen_emul_unplug |= XEN_UNPLUG_ALL_IDE_DISKS;
- else if (!strncmp(p, "aux-ide-disks", l))
- xen_emul_unplug |= XEN_UNPLUG_AUX_IDE_DISKS;
- else if (!strncmp(p, "nics", l))
- xen_emul_unplug |= XEN_UNPLUG_ALL_NICS;
- else if (!strncmp(p, "unnecessary", l))
- xen_emul_unplug |= XEN_UNPLUG_UNNECESSARY;
- else if (!strncmp(p, "never", l))
- xen_emul_unplug |= XEN_UNPLUG_NEVER;
- else
- printk(KERN_WARNING "unrecognised option '%s' "
- "in parameter 'xen_emul_unplug'\n", p);
- }
- return 0;
-}
-early_param("xen_emul_unplug", parse_xen_emul_unplug);
-#endif
diff --git a/ANDROID_3.4.5/arch/x86/xen/setup.c b/ANDROID_3.4.5/arch/x86/xen/setup.c
deleted file mode 100644
index 1ba8dff2..00000000
--- a/ANDROID_3.4.5/arch/x86/xen/setup.c
+++ /dev/null
@@ -1,427 +0,0 @@
-/*
- * Machine specific setup for xen
- *
- * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
- */
-
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/pm.h>
-#include <linux/memblock.h>
-#include <linux/cpuidle.h>
-#include <linux/cpufreq.h>
-
-#include <asm/elf.h>
-#include <asm/vdso.h>
-#include <asm/e820.h>
-#include <asm/setup.h>
-#include <asm/acpi.h>
-#include <asm/xen/hypervisor.h>
-#include <asm/xen/hypercall.h>
-
-#include <xen/xen.h>
-#include <xen/page.h>
-#include <xen/interface/callback.h>
-#include <xen/interface/memory.h>
-#include <xen/interface/physdev.h>
-#include <xen/features.h>
-
-#include "xen-ops.h"
-#include "vdso.h"
-
-/* These are code, but not functions. Defined in entry.S */
-extern const char xen_hypervisor_callback[];
-extern const char xen_failsafe_callback[];
-extern void xen_sysenter_target(void);
-extern void xen_syscall_target(void);
-extern void xen_syscall32_target(void);
-
-/* Amount of extra memory space we add to the e820 ranges */
-struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
-
-/* Number of pages released from the initial allocation. */
-unsigned long xen_released_pages;
-
-/*
- * The maximum amount of extra memory compared to the base size. The
- * main scaling factor is the size of struct page. At extreme ratios
- * of base:extra, all the base memory can be filled with page
- * structures for the extra memory, leaving no space for anything
- * else.
- *
- * 10x seems like a reasonable balance between scaling flexibility and
- * leaving a practically usable system.
- */
-#define EXTRA_MEM_RATIO (10)
-
-static void __init xen_add_extra_mem(u64 start, u64 size)
-{
- unsigned long pfn;
- int i;
-
- for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
- /* Add new region. */
- if (xen_extra_mem[i].size == 0) {
- xen_extra_mem[i].start = start;
- xen_extra_mem[i].size = size;
- break;
- }
- /* Append to existing region. */
- if (xen_extra_mem[i].start + xen_extra_mem[i].size == start) {
- xen_extra_mem[i].size += size;
- break;
- }
- }
- if (i == XEN_EXTRA_MEM_MAX_REGIONS)
- printk(KERN_WARNING "Warning: not enough extra memory regions\n");
-
- memblock_reserve(start, size);
-
- xen_max_p2m_pfn = PFN_DOWN(start + size);
-
- for (pfn = PFN_DOWN(start); pfn <= xen_max_p2m_pfn; pfn++)
- __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
-}
-
-static unsigned long __init xen_release_chunk(unsigned long start,
- unsigned long end)
-{
- struct xen_memory_reservation reservation = {
- .address_bits = 0,
- .extent_order = 0,
- .domid = DOMID_SELF
- };
- unsigned long len = 0;
- unsigned long pfn;
- int ret;
-
- for(pfn = start; pfn < end; pfn++) {
- unsigned long mfn = pfn_to_mfn(pfn);
-
- /* Make sure pfn exists to start with */
- if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
- continue;
-
- set_xen_guest_handle(reservation.extent_start, &mfn);
- reservation.nr_extents = 1;
-
- ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
- &reservation);
- WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret);
- if (ret == 1) {
- __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
- len++;
- }
- }
- printk(KERN_INFO "Freeing %lx-%lx pfn range: %lu pages freed\n",
- start, end, len);
-
- return len;
-}
-
-static unsigned long __init xen_set_identity_and_release(
- const struct e820entry *list, size_t map_size, unsigned long nr_pages)
-{
- phys_addr_t start = 0;
- unsigned long released = 0;
- unsigned long identity = 0;
- const struct e820entry *entry;
- int i;
-
- /*
- * Combine non-RAM regions and gaps until a RAM region (or the
- * end of the map) is reached, then set the 1:1 map and
- * release the pages (if available) in those non-RAM regions.
- *
- * The combined non-RAM regions are rounded to a whole number
- * of pages so any partial pages are accessible via the 1:1
- * mapping. This is needed for some BIOSes that put (for
- * example) the DMI tables in a reserved region that begins on
- * a non-page boundary.
- */
- for (i = 0, entry = list; i < map_size; i++, entry++) {
- phys_addr_t end = entry->addr + entry->size;
-
- if (entry->type == E820_RAM || i == map_size - 1) {
- unsigned long start_pfn = PFN_DOWN(start);
- unsigned long end_pfn = PFN_UP(end);
-
- if (entry->type == E820_RAM)
- end_pfn = PFN_UP(entry->addr);
-
- if (start_pfn < end_pfn) {
- if (start_pfn < nr_pages)
- released += xen_release_chunk(
- start_pfn, min(end_pfn, nr_pages));
-
- identity += set_phys_range_identity(
- start_pfn, end_pfn);
- }
- start = end;
- }
- }
-
- printk(KERN_INFO "Released %lu pages of unused memory\n", released);
- printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity);
-
- return released;
-}
-
-static unsigned long __init xen_get_max_pages(void)
-{
- unsigned long max_pages = MAX_DOMAIN_PAGES;
- domid_t domid = DOMID_SELF;
- int ret;
-
- /*
- * For the initial domain we use the maximum reservation as
- * the maximum page.
- *
- * For guest domains the current maximum reservation reflects
- * the current maximum rather than the static maximum. In this
- * case the e820 map provided to us will cover the static
- * maximum region.
- */
- if (xen_initial_domain()) {
- ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid);
- if (ret > 0)
- max_pages = ret;
- }
-
- return min(max_pages, MAX_DOMAIN_PAGES);
-}
-
-static void xen_align_and_add_e820_region(u64 start, u64 size, int type)
-{
- u64 end = start + size;
-
- /* Align RAM regions to page boundaries. */
- if (type == E820_RAM) {
- start = PAGE_ALIGN(start);
- end &= ~((u64)PAGE_SIZE - 1);
- }
-
- e820_add_region(start, end - start, type);
-}
-
-/**
- * machine_specific_memory_setup - Hook for machine specific memory setup.
- **/
-char * __init xen_memory_setup(void)
-{
- static struct e820entry map[E820MAX] __initdata;
-
- unsigned long max_pfn = xen_start_info->nr_pages;
- unsigned long long mem_end;
- int rc;
- struct xen_memory_map memmap;
- unsigned long max_pages;
- unsigned long extra_pages = 0;
- int i;
- int op;
-
- max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
- mem_end = PFN_PHYS(max_pfn);
-
- memmap.nr_entries = E820MAX;
- set_xen_guest_handle(memmap.buffer, map);
-
- op = xen_initial_domain() ?
- XENMEM_machine_memory_map :
- XENMEM_memory_map;
- rc = HYPERVISOR_memory_op(op, &memmap);
- if (rc == -ENOSYS) {
- BUG_ON(xen_initial_domain());
- memmap.nr_entries = 1;
- map[0].addr = 0ULL;
- map[0].size = mem_end;
- /* 8MB slack (to balance backend allocations). */
- map[0].size += 8ULL << 20;
- map[0].type = E820_RAM;
- rc = 0;
- }
- BUG_ON(rc);
-
- /* Make sure the Xen-supplied memory map is well-ordered. */
- sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries);
-
- max_pages = xen_get_max_pages();
- if (max_pages > max_pfn)
- extra_pages += max_pages - max_pfn;
-
- /*
- * Set P2M for all non-RAM pages and E820 gaps to be identity
- * type PFNs. Any RAM pages that would be made inaccesible by
- * this are first released.
- */
- xen_released_pages = xen_set_identity_and_release(
- map, memmap.nr_entries, max_pfn);
- extra_pages += xen_released_pages;
-
- /*
- * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
- * factor the base size. On non-highmem systems, the base
- * size is the full initial memory allocation; on highmem it
- * is limited to the max size of lowmem, so that it doesn't
- * get completely filled.
- *
- * In principle there could be a problem in lowmem systems if
- * the initial memory is also very large with respect to
- * lowmem, but we won't try to deal with that here.
- */
- extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
- extra_pages);
-
- i = 0;
- while (i < memmap.nr_entries) {
- u64 addr = map[i].addr;
- u64 size = map[i].size;
- u32 type = map[i].type;
-
- if (type == E820_RAM) {
- if (addr < mem_end) {
- size = min(size, mem_end - addr);
- } else if (extra_pages) {
- size = min(size, (u64)extra_pages * PAGE_SIZE);
- extra_pages -= size / PAGE_SIZE;
- xen_add_extra_mem(addr, size);
- } else
- type = E820_UNUSABLE;
- }
-
- xen_align_and_add_e820_region(addr, size, type);
-
- map[i].addr += size;
- map[i].size -= size;
- if (map[i].size == 0)
- i++;
- }
-
- /*
- * In domU, the ISA region is normal, usable memory, but we
- * reserve ISA memory anyway because too many things poke
- * about in there.
- */
- e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
- E820_RESERVED);
-
- /*
- * Reserve Xen bits:
- * - mfn_list
- * - xen_start_info
- * See comment above "struct start_info" in <xen/interface/xen.h>
- */
- memblock_reserve(__pa(xen_start_info->mfn_list),
- xen_start_info->pt_base - xen_start_info->mfn_list);
-
- sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
-
- return "Xen";
-}
-
-/*
- * Set the bit indicating "nosegneg" library variants should be used.
- * We only need to bother in pure 32-bit mode; compat 32-bit processes
- * can have un-truncated segments, so wrapping around is allowed.
- */
-static void __init fiddle_vdso(void)
-{
-#ifdef CONFIG_X86_32
- u32 *mask;
- mask = VDSO32_SYMBOL(&vdso32_int80_start, NOTE_MASK);
- *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
- mask = VDSO32_SYMBOL(&vdso32_sysenter_start, NOTE_MASK);
- *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
-#endif
-}
-
-static int __cpuinit register_callback(unsigned type, const void *func)
-{
- struct callback_register callback = {
- .type = type,
- .address = XEN_CALLBACK(__KERNEL_CS, func),
- .flags = CALLBACKF_mask_events,
- };
-
- return HYPERVISOR_callback_op(CALLBACKOP_register, &callback);
-}
-
-void __cpuinit xen_enable_sysenter(void)
-{
- int ret;
- unsigned sysenter_feature;
-
-#ifdef CONFIG_X86_32
- sysenter_feature = X86_FEATURE_SEP;
-#else
- sysenter_feature = X86_FEATURE_SYSENTER32;
-#endif
-
- if (!boot_cpu_has(sysenter_feature))
- return;
-
- ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target);
- if(ret != 0)
- setup_clear_cpu_cap(sysenter_feature);
-}
-
-void __cpuinit xen_enable_syscall(void)
-{
-#ifdef CONFIG_X86_64
- int ret;
-
- ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
- if (ret != 0) {
- printk(KERN_ERR "Failed to set syscall callback: %d\n", ret);
- /* Pretty fatal; 64-bit userspace has no other
- mechanism for syscalls. */
- }
-
- if (boot_cpu_has(X86_FEATURE_SYSCALL32)) {
- ret = register_callback(CALLBACKTYPE_syscall32,
- xen_syscall32_target);
- if (ret != 0)
- setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
- }
-#endif /* CONFIG_X86_64 */
-}
-
-void __init xen_arch_setup(void)
-{
- xen_panic_handler_init();
-
- HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
- HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
-
- if (!xen_feature(XENFEAT_auto_translated_physmap))
- HYPERVISOR_vm_assist(VMASST_CMD_enable,
- VMASST_TYPE_pae_extended_cr3);
-
- if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
- register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
- BUG();
-
- xen_enable_sysenter();
- xen_enable_syscall();
-
-#ifdef CONFIG_ACPI
- if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
- printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
- disable_acpi();
- }
-#endif
-
- memcpy(boot_command_line, xen_start_info->cmd_line,
- MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ?
- COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);
-
- /* Set up idle, making sure it calls safe_halt() pvop */
-#ifdef CONFIG_X86_32
- boot_cpu_data.hlt_works_ok = 1;
-#endif
- disable_cpuidle();
- disable_cpufreq();
- WARN_ON(set_pm_idle_to_default());
- fiddle_vdso();
-}
diff --git a/ANDROID_3.4.5/arch/x86/xen/smp.c b/ANDROID_3.4.5/arch/x86/xen/smp.c
deleted file mode 100644
index 0503c0c4..00000000
--- a/ANDROID_3.4.5/arch/x86/xen/smp.c
+++ /dev/null
@@ -1,592 +0,0 @@
-/*
- * Xen SMP support
- *
- * This file implements the Xen versions of smp_ops. SMP under Xen is
- * very straightforward. Bringing a CPU up is simply a matter of
- * loading its initial context and setting it running.
- *
- * IPIs are handled through the Xen event mechanism.
- *
- * Because virtual CPUs can be scheduled onto any real CPU, there's no
- * useful topology information for the kernel to make use of. As a
- * result, all CPUs are treated as if they're single-core and
- * single-threaded.
- */
-#include <linux/sched.h>
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/smp.h>
-
-#include <asm/paravirt.h>
-#include <asm/desc.h>
-#include <asm/pgtable.h>
-#include <asm/cpu.h>
-
-#include <xen/interface/xen.h>
-#include <xen/interface/vcpu.h>
-
-#include <asm/xen/interface.h>
-#include <asm/xen/hypercall.h>
-
-#include <xen/xen.h>
-#include <xen/page.h>
-#include <xen/events.h>
-
-#include <xen/hvc-console.h>
-#include "xen-ops.h"
-#include "mmu.h"
-
-cpumask_var_t xen_cpu_initialized_map;
-
-static DEFINE_PER_CPU(int, xen_resched_irq);
-static DEFINE_PER_CPU(int, xen_callfunc_irq);
-static DEFINE_PER_CPU(int, xen_callfuncsingle_irq);
-static DEFINE_PER_CPU(int, xen_debug_irq) = -1;
-
-static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
-static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
-
-/*
- * Reschedule call back.
- */
-static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
-{
- inc_irq_stat(irq_resched_count);
- scheduler_ipi();
-
- return IRQ_HANDLED;
-}
-
-static void __cpuinit cpu_bringup(void)
-{
- int cpu;
-
- cpu_init();
- touch_softlockup_watchdog();
- preempt_disable();
-
- xen_enable_sysenter();
- xen_enable_syscall();
-
- cpu = smp_processor_id();
- smp_store_cpu_info(cpu);
- cpu_data(cpu).x86_max_cores = 1;
- set_cpu_sibling_map(cpu);
-
- xen_setup_cpu_clockevents();
-
- notify_cpu_starting(cpu);
-
- ipi_call_lock();
- set_cpu_online(cpu, true);
- ipi_call_unlock();
-
- this_cpu_write(cpu_state, CPU_ONLINE);
-
- wmb();
-
- /* We can take interrupts now: we're officially "up". */
- local_irq_enable();
-
- wmb(); /* make sure everything is out */
-}
-
-static void __cpuinit cpu_bringup_and_idle(void)
-{
- cpu_bringup();
- cpu_idle();
-}
-
-static int xen_smp_intr_init(unsigned int cpu)
-{
- int rc;
- const char *resched_name, *callfunc_name, *debug_name;
-
- resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
- rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
- cpu,
- xen_reschedule_interrupt,
- IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
- resched_name,
- NULL);
- if (rc < 0)
- goto fail;
- per_cpu(xen_resched_irq, cpu) = rc;
-
- callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu);
- rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR,
- cpu,
- xen_call_function_interrupt,
- IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
- callfunc_name,
- NULL);
- if (rc < 0)
- goto fail;
- per_cpu(xen_callfunc_irq, cpu) = rc;
-
- debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
- rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt,
- IRQF_DISABLED | IRQF_PERCPU | IRQF_NOBALANCING,
- debug_name, NULL);
- if (rc < 0)
- goto fail;
- per_cpu(xen_debug_irq, cpu) = rc;
-
- callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
- rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
- cpu,
- xen_call_function_single_interrupt,
- IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
- callfunc_name,
- NULL);
- if (rc < 0)
- goto fail;
- per_cpu(xen_callfuncsingle_irq, cpu) = rc;
-
- return 0;
-
- fail:
- if (per_cpu(xen_resched_irq, cpu) >= 0)
- unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
- if (per_cpu(xen_callfunc_irq, cpu) >= 0)
- unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
- if (per_cpu(xen_debug_irq, cpu) >= 0)
- unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
- if (per_cpu(xen_callfuncsingle_irq, cpu) >= 0)
- unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu),
- NULL);
-
- return rc;
-}
-
-static void __init xen_fill_possible_map(void)
-{
- int i, rc;
-
- if (xen_initial_domain())
- return;
-
- for (i = 0; i < nr_cpu_ids; i++) {
- rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
- if (rc >= 0) {
- num_processors++;
- set_cpu_possible(i, true);
- }
- }
-}
-
-static void __init xen_filter_cpu_maps(void)
-{
- int i, rc;
- unsigned int subtract = 0;
-
- if (!xen_initial_domain())
- return;
-
- num_processors = 0;
- disabled_cpus = 0;
- for (i = 0; i < nr_cpu_ids; i++) {
- rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
- if (rc >= 0) {
- num_processors++;
- set_cpu_possible(i, true);
- } else {
- set_cpu_possible(i, false);
- set_cpu_present(i, false);
- subtract++;
- }
- }
-#ifdef CONFIG_HOTPLUG_CPU
- /* This is akin to using 'nr_cpus' on the Linux command line.
- * Which is OK as when we use 'dom0_max_vcpus=X' we can only
- * have up to X, while nr_cpu_ids is greater than X. This
- * normally is not a problem, except when CPU hotplugging
- * is involved and then there might be more than X CPUs
- * in the guest - which will not work as there is no
- * hypercall to expand the max number of VCPUs an already
- * running guest has. So cap it up to X. */
- if (subtract)
- nr_cpu_ids = nr_cpu_ids - subtract;
-#endif
-
-}
-
-static void __init xen_smp_prepare_boot_cpu(void)
-{
- BUG_ON(smp_processor_id() != 0);
- native_smp_prepare_boot_cpu();
-
- /* We've switched to the "real" per-cpu gdt, so make sure the
- old memory can be recycled */
- make_lowmem_page_readwrite(xen_initial_gdt);
-
- xen_filter_cpu_maps();
- xen_setup_vcpu_info_placement();
-}
-
-static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
-{
- unsigned cpu;
- unsigned int i;
-
- if (skip_ioapic_setup) {
- char *m = (max_cpus == 0) ?
- "The nosmp parameter is incompatible with Xen; " \
- "use Xen dom0_max_vcpus=1 parameter" :
- "The noapic parameter is incompatible with Xen";
-
- xen_raw_printk(m);
- panic(m);
- }
- xen_init_lock_cpu(0);
-
- smp_store_cpu_info(0);
- cpu_data(0).x86_max_cores = 1;
-
- for_each_possible_cpu(i) {
- zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
- zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
- zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
- }
- set_cpu_sibling_map(0);
-
- if (xen_smp_intr_init(0))
- BUG();
-
- if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL))
- panic("could not allocate xen_cpu_initialized_map\n");
-
- cpumask_copy(xen_cpu_initialized_map, cpumask_of(0));
-
- /* Restrict the possible_map according to max_cpus. */
- while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
- for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--)
- continue;
- set_cpu_possible(cpu, false);
- }
-
- for_each_possible_cpu (cpu) {
- struct task_struct *idle;
-
- if (cpu == 0)
- continue;
-
- idle = fork_idle(cpu);
- if (IS_ERR(idle))
- panic("failed fork for CPU %d", cpu);
-
- set_cpu_present(cpu, true);
- }
-}
-
-static int __cpuinit
-cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
-{
- struct vcpu_guest_context *ctxt;
- struct desc_struct *gdt;
- unsigned long gdt_mfn;
-
- if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
- return 0;
-
- ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
- if (ctxt == NULL)
- return -ENOMEM;
-
- gdt = get_cpu_gdt_table(cpu);
-
- ctxt->flags = VGCF_IN_KERNEL;
- ctxt->user_regs.ds = __USER_DS;
- ctxt->user_regs.es = __USER_DS;
- ctxt->user_regs.ss = __KERNEL_DS;
-#ifdef CONFIG_X86_32
- ctxt->user_regs.fs = __KERNEL_PERCPU;
- ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
-#else
- ctxt->gs_base_kernel = per_cpu_offset(cpu);
-#endif
- ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
- ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
-
- memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
-
- xen_copy_trap_info(ctxt->trap_ctxt);
-
- ctxt->ldt_ents = 0;
-
- BUG_ON((unsigned long)gdt & ~PAGE_MASK);
-
- gdt_mfn = arbitrary_virt_to_mfn(gdt);
- make_lowmem_page_readonly(gdt);
- make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
-
- ctxt->gdt_frames[0] = gdt_mfn;
- ctxt->gdt_ents = GDT_ENTRIES;
-
- ctxt->user_regs.cs = __KERNEL_CS;
- ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
-
- ctxt->kernel_ss = __KERNEL_DS;
- ctxt->kernel_sp = idle->thread.sp0;
-
-#ifdef CONFIG_X86_32
- ctxt->event_callback_cs = __KERNEL_CS;
- ctxt->failsafe_callback_cs = __KERNEL_CS;
-#endif
- ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback;
- ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;
-
- per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
- ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
-
- if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
- BUG();
-
- kfree(ctxt);
- return 0;
-}
-
-static int __cpuinit xen_cpu_up(unsigned int cpu)
-{
- struct task_struct *idle = idle_task(cpu);
- int rc;
-
- per_cpu(current_task, cpu) = idle;
-#ifdef CONFIG_X86_32
- irq_ctx_init(cpu);
-#else
- clear_tsk_thread_flag(idle, TIF_FORK);
- per_cpu(kernel_stack, cpu) =
- (unsigned long)task_stack_page(idle) -
- KERNEL_STACK_OFFSET + THREAD_SIZE;
-#endif
- xen_setup_runstate_info(cpu);
- xen_setup_timer(cpu);
- xen_init_lock_cpu(cpu);
-
- per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
-
- /* make sure interrupts start blocked */
- per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;
-
- rc = cpu_initialize_context(cpu, idle);
- if (rc)
- return rc;
-
- if (num_online_cpus() == 1)
- alternatives_smp_switch(1);
-
- rc = xen_smp_intr_init(cpu);
- if (rc)
- return rc;
-
- rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
- BUG_ON(rc);
-
- while(per_cpu(cpu_state, cpu) != CPU_ONLINE) {
- HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
- barrier();
- }
-
- return 0;
-}
-
-static void xen_smp_cpus_done(unsigned int max_cpus)
-{
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-static int xen_cpu_disable(void)
-{
- unsigned int cpu = smp_processor_id();
- if (cpu == 0)
- return -EBUSY;
-
- cpu_disable_common();
-
- load_cr3(swapper_pg_dir);
- return 0;
-}
-
-static void xen_cpu_die(unsigned int cpu)
-{
- while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
- current->state = TASK_UNINTERRUPTIBLE;
- schedule_timeout(HZ/10);
- }
- unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
- unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
- unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
- unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
- xen_uninit_lock_cpu(cpu);
- xen_teardown_timer(cpu);
-
- if (num_online_cpus() == 1)
- alternatives_smp_switch(0);
-}
-
-static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */
-{
- play_dead_common();
- HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
- cpu_bringup();
- /*
- * Balance out the preempt calls - as we are running in cpu_idle
- * loop which has been called at bootup from cpu_bringup_and_idle.
- * The cpucpu_bringup_and_idle called cpu_bringup which made a
- * preempt_disable() So this preempt_enable will balance it out.
- */
- preempt_enable();
-}
-
-#else /* !CONFIG_HOTPLUG_CPU */
-static int xen_cpu_disable(void)
-{
- return -ENOSYS;
-}
-
-static void xen_cpu_die(unsigned int cpu)
-{
- BUG();
-}
-
-static void xen_play_dead(void)
-{
- BUG();
-}
-
-#endif
-static void stop_self(void *v)
-{
- int cpu = smp_processor_id();
-
- /* make sure we're not pinning something down */
- load_cr3(swapper_pg_dir);
- /* should set up a minimal gdt */
-
- set_cpu_online(cpu, false);
-
- HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL);
- BUG();
-}
-
-static void xen_stop_other_cpus(int wait)
-{
- smp_call_function(stop_self, NULL, wait);
-}
-
-static void xen_smp_send_reschedule(int cpu)
-{
- xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
-}
-
-static void xen_send_IPI_mask(const struct cpumask *mask,
- enum ipi_vector vector)
-{
- unsigned cpu;
-
- for_each_cpu_and(cpu, mask, cpu_online_mask)
- xen_send_IPI_one(cpu, vector);
-}
-
-static void xen_smp_send_call_function_ipi(const struct cpumask *mask)
-{
- int cpu;
-
- xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
-
- /* Make sure other vcpus get a chance to run if they need to. */
- for_each_cpu(cpu, mask) {
- if (xen_vcpu_stolen(cpu)) {
- HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
- break;
- }
- }
-}
-
-static void xen_smp_send_call_function_single_ipi(int cpu)
-{
- xen_send_IPI_mask(cpumask_of(cpu),
- XEN_CALL_FUNCTION_SINGLE_VECTOR);
-}
-
-static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
-{
- irq_enter();
- generic_smp_call_function_interrupt();
- inc_irq_stat(irq_call_count);
- irq_exit();
-
- return IRQ_HANDLED;
-}
-
-static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
-{
- irq_enter();
- generic_smp_call_function_single_interrupt();
- inc_irq_stat(irq_call_count);
- irq_exit();
-
- return IRQ_HANDLED;
-}
-
-static const struct smp_ops xen_smp_ops __initconst = {
- .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
- .smp_prepare_cpus = xen_smp_prepare_cpus,
- .smp_cpus_done = xen_smp_cpus_done,
-
- .cpu_up = xen_cpu_up,
- .cpu_die = xen_cpu_die,
- .cpu_disable = xen_cpu_disable,
- .play_dead = xen_play_dead,
-
- .stop_other_cpus = xen_stop_other_cpus,
- .smp_send_reschedule = xen_smp_send_reschedule,
-
- .send_call_func_ipi = xen_smp_send_call_function_ipi,
- .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
-};
-
-void __init xen_smp_init(void)
-{
- smp_ops = xen_smp_ops;
- xen_fill_possible_map();
- xen_init_spinlocks();
-}
-
-static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
-{
- native_smp_prepare_cpus(max_cpus);
- WARN_ON(xen_smp_intr_init(0));
-
- xen_init_lock_cpu(0);
-}
-
-static int __cpuinit xen_hvm_cpu_up(unsigned int cpu)
-{
- int rc;
- rc = native_cpu_up(cpu);
- WARN_ON (xen_smp_intr_init(cpu));
- return rc;
-}
-
-static void xen_hvm_cpu_die(unsigned int cpu)
-{
- unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
- unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
- unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
- unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
- native_cpu_die(cpu);
-}
-
-void __init xen_hvm_smp_init(void)
-{
- if (!xen_have_vector_callback)
- return;
- smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
- smp_ops.smp_send_reschedule = xen_smp_send_reschedule;
- smp_ops.cpu_up = xen_hvm_cpu_up;
- smp_ops.cpu_die = xen_hvm_cpu_die;
- smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi;
- smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi;
-}
diff --git a/ANDROID_3.4.5/arch/x86/xen/spinlock.c b/ANDROID_3.4.5/arch/x86/xen/spinlock.c
deleted file mode 100644
index d69cc6c3..00000000
--- a/ANDROID_3.4.5/arch/x86/xen/spinlock.c
+++ /dev/null
@@ -1,454 +0,0 @@
-/*
- * Split spinlock implementation out into its own file, so it can be
- * compiled in a FTRACE-compatible way.
- */
-#include <linux/kernel_stat.h>
-#include <linux/spinlock.h>
-#include <linux/debugfs.h>
-#include <linux/log2.h>
-#include <linux/gfp.h>
-
-#include <asm/paravirt.h>
-
-#include <xen/interface/xen.h>
-#include <xen/events.h>
-
-#include "xen-ops.h"
-#include "debugfs.h"
-
-#ifdef CONFIG_XEN_DEBUG_FS
-static struct xen_spinlock_stats
-{
- u64 taken;
- u32 taken_slow;
- u32 taken_slow_nested;
- u32 taken_slow_pickup;
- u32 taken_slow_spurious;
- u32 taken_slow_irqenable;
-
- u64 released;
- u32 released_slow;
- u32 released_slow_kicked;
-
-#define HISTO_BUCKETS 30
- u32 histo_spin_total[HISTO_BUCKETS+1];
- u32 histo_spin_spinning[HISTO_BUCKETS+1];
- u32 histo_spin_blocked[HISTO_BUCKETS+1];
-
- u64 time_total;
- u64 time_spinning;
- u64 time_blocked;
-} spinlock_stats;
-
-static u8 zero_stats;
-
-static unsigned lock_timeout = 1 << 10;
-#define TIMEOUT lock_timeout
-
-static inline void check_zero(void)
-{
- if (unlikely(zero_stats)) {
- memset(&spinlock_stats, 0, sizeof(spinlock_stats));
- zero_stats = 0;
- }
-}
-
-#define ADD_STATS(elem, val) \
- do { check_zero(); spinlock_stats.elem += (val); } while(0)
-
-static inline u64 spin_time_start(void)
-{
- return xen_clocksource_read();
-}
-
-static void __spin_time_accum(u64 delta, u32 *array)
-{
- unsigned index = ilog2(delta);
-
- check_zero();
-
- if (index < HISTO_BUCKETS)
- array[index]++;
- else
- array[HISTO_BUCKETS]++;
-}
-
-static inline void spin_time_accum_spinning(u64 start)
-{
- u32 delta = xen_clocksource_read() - start;
-
- __spin_time_accum(delta, spinlock_stats.histo_spin_spinning);
- spinlock_stats.time_spinning += delta;
-}
-
-static inline void spin_time_accum_total(u64 start)
-{
- u32 delta = xen_clocksource_read() - start;
-
- __spin_time_accum(delta, spinlock_stats.histo_spin_total);
- spinlock_stats.time_total += delta;
-}
-
-static inline void spin_time_accum_blocked(u64 start)
-{
- u32 delta = xen_clocksource_read() - start;
-
- __spin_time_accum(delta, spinlock_stats.histo_spin_blocked);
- spinlock_stats.time_blocked += delta;
-}
-#else /* !CONFIG_XEN_DEBUG_FS */
-#define TIMEOUT (1 << 10)
-#define ADD_STATS(elem, val) do { (void)(val); } while(0)
-
-static inline u64 spin_time_start(void)
-{
- return 0;
-}
-
-static inline void spin_time_accum_total(u64 start)
-{
-}
-static inline void spin_time_accum_spinning(u64 start)
-{
-}
-static inline void spin_time_accum_blocked(u64 start)
-{
-}
-#endif /* CONFIG_XEN_DEBUG_FS */
-
-/*
- * Size struct xen_spinlock so it's the same as arch_spinlock_t.
- */
-#if NR_CPUS < 256
-typedef u8 xen_spinners_t;
-# define inc_spinners(xl) \
- asm(LOCK_PREFIX " incb %0" : "+m" ((xl)->spinners) : : "memory");
-# define dec_spinners(xl) \
- asm(LOCK_PREFIX " decb %0" : "+m" ((xl)->spinners) : : "memory");
-#else
-typedef u16 xen_spinners_t;
-# define inc_spinners(xl) \
- asm(LOCK_PREFIX " incw %0" : "+m" ((xl)->spinners) : : "memory");
-# define dec_spinners(xl) \
- asm(LOCK_PREFIX " decw %0" : "+m" ((xl)->spinners) : : "memory");
-#endif
-
-struct xen_spinlock {
- unsigned char lock; /* 0 -> free; 1 -> locked */
- xen_spinners_t spinners; /* count of waiting cpus */
-};
-
-static int xen_spin_is_locked(struct arch_spinlock *lock)
-{
- struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-
- return xl->lock != 0;
-}
-
-static int xen_spin_is_contended(struct arch_spinlock *lock)
-{
- struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-
- /* Not strictly true; this is only the count of contended
- lock-takers entering the slow path. */
- return xl->spinners != 0;
-}
-
-static int xen_spin_trylock(struct arch_spinlock *lock)
-{
- struct xen_spinlock *xl = (struct xen_spinlock *)lock;
- u8 old = 1;
-
- asm("xchgb %b0,%1"
- : "+q" (old), "+m" (xl->lock) : : "memory");
-
- return old == 0;
-}
-
-static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
-static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);
-
-/*
- * Mark a cpu as interested in a lock. Returns the CPU's previous
- * lock of interest, in case we got preempted by an interrupt.
- */
-static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl)
-{
- struct xen_spinlock *prev;
-
- prev = __this_cpu_read(lock_spinners);
- __this_cpu_write(lock_spinners, xl);
-
- wmb(); /* set lock of interest before count */
-
- inc_spinners(xl);
-
- return prev;
-}
-
-/*
- * Mark a cpu as no longer interested in a lock. Restores previous
- * lock of interest (NULL for none).
- */
-static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev)
-{
- dec_spinners(xl);
- wmb(); /* decrement count before restoring lock */
- __this_cpu_write(lock_spinners, prev);
-}
-
-static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable)
-{
- struct xen_spinlock *xl = (struct xen_spinlock *)lock;
- struct xen_spinlock *prev;
- int irq = __this_cpu_read(lock_kicker_irq);
- int ret;
- u64 start;
-
- /* If kicker interrupts not initialized yet, just spin */
- if (irq == -1)
- return 0;
-
- start = spin_time_start();
-
- /* announce we're spinning */
- prev = spinning_lock(xl);
-
- ADD_STATS(taken_slow, 1);
- ADD_STATS(taken_slow_nested, prev != NULL);
-
- do {
- unsigned long flags;
-
- /* clear pending */
- xen_clear_irq_pending(irq);
-
- /* check again make sure it didn't become free while
- we weren't looking */
- ret = xen_spin_trylock(lock);
- if (ret) {
- ADD_STATS(taken_slow_pickup, 1);
-
- /*
- * If we interrupted another spinlock while it
- * was blocking, make sure it doesn't block
- * without rechecking the lock.
- */
- if (prev != NULL)
- xen_set_irq_pending(irq);
- goto out;
- }
-
- flags = arch_local_save_flags();
- if (irq_enable) {
- ADD_STATS(taken_slow_irqenable, 1);
- raw_local_irq_enable();
- }
-
- /*
- * Block until irq becomes pending. If we're
- * interrupted at this point (after the trylock but
- * before entering the block), then the nested lock
- * handler guarantees that the irq will be left
- * pending if there's any chance the lock became free;
- * xen_poll_irq() returns immediately if the irq is
- * pending.
- */
- xen_poll_irq(irq);
-
- raw_local_irq_restore(flags);
-
- ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq));
- } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */
-
- kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
-
-out:
- unspinning_lock(xl, prev);
- spin_time_accum_blocked(start);
-
- return ret;
-}
-
-static inline void __xen_spin_lock(struct arch_spinlock *lock, bool irq_enable)
-{
- struct xen_spinlock *xl = (struct xen_spinlock *)lock;
- unsigned timeout;
- u8 oldval;
- u64 start_spin;
-
- ADD_STATS(taken, 1);
-
- start_spin = spin_time_start();
-
- do {
- u64 start_spin_fast = spin_time_start();
-
- timeout = TIMEOUT;
-
- asm("1: xchgb %1,%0\n"
- " testb %1,%1\n"
- " jz 3f\n"
- "2: rep;nop\n"
- " cmpb $0,%0\n"
- " je 1b\n"
- " dec %2\n"
- " jnz 2b\n"
- "3:\n"
- : "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
- : "1" (1)
- : "memory");
-
- spin_time_accum_spinning(start_spin_fast);
-
- } while (unlikely(oldval != 0 &&
- (TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable))));
-
- spin_time_accum_total(start_spin);
-}
-
-static void xen_spin_lock(struct arch_spinlock *lock)
-{
- __xen_spin_lock(lock, false);
-}
-
-static void xen_spin_lock_flags(struct arch_spinlock *lock, unsigned long flags)
-{
- __xen_spin_lock(lock, !raw_irqs_disabled_flags(flags));
-}
-
-static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
-{
- int cpu;
-
- ADD_STATS(released_slow, 1);
-
- for_each_online_cpu(cpu) {
- /* XXX should mix up next cpu selection */
- if (per_cpu(lock_spinners, cpu) == xl) {
- ADD_STATS(released_slow_kicked, 1);
- xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
- break;
- }
- }
-}
-
-static void xen_spin_unlock(struct arch_spinlock *lock)
-{
- struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-
- ADD_STATS(released, 1);
-
- smp_wmb(); /* make sure no writes get moved after unlock */
- xl->lock = 0; /* release lock */
-
- /*
- * Make sure unlock happens before checking for waiting
- * spinners. We need a strong barrier to enforce the
- * write-read ordering to different memory locations, as the
- * CPU makes no implied guarantees about their ordering.
- */
- mb();
-
- if (unlikely(xl->spinners))
- xen_spin_unlock_slow(xl);
-}
-
-static irqreturn_t dummy_handler(int irq, void *dev_id)
-{
- BUG();
- return IRQ_HANDLED;
-}
-
-void __cpuinit xen_init_lock_cpu(int cpu)
-{
- int irq;
- const char *name;
-
- name = kasprintf(GFP_KERNEL, "spinlock%d", cpu);
- irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR,
- cpu,
- dummy_handler,
- IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
- name,
- NULL);
-
- if (irq >= 0) {
- disable_irq(irq); /* make sure it's never delivered */
- per_cpu(lock_kicker_irq, cpu) = irq;
- }
-
- printk("cpu %d spinlock event irq %d\n", cpu, irq);
-}
-
-void xen_uninit_lock_cpu(int cpu)
-{
- unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL);
-}
-
-void __init xen_init_spinlocks(void)
-{
- BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t));
-
- pv_lock_ops.spin_is_locked = xen_spin_is_locked;
- pv_lock_ops.spin_is_contended = xen_spin_is_contended;
- pv_lock_ops.spin_lock = xen_spin_lock;
- pv_lock_ops.spin_lock_flags = xen_spin_lock_flags;
- pv_lock_ops.spin_trylock = xen_spin_trylock;
- pv_lock_ops.spin_unlock = xen_spin_unlock;
-}
-
-#ifdef CONFIG_XEN_DEBUG_FS
-
-static struct dentry *d_spin_debug;
-
-static int __init xen_spinlock_debugfs(void)
-{
- struct dentry *d_xen = xen_init_debugfs();
-
- if (d_xen == NULL)
- return -ENOMEM;
-
- d_spin_debug = debugfs_create_dir("spinlocks", d_xen);
-
- debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
-
- debugfs_create_u32("timeout", 0644, d_spin_debug, &lock_timeout);
-
- debugfs_create_u64("taken", 0444, d_spin_debug, &spinlock_stats.taken);
- debugfs_create_u32("taken_slow", 0444, d_spin_debug,
- &spinlock_stats.taken_slow);
- debugfs_create_u32("taken_slow_nested", 0444, d_spin_debug,
- &spinlock_stats.taken_slow_nested);
- debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
- &spinlock_stats.taken_slow_pickup);
- debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug,
- &spinlock_stats.taken_slow_spurious);
- debugfs_create_u32("taken_slow_irqenable", 0444, d_spin_debug,
- &spinlock_stats.taken_slow_irqenable);
-
- debugfs_create_u64("released", 0444, d_spin_debug, &spinlock_stats.released);
- debugfs_create_u32("released_slow", 0444, d_spin_debug,
- &spinlock_stats.released_slow);
- debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
- &spinlock_stats.released_slow_kicked);
-
- debugfs_create_u64("time_spinning", 0444, d_spin_debug,
- &spinlock_stats.time_spinning);
- debugfs_create_u64("time_blocked", 0444, d_spin_debug,
- &spinlock_stats.time_blocked);
- debugfs_create_u64("time_total", 0444, d_spin_debug,
- &spinlock_stats.time_total);
-
- xen_debugfs_create_u32_array("histo_total", 0444, d_spin_debug,
- spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1);
- xen_debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug,
- spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1);
- xen_debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
- spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
-
- return 0;
-}
-fs_initcall(xen_spinlock_debugfs);
-
-#endif /* CONFIG_XEN_DEBUG_FS */
diff --git a/ANDROID_3.4.5/arch/x86/xen/suspend.c b/ANDROID_3.4.5/arch/x86/xen/suspend.c
deleted file mode 100644
index 45329c8c..00000000
--- a/ANDROID_3.4.5/arch/x86/xen/suspend.c
+++ /dev/null
@@ -1,80 +0,0 @@
-#include <linux/types.h>
-#include <linux/clockchips.h>
-
-#include <xen/interface/xen.h>
-#include <xen/grant_table.h>
-#include <xen/events.h>
-
-#include <asm/xen/hypercall.h>
-#include <asm/xen/page.h>
-#include <asm/fixmap.h>
-
-#include "xen-ops.h"
-#include "mmu.h"
-
-void xen_arch_pre_suspend(void)
-{
- xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
- xen_start_info->console.domU.mfn =
- mfn_to_pfn(xen_start_info->console.domU.mfn);
-
- BUG_ON(!irqs_disabled());
-
- HYPERVISOR_shared_info = &xen_dummy_shared_info;
- if (HYPERVISOR_update_va_mapping(fix_to_virt(FIX_PARAVIRT_BOOTMAP),
- __pte_ma(0), 0))
- BUG();
-}
-
-void xen_arch_hvm_post_suspend(int suspend_cancelled)
-{
-#ifdef CONFIG_XEN_PVHVM
- int cpu;
- xen_hvm_init_shared_info();
- xen_callback_vector();
- xen_unplug_emulated_devices();
- if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
- for_each_online_cpu(cpu) {
- xen_setup_runstate_info(cpu);
- }
- }
-#endif
-}
-
-void xen_arch_post_suspend(int suspend_cancelled)
-{
- xen_build_mfn_list_list();
-
- xen_setup_shared_info();
-
- if (suspend_cancelled) {
- xen_start_info->store_mfn =
- pfn_to_mfn(xen_start_info->store_mfn);
- xen_start_info->console.domU.mfn =
- pfn_to_mfn(xen_start_info->console.domU.mfn);
- } else {
-#ifdef CONFIG_SMP
- BUG_ON(xen_cpu_initialized_map == NULL);
- cpumask_copy(xen_cpu_initialized_map, cpu_online_mask);
-#endif
- xen_vcpu_restore();
- }
-
-}
-
-static void xen_vcpu_notify_restore(void *data)
-{
- unsigned long reason = (unsigned long)data;
-
- /* Boot processor notified via generic timekeeping_resume() */
- if ( smp_processor_id() == 0)
- return;
-
- clockevents_notify(reason, NULL);
-}
-
-void xen_arch_resume(void)
-{
- on_each_cpu(xen_vcpu_notify_restore,
- (void *)CLOCK_EVT_NOTIFY_RESUME, 1);
-}
diff --git a/ANDROID_3.4.5/arch/x86/xen/time.c b/ANDROID_3.4.5/arch/x86/xen/time.c
deleted file mode 100644
index 0296a952..00000000
--- a/ANDROID_3.4.5/arch/x86/xen/time.c
+++ /dev/null
@@ -1,525 +0,0 @@
-/*
- * Xen time implementation.
- *
- * This is implemented in terms of a clocksource driver which uses
- * the hypervisor clock as a nanosecond timebase, and a clockevent
- * driver which uses the hypervisor's timer mechanism.
- *
- * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
- */
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/clocksource.h>
-#include <linux/clockchips.h>
-#include <linux/kernel_stat.h>
-#include <linux/math64.h>
-#include <linux/gfp.h>
-
-#include <asm/pvclock.h>
-#include <asm/xen/hypervisor.h>
-#include <asm/xen/hypercall.h>
-
-#include <xen/events.h>
-#include <xen/features.h>
-#include <xen/interface/xen.h>
-#include <xen/interface/vcpu.h>
-
-#include "xen-ops.h"
-
-/* Xen may fire a timer up to this many ns early */
-#define TIMER_SLOP 100000
-#define NS_PER_TICK (1000000000LL / HZ)
-
-/* runstate info updated by Xen */
-static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
-
-/* snapshots of runstate info */
-static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
-
-/* unused ns of stolen and blocked time */
-static DEFINE_PER_CPU(u64, xen_residual_stolen);
-static DEFINE_PER_CPU(u64, xen_residual_blocked);
-
-/* return an consistent snapshot of 64-bit time/counter value */
-static u64 get64(const u64 *p)
-{
- u64 ret;
-
- if (BITS_PER_LONG < 64) {
- u32 *p32 = (u32 *)p;
- u32 h, l;
-
- /*
- * Read high then low, and then make sure high is
- * still the same; this will only loop if low wraps
- * and carries into high.
- * XXX some clean way to make this endian-proof?
- */
- do {
- h = p32[1];
- barrier();
- l = p32[0];
- barrier();
- } while (p32[1] != h);
-
- ret = (((u64)h) << 32) | l;
- } else
- ret = *p;
-
- return ret;
-}
-
-/*
- * Runstate accounting
- */
-static void get_runstate_snapshot(struct vcpu_runstate_info *res)
-{
- u64 state_time;
- struct vcpu_runstate_info *state;
-
- BUG_ON(preemptible());
-
- state = &__get_cpu_var(xen_runstate);
-
- /*
- * The runstate info is always updated by the hypervisor on
- * the current CPU, so there's no need to use anything
- * stronger than a compiler barrier when fetching it.
- */
- do {
- state_time = get64(&state->state_entry_time);
- barrier();
- *res = *state;
- barrier();
- } while (get64(&state->state_entry_time) != state_time);
-}
-
-/* return true when a vcpu could run but has no real cpu to run on */
-bool xen_vcpu_stolen(int vcpu)
-{
- return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
-}
-
-void xen_setup_runstate_info(int cpu)
-{
- struct vcpu_register_runstate_memory_area area;
-
- area.addr.v = &per_cpu(xen_runstate, cpu);
-
- if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
- cpu, &area))
- BUG();
-}
-
-static void do_stolen_accounting(void)
-{
- struct vcpu_runstate_info state;
- struct vcpu_runstate_info *snap;
- s64 blocked, runnable, offline, stolen;
- cputime_t ticks;
-
- get_runstate_snapshot(&state);
-
- WARN_ON(state.state != RUNSTATE_running);
-
- snap = &__get_cpu_var(xen_runstate_snapshot);
-
- /* work out how much time the VCPU has not been runn*ing* */
- blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked];
- runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable];
- offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline];
-
- *snap = state;
-
- /* Add the appropriate number of ticks of stolen time,
- including any left-overs from last time. */
- stolen = runnable + offline + __this_cpu_read(xen_residual_stolen);
-
- if (stolen < 0)
- stolen = 0;
-
- ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
- __this_cpu_write(xen_residual_stolen, stolen);
- account_steal_ticks(ticks);
-
- /* Add the appropriate number of ticks of blocked time,
- including any left-overs from last time. */
- blocked += __this_cpu_read(xen_residual_blocked);
-
- if (blocked < 0)
- blocked = 0;
-
- ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
- __this_cpu_write(xen_residual_blocked, blocked);
- account_idle_ticks(ticks);
-}
-
-/* Get the TSC speed from Xen */
-static unsigned long xen_tsc_khz(void)
-{
- struct pvclock_vcpu_time_info *info =
- &HYPERVISOR_shared_info->vcpu_info[0].time;
-
- return pvclock_tsc_khz(info);
-}
-
-cycle_t xen_clocksource_read(void)
-{
- struct pvclock_vcpu_time_info *src;
- cycle_t ret;
-
- preempt_disable_notrace();
- src = &__get_cpu_var(xen_vcpu)->time;
- ret = pvclock_clocksource_read(src);
- preempt_enable_notrace();
- return ret;
-}
-
-static cycle_t xen_clocksource_get_cycles(struct clocksource *cs)
-{
- return xen_clocksource_read();
-}
-
-static void xen_read_wallclock(struct timespec *ts)
-{
- struct shared_info *s = HYPERVISOR_shared_info;
- struct pvclock_wall_clock *wall_clock = &(s->wc);
- struct pvclock_vcpu_time_info *vcpu_time;
-
- vcpu_time = &get_cpu_var(xen_vcpu)->time;
- pvclock_read_wallclock(wall_clock, vcpu_time, ts);
- put_cpu_var(xen_vcpu);
-}
-
-static unsigned long xen_get_wallclock(void)
-{
- struct timespec ts;
-
- xen_read_wallclock(&ts);
- return ts.tv_sec;
-}
-
-static int xen_set_wallclock(unsigned long now)
-{
- struct xen_platform_op op;
- int rc;
-
- /* do nothing for domU */
- if (!xen_initial_domain())
- return -1;
-
- op.cmd = XENPF_settime;
- op.u.settime.secs = now;
- op.u.settime.nsecs = 0;
- op.u.settime.system_time = xen_clocksource_read();
-
- rc = HYPERVISOR_dom0_op(&op);
- WARN(rc != 0, "XENPF_settime failed: now=%ld\n", now);
-
- return rc;
-}
-
-static struct clocksource xen_clocksource __read_mostly = {
- .name = "xen",
- .rating = 400,
- .read = xen_clocksource_get_cycles,
- .mask = ~0,
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
-};
-
-/*
- Xen clockevent implementation
-
- Xen has two clockevent implementations:
-
- The old timer_op one works with all released versions of Xen prior
- to version 3.0.4. This version of the hypervisor provides a
- single-shot timer with nanosecond resolution. However, sharing the
- same event channel is a 100Hz tick which is delivered while the
- vcpu is running. We don't care about or use this tick, but it will
- cause the core time code to think the timer fired too soon, and
- will end up resetting it each time. It could be filtered, but
- doing so has complications when the ktime clocksource is not yet
- the xen clocksource (ie, at boot time).
-
- The new vcpu_op-based timer interface allows the tick timer period
- to be changed or turned off. The tick timer is not useful as a
- periodic timer because events are only delivered to running vcpus.
- The one-shot timer can report when a timeout is in the past, so
- set_next_event is capable of returning -ETIME when appropriate.
- This interface is used when available.
-*/
-
-
-/*
- Get a hypervisor absolute time. In theory we could maintain an
- offset between the kernel's time and the hypervisor's time, and
- apply that to a kernel's absolute timeout. Unfortunately the
- hypervisor and kernel times can drift even if the kernel is using
- the Xen clocksource, because ntp can warp the kernel's clocksource.
-*/
-static s64 get_abs_timeout(unsigned long delta)
-{
- return xen_clocksource_read() + delta;
-}
-
-static void xen_timerop_set_mode(enum clock_event_mode mode,
- struct clock_event_device *evt)
-{
- switch (mode) {
- case CLOCK_EVT_MODE_PERIODIC:
- /* unsupported */
- WARN_ON(1);
- break;
-
- case CLOCK_EVT_MODE_ONESHOT:
- case CLOCK_EVT_MODE_RESUME:
- break;
-
- case CLOCK_EVT_MODE_UNUSED:
- case CLOCK_EVT_MODE_SHUTDOWN:
- HYPERVISOR_set_timer_op(0); /* cancel timeout */
- break;
- }
-}
-
-static int xen_timerop_set_next_event(unsigned long delta,
- struct clock_event_device *evt)
-{
- WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);
-
- if (HYPERVISOR_set_timer_op(get_abs_timeout(delta)) < 0)
- BUG();
-
- /* We may have missed the deadline, but there's no real way of
- knowing for sure. If the event was in the past, then we'll
- get an immediate interrupt. */
-
- return 0;
-}
-
-static const struct clock_event_device xen_timerop_clockevent = {
- .name = "xen",
- .features = CLOCK_EVT_FEAT_ONESHOT,
-
- .max_delta_ns = 0xffffffff,
- .min_delta_ns = TIMER_SLOP,
-
- .mult = 1,
- .shift = 0,
- .rating = 500,
-
- .set_mode = xen_timerop_set_mode,
- .set_next_event = xen_timerop_set_next_event,
-};
-
-
-
-static void xen_vcpuop_set_mode(enum clock_event_mode mode,
- struct clock_event_device *evt)
-{
- int cpu = smp_processor_id();
-
- switch (mode) {
- case CLOCK_EVT_MODE_PERIODIC:
- WARN_ON(1); /* unsupported */
- break;
-
- case CLOCK_EVT_MODE_ONESHOT:
- if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
- BUG();
- break;
-
- case CLOCK_EVT_MODE_UNUSED:
- case CLOCK_EVT_MODE_SHUTDOWN:
- if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, cpu, NULL) ||
- HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
- BUG();
- break;
- case CLOCK_EVT_MODE_RESUME:
- break;
- }
-}
-
-static int xen_vcpuop_set_next_event(unsigned long delta,
- struct clock_event_device *evt)
-{
- int cpu = smp_processor_id();
- struct vcpu_set_singleshot_timer single;
- int ret;
-
- WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);
-
- single.timeout_abs_ns = get_abs_timeout(delta);
- single.flags = VCPU_SSHOTTMR_future;
-
- ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, cpu, &single);
-
- BUG_ON(ret != 0 && ret != -ETIME);
-
- return ret;
-}
-
-static const struct clock_event_device xen_vcpuop_clockevent = {
- .name = "xen",
- .features = CLOCK_EVT_FEAT_ONESHOT,
-
- .max_delta_ns = 0xffffffff,
- .min_delta_ns = TIMER_SLOP,
-
- .mult = 1,
- .shift = 0,
- .rating = 500,
-
- .set_mode = xen_vcpuop_set_mode,
- .set_next_event = xen_vcpuop_set_next_event,
-};
-
-static const struct clock_event_device *xen_clockevent =
- &xen_timerop_clockevent;
-static DEFINE_PER_CPU(struct clock_event_device, xen_clock_events);
-
-static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
-{
- struct clock_event_device *evt = &__get_cpu_var(xen_clock_events);
- irqreturn_t ret;
-
- ret = IRQ_NONE;
- if (evt->event_handler) {
- evt->event_handler(evt);
- ret = IRQ_HANDLED;
- }
-
- do_stolen_accounting();
-
- return ret;
-}
-
-void xen_setup_timer(int cpu)
-{
- const char *name;
- struct clock_event_device *evt;
- int irq;
-
- printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);
-
- name = kasprintf(GFP_KERNEL, "timer%d", cpu);
- if (!name)
- name = "<timer kasprintf failed>";
-
- irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
- IRQF_DISABLED|IRQF_PERCPU|
- IRQF_NOBALANCING|IRQF_TIMER|
- IRQF_FORCE_RESUME,
- name, NULL);
-
- evt = &per_cpu(xen_clock_events, cpu);
- memcpy(evt, xen_clockevent, sizeof(*evt));
-
- evt->cpumask = cpumask_of(cpu);
- evt->irq = irq;
-}
-
-void xen_teardown_timer(int cpu)
-{
- struct clock_event_device *evt;
- BUG_ON(cpu == 0);
- evt = &per_cpu(xen_clock_events, cpu);
- unbind_from_irqhandler(evt->irq, NULL);
-}
-
-void xen_setup_cpu_clockevents(void)
-{
- BUG_ON(preemptible());
-
- clockevents_register_device(&__get_cpu_var(xen_clock_events));
-}
-
-void xen_timer_resume(void)
-{
- int cpu;
-
- pvclock_resume();
-
- if (xen_clockevent != &xen_vcpuop_clockevent)
- return;
-
- for_each_online_cpu(cpu) {
- if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
- BUG();
- }
-}
-
-static const struct pv_time_ops xen_time_ops __initconst = {
- .sched_clock = xen_clocksource_read,
-};
-
-static void __init xen_time_init(void)
-{
- int cpu = smp_processor_id();
- struct timespec tp;
-
- clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC);
-
- if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
- /* Successfully turned off 100Hz tick, so we have the
- vcpuop-based timer interface */
- printk(KERN_DEBUG "Xen: using vcpuop timer interface\n");
- xen_clockevent = &xen_vcpuop_clockevent;
- }
-
- /* Set initial system time with full resolution */
- xen_read_wallclock(&tp);
- do_settimeofday(&tp);
-
- setup_force_cpu_cap(X86_FEATURE_TSC);
-
- xen_setup_runstate_info(cpu);
- xen_setup_timer(cpu);
- xen_setup_cpu_clockevents();
-}
-
-void __init xen_init_time_ops(void)
-{
- pv_time_ops = xen_time_ops;
-
- x86_init.timers.timer_init = xen_time_init;
- x86_init.timers.setup_percpu_clockev = x86_init_noop;
- x86_cpuinit.setup_percpu_clockev = x86_init_noop;
-
- x86_platform.calibrate_tsc = xen_tsc_khz;
- x86_platform.get_wallclock = xen_get_wallclock;
- x86_platform.set_wallclock = xen_set_wallclock;
-}
-
-#ifdef CONFIG_XEN_PVHVM
-static void xen_hvm_setup_cpu_clockevents(void)
-{
- int cpu = smp_processor_id();
- xen_setup_runstate_info(cpu);
- xen_setup_timer(cpu);
- xen_setup_cpu_clockevents();
-}
-
-void __init xen_hvm_init_time_ops(void)
-{
- /* vector callback is needed otherwise we cannot receive interrupts
- * on cpu > 0 and at this point we don't know how many cpus are
- * available */
- if (!xen_have_vector_callback)
- return;
- if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
- printk(KERN_INFO "Xen doesn't support pvclock on HVM,"
- "disable pv timer\n");
- return;
- }
-
- pv_time_ops = xen_time_ops;
- x86_init.timers.setup_percpu_clockev = xen_time_init;
- x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;
-
- x86_platform.calibrate_tsc = xen_tsc_khz;
- x86_platform.get_wallclock = xen_get_wallclock;
- x86_platform.set_wallclock = xen_set_wallclock;
-}
-#endif
diff --git a/ANDROID_3.4.5/arch/x86/xen/trace.c b/ANDROID_3.4.5/arch/x86/xen/trace.c
deleted file mode 100644
index 520022d1..00000000
--- a/ANDROID_3.4.5/arch/x86/xen/trace.c
+++ /dev/null
@@ -1,62 +0,0 @@
-#include <linux/ftrace.h>
-#include <xen/interface/xen.h>
-
-#define N(x) [__HYPERVISOR_##x] = "("#x")"
-static const char *xen_hypercall_names[] = {
- N(set_trap_table),
- N(mmu_update),
- N(set_gdt),
- N(stack_switch),
- N(set_callbacks),
- N(fpu_taskswitch),
- N(sched_op_compat),
- N(dom0_op),
- N(set_debugreg),
- N(get_debugreg),
- N(update_descriptor),
- N(memory_op),
- N(multicall),
- N(update_va_mapping),
- N(set_timer_op),
- N(event_channel_op_compat),
- N(xen_version),
- N(console_io),
- N(physdev_op_compat),
- N(grant_table_op),
- N(vm_assist),
- N(update_va_mapping_otherdomain),
- N(iret),
- N(vcpu_op),
- N(set_segment_base),
- N(mmuext_op),
- N(acm_op),
- N(nmi_op),
- N(sched_op),
- N(callback_op),
- N(xenoprof_op),
- N(event_channel_op),
- N(physdev_op),
- N(hvm_op),
-
-/* Architecture-specific hypercall definitions. */
- N(arch_0),
- N(arch_1),
- N(arch_2),
- N(arch_3),
- N(arch_4),
- N(arch_5),
- N(arch_6),
- N(arch_7),
-};
-#undef N
-
-static const char *xen_hypercall_name(unsigned op)
-{
- if (op < ARRAY_SIZE(xen_hypercall_names) && xen_hypercall_names[op] != NULL)
- return xen_hypercall_names[op];
-
- return "";
-}
-
-#define CREATE_TRACE_POINTS
-#include <trace/events/xen.h>
diff --git a/ANDROID_3.4.5/arch/x86/xen/vdso.h b/ANDROID_3.4.5/arch/x86/xen/vdso.h
deleted file mode 100644
index 861fedfe..00000000
--- a/ANDROID_3.4.5/arch/x86/xen/vdso.h
+++ /dev/null
@@ -1,4 +0,0 @@
-/* Bit used for the pseudo-hwcap for non-negative segments. We use
- bit 1 to avoid bugs in some versions of glibc when bit 0 is
- used; the choice is otherwise arbitrary. */
-#define VDSO_NOTE_NONEGSEG_BIT 1
diff --git a/ANDROID_3.4.5/arch/x86/xen/vga.c b/ANDROID_3.4.5/arch/x86/xen/vga.c
deleted file mode 100644
index 1cd7f4d1..00000000
--- a/ANDROID_3.4.5/arch/x86/xen/vga.c
+++ /dev/null
@@ -1,67 +0,0 @@
-#include <linux/screen_info.h>
-#include <linux/init.h>
-
-#include <asm/bootparam.h>
-#include <asm/setup.h>
-
-#include <xen/interface/xen.h>
-
-#include "xen-ops.h"
-
-void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size)
-{
- struct screen_info *screen_info = &boot_params.screen_info;
-
- /* This is drawn from a dump from vgacon:startup in
- * standard Linux. */
- screen_info->orig_video_mode = 3;
- screen_info->orig_video_isVGA = 1;
- screen_info->orig_video_lines = 25;
- screen_info->orig_video_cols = 80;
- screen_info->orig_video_ega_bx = 3;
- screen_info->orig_video_points = 16;
- screen_info->orig_y = screen_info->orig_video_lines - 1;
-
- switch (info->video_type) {
- case XEN_VGATYPE_TEXT_MODE_3:
- if (size < offsetof(struct dom0_vga_console_info, u.text_mode_3)
- + sizeof(info->u.text_mode_3))
- break;
- screen_info->orig_video_lines = info->u.text_mode_3.rows;
- screen_info->orig_video_cols = info->u.text_mode_3.columns;
- screen_info->orig_x = info->u.text_mode_3.cursor_x;
- screen_info->orig_y = info->u.text_mode_3.cursor_y;
- screen_info->orig_video_points =
- info->u.text_mode_3.font_height;
- break;
-
- case XEN_VGATYPE_VESA_LFB:
- if (size < offsetof(struct dom0_vga_console_info,
- u.vesa_lfb.gbl_caps))
- break;
- screen_info->orig_video_isVGA = VIDEO_TYPE_VLFB;
- screen_info->lfb_width = info->u.vesa_lfb.width;
- screen_info->lfb_height = info->u.vesa_lfb.height;
- screen_info->lfb_depth = info->u.vesa_lfb.bits_per_pixel;
- screen_info->lfb_base = info->u.vesa_lfb.lfb_base;
- screen_info->lfb_size = info->u.vesa_lfb.lfb_size;
- screen_info->lfb_linelength = info->u.vesa_lfb.bytes_per_line;
- screen_info->red_size = info->u.vesa_lfb.red_size;
- screen_info->red_pos = info->u.vesa_lfb.red_pos;
- screen_info->green_size = info->u.vesa_lfb.green_size;
- screen_info->green_pos = info->u.vesa_lfb.green_pos;
- screen_info->blue_size = info->u.vesa_lfb.blue_size;
- screen_info->blue_pos = info->u.vesa_lfb.blue_pos;
- screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size;
- screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos;
- if (size >= offsetof(struct dom0_vga_console_info,
- u.vesa_lfb.gbl_caps)
- + sizeof(info->u.vesa_lfb.gbl_caps))
- screen_info->capabilities = info->u.vesa_lfb.gbl_caps;
- if (size >= offsetof(struct dom0_vga_console_info,
- u.vesa_lfb.mode_attrs)
- + sizeof(info->u.vesa_lfb.mode_attrs))
- screen_info->vesa_attributes = info->u.vesa_lfb.mode_attrs;
- break;
- }
-}
diff --git a/ANDROID_3.4.5/arch/x86/xen/xen-asm.S b/ANDROID_3.4.5/arch/x86/xen/xen-asm.S
deleted file mode 100644
index 3e45aa00..00000000
--- a/ANDROID_3.4.5/arch/x86/xen/xen-asm.S
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Asm versions of Xen pv-ops, suitable for either direct use or
- * inlining. The inline versions are the same as the direct-use
- * versions, with the pre- and post-amble chopped off.
- *
- * This code is encoded for size rather than absolute efficiency, with
- * a view to being able to inline as much as possible.
- *
- * We only bother with direct forms (ie, vcpu in percpu data) of the
- * operations here; the indirect forms are better handled in C, since
- * they're generally too large to inline anyway.
- */
-
-#include <asm/asm-offsets.h>
-#include <asm/percpu.h>
-#include <asm/processor-flags.h>
-
-#include "xen-asm.h"
-
-/*
- * Enable events. This clears the event mask and tests the pending
- * event status with one and operation. If there are pending events,
- * then enter the hypervisor to get them handled.
- */
-ENTRY(xen_irq_enable_direct)
- /* Unmask events */
- movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
-
- /*
- * Preempt here doesn't matter because that will deal with any
- * pending interrupts. The pending check may end up being run
- * on the wrong CPU, but that doesn't hurt.
- */
-
- /* Test for pending */
- testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
- jz 1f
-
-2: call check_events
-1:
-ENDPATCH(xen_irq_enable_direct)
- ret
- ENDPROC(xen_irq_enable_direct)
- RELOC(xen_irq_enable_direct, 2b+1)
-
-
-/*
- * Disabling events is simply a matter of making the event mask
- * non-zero.
- */
-ENTRY(xen_irq_disable_direct)
- movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
-ENDPATCH(xen_irq_disable_direct)
- ret
- ENDPROC(xen_irq_disable_direct)
- RELOC(xen_irq_disable_direct, 0)
-
-/*
- * (xen_)save_fl is used to get the current interrupt enable status.
- * Callers expect the status to be in X86_EFLAGS_IF, and other bits
- * may be set in the return value. We take advantage of this by
- * making sure that X86_EFLAGS_IF has the right value (and other bits
- * in that byte are 0), but other bits in the return value are
- * undefined. We need to toggle the state of the bit, because Xen and
- * x86 use opposite senses (mask vs enable).
- */
-ENTRY(xen_save_fl_direct)
- testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
- setz %ah
- addb %ah, %ah
-ENDPATCH(xen_save_fl_direct)
- ret
- ENDPROC(xen_save_fl_direct)
- RELOC(xen_save_fl_direct, 0)
-
-
-/*
- * In principle the caller should be passing us a value return from
- * xen_save_fl_direct, but for robustness sake we test only the
- * X86_EFLAGS_IF flag rather than the whole byte. After setting the
- * interrupt mask state, it checks for unmasked pending events and
- * enters the hypervisor to get them delivered if so.
- */
-ENTRY(xen_restore_fl_direct)
-#ifdef CONFIG_X86_64
- testw $X86_EFLAGS_IF, %di
-#else
- testb $X86_EFLAGS_IF>>8, %ah
-#endif
- setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
- /*
- * Preempt here doesn't matter because that will deal with any
- * pending interrupts. The pending check may end up being run
- * on the wrong CPU, but that doesn't hurt.
- */
-
- /* check for unmasked and pending */
- cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
- jnz 1f
-2: call check_events
-1:
-ENDPATCH(xen_restore_fl_direct)
- ret
- ENDPROC(xen_restore_fl_direct)
- RELOC(xen_restore_fl_direct, 2b+1)
-
-
-/*
- * Force an event check by making a hypercall, but preserve regs
- * before making the call.
- */
-check_events:
-#ifdef CONFIG_X86_32
- push %eax
- push %ecx
- push %edx
- call xen_force_evtchn_callback
- pop %edx
- pop %ecx
- pop %eax
-#else
- push %rax
- push %rcx
- push %rdx
- push %rsi
- push %rdi
- push %r8
- push %r9
- push %r10
- push %r11
- call xen_force_evtchn_callback
- pop %r11
- pop %r10
- pop %r9
- pop %r8
- pop %rdi
- pop %rsi
- pop %rdx
- pop %rcx
- pop %rax
-#endif
- ret
diff --git a/ANDROID_3.4.5/arch/x86/xen/xen-asm.h b/ANDROID_3.4.5/arch/x86/xen/xen-asm.h
deleted file mode 100644
index 46527646..00000000
--- a/ANDROID_3.4.5/arch/x86/xen/xen-asm.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _XEN_XEN_ASM_H
-#define _XEN_XEN_ASM_H
-
-#include <linux/linkage.h>
-
-#define RELOC(x, v) .globl x##_reloc; x##_reloc=v
-#define ENDPATCH(x) .globl x##_end; x##_end=.
-
-/* Pseudo-flag used for virtual NMI, which we don't implement yet */
-#define XEN_EFLAGS_NMI 0x80000000
-
-#endif
diff --git a/ANDROID_3.4.5/arch/x86/xen/xen-asm_32.S b/ANDROID_3.4.5/arch/x86/xen/xen-asm_32.S
deleted file mode 100644
index b040b0e5..00000000
--- a/ANDROID_3.4.5/arch/x86/xen/xen-asm_32.S
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * Asm versions of Xen pv-ops, suitable for either direct use or
- * inlining. The inline versions are the same as the direct-use
- * versions, with the pre- and post-amble chopped off.
- *
- * This code is encoded for size rather than absolute efficiency, with
- * a view to being able to inline as much as possible.
- *
- * We only bother with direct forms (ie, vcpu in pda) of the
- * operations here; the indirect forms are better handled in C, since
- * they're generally too large to inline anyway.
- */
-
-#include <asm/thread_info.h>
-#include <asm/processor-flags.h>
-#include <asm/segment.h>
-
-#include <xen/interface/xen.h>
-
-#include "xen-asm.h"
-
-/*
- * Force an event check by making a hypercall, but preserve regs
- * before making the call.
- */
-check_events:
- push %eax
- push %ecx
- push %edx
- call xen_force_evtchn_callback
- pop %edx
- pop %ecx
- pop %eax
- ret
-
-/*
- * We can't use sysexit directly, because we're not running in ring0.
- * But we can easily fake it up using iret. Assuming xen_sysexit is
- * jumped to with a standard stack frame, we can just strip it back to
- * a standard iret frame and use iret.
- */
-ENTRY(xen_sysexit)
- movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */
- orl $X86_EFLAGS_IF, PT_EFLAGS(%esp)
- lea PT_EIP(%esp), %esp
-
- jmp xen_iret
-ENDPROC(xen_sysexit)
-
-/*
- * This is run where a normal iret would be run, with the same stack setup:
- * 8: eflags
- * 4: cs
- * esp-> 0: eip
- *
- * This attempts to make sure that any pending events are dealt with
- * on return to usermode, but there is a small window in which an
- * event can happen just before entering usermode. If the nested
- * interrupt ends up setting one of the TIF_WORK_MASK pending work
- * flags, they will not be tested again before returning to
- * usermode. This means that a process can end up with pending work,
- * which will be unprocessed until the process enters and leaves the
- * kernel again, which could be an unbounded amount of time. This
- * means that a pending signal or reschedule event could be
- * indefinitely delayed.
- *
- * The fix is to notice a nested interrupt in the critical window, and
- * if one occurs, then fold the nested interrupt into the current
- * interrupt stack frame, and re-process it iteratively rather than
- * recursively. This means that it will exit via the normal path, and
- * all pending work will be dealt with appropriately.
- *
- * Because the nested interrupt handler needs to deal with the current
- * stack state in whatever form its in, we keep things simple by only
- * using a single register which is pushed/popped on the stack.
- */
-ENTRY(xen_iret)
- /* test eflags for special cases */
- testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp)
- jnz hyper_iret
-
- push %eax
- ESP_OFFSET=4 # bytes pushed onto stack
-
- /*
- * Store vcpu_info pointer for easy access. Do it this way to
- * avoid having to reload %fs
- */
-#ifdef CONFIG_SMP
- GET_THREAD_INFO(%eax)
- movl TI_cpu(%eax), %eax
- movl __per_cpu_offset(,%eax,4), %eax
- mov xen_vcpu(%eax), %eax
-#else
- movl xen_vcpu, %eax
-#endif
-
- /* check IF state we're restoring */
- testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp)
-
- /*
- * Maybe enable events. Once this happens we could get a
- * recursive event, so the critical region starts immediately
- * afterwards. However, if that happens we don't end up
- * resuming the code, so we don't have to be worried about
- * being preempted to another CPU.
- */
- setz XEN_vcpu_info_mask(%eax)
-xen_iret_start_crit:
-
- /* check for unmasked and pending */
- cmpw $0x0001, XEN_vcpu_info_pending(%eax)
-
- /*
- * If there's something pending, mask events again so we can
- * jump back into xen_hypervisor_callback. Otherwise do not
- * touch XEN_vcpu_info_mask.
- */
- jne 1f
- movb $1, XEN_vcpu_info_mask(%eax)
-
-1: popl %eax
-
- /*
- * From this point on the registers are restored and the stack
- * updated, so we don't need to worry about it if we're
- * preempted
- */
-iret_restore_end:
-
- /*
- * Jump to hypervisor_callback after fixing up the stack.
- * Events are masked, so jumping out of the critical region is
- * OK.
- */
- je xen_hypervisor_callback
-
-1: iret
-xen_iret_end_crit:
-.section __ex_table, "a"
- .align 4
- .long 1b, iret_exc
-.previous
-
-hyper_iret:
- /* put this out of line since its very rarely used */
- jmp hypercall_page + __HYPERVISOR_iret * 32
-
- .globl xen_iret_start_crit, xen_iret_end_crit
-
-/*
- * This is called by xen_hypervisor_callback in entry.S when it sees
- * that the EIP at the time of interrupt was between
- * xen_iret_start_crit and xen_iret_end_crit. We're passed the EIP in
- * %eax so we can do a more refined determination of what to do.
- *
- * The stack format at this point is:
- * ----------------
- * ss : (ss/esp may be present if we came from usermode)
- * esp :
- * eflags } outer exception info
- * cs }
- * eip }
- * ---------------- <- edi (copy dest)
- * eax : outer eax if it hasn't been restored
- * ----------------
- * eflags } nested exception info
- * cs } (no ss/esp because we're nested
- * eip } from the same ring)
- * orig_eax }<- esi (copy src)
- * - - - - - - - -
- * fs }
- * es }
- * ds } SAVE_ALL state
- * eax }
- * : :
- * ebx }<- esp
- * ----------------
- *
- * In order to deliver the nested exception properly, we need to shift
- * everything from the return addr up to the error code so it sits
- * just under the outer exception info. This means that when we
- * handle the exception, we do it in the context of the outer
- * exception rather than starting a new one.
- *
- * The only caveat is that if the outer eax hasn't been restored yet
- * (ie, it's still on stack), we need to insert its value into the
- * SAVE_ALL state before going on, since it's usermode state which we
- * eventually need to restore.
- */
-ENTRY(xen_iret_crit_fixup)
- /*
- * Paranoia: Make sure we're really coming from kernel space.
- * One could imagine a case where userspace jumps into the
- * critical range address, but just before the CPU delivers a
- * GP, it decides to deliver an interrupt instead. Unlikely?
- * Definitely. Easy to avoid? Yes. The Intel documents
- * explicitly say that the reported EIP for a bad jump is the
- * jump instruction itself, not the destination, but some
- * virtual environments get this wrong.
- */
- movl PT_CS(%esp), %ecx
- andl $SEGMENT_RPL_MASK, %ecx
- cmpl $USER_RPL, %ecx
- je 2f
-
- lea PT_ORIG_EAX(%esp), %esi
- lea PT_EFLAGS(%esp), %edi
-
- /*
- * If eip is before iret_restore_end then stack
- * hasn't been restored yet.
- */
- cmp $iret_restore_end, %eax
- jae 1f
-
- movl 0+4(%edi), %eax /* copy EAX (just above top of frame) */
- movl %eax, PT_EAX(%esp)
-
- lea ESP_OFFSET(%edi), %edi /* move dest up over saved regs */
-
- /* set up the copy */
-1: std
- mov $PT_EIP / 4, %ecx /* saved regs up to orig_eax */
- rep movsl
- cld
-
- lea 4(%edi), %esp /* point esp to new frame */
-2: jmp xen_do_upcall
-
diff --git a/ANDROID_3.4.5/arch/x86/xen/xen-asm_64.S b/ANDROID_3.4.5/arch/x86/xen/xen-asm_64.S
deleted file mode 100644
index 53adefda..00000000
--- a/ANDROID_3.4.5/arch/x86/xen/xen-asm_64.S
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Asm versions of Xen pv-ops, suitable for either direct use or
- * inlining. The inline versions are the same as the direct-use
- * versions, with the pre- and post-amble chopped off.
- *
- * This code is encoded for size rather than absolute efficiency, with
- * a view to being able to inline as much as possible.
- *
- * We only bother with direct forms (ie, vcpu in pda) of the
- * operations here; the indirect forms are better handled in C, since
- * they're generally too large to inline anyway.
- */
-
-#include <asm/errno.h>
-#include <asm/percpu.h>
-#include <asm/processor-flags.h>
-#include <asm/segment.h>
-
-#include <xen/interface/xen.h>
-
-#include "xen-asm.h"
-
-ENTRY(xen_adjust_exception_frame)
- mov 8+0(%rsp), %rcx
- mov 8+8(%rsp), %r11
- ret $16
-
-hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
-/*
- * Xen64 iret frame:
- *
- * ss
- * rsp
- * rflags
- * cs
- * rip <-- standard iret frame
- *
- * flags
- *
- * rcx }
- * r11 }<-- pushed by hypercall page
- * rsp->rax }
- */
-ENTRY(xen_iret)
- pushq $0
-1: jmp hypercall_iret
-ENDPATCH(xen_iret)
-RELOC(xen_iret, 1b+1)
-
-/*
- * sysexit is not used for 64-bit processes, so it's only ever used to
- * return to 32-bit compat userspace.
- */
-ENTRY(xen_sysexit)
- pushq $__USER32_DS
- pushq %rcx
- pushq $X86_EFLAGS_IF
- pushq $__USER32_CS
- pushq %rdx
-
- pushq $0
-1: jmp hypercall_iret
-ENDPATCH(xen_sysexit)
-RELOC(xen_sysexit, 1b+1)
-
-ENTRY(xen_sysret64)
- /*
- * We're already on the usermode stack at this point, but
- * still with the kernel gs, so we can easily switch back
- */
- movq %rsp, PER_CPU_VAR(old_rsp)
- movq PER_CPU_VAR(kernel_stack), %rsp
-
- pushq $__USER_DS
- pushq PER_CPU_VAR(old_rsp)
- pushq %r11
- pushq $__USER_CS
- pushq %rcx
-
- pushq $VGCF_in_syscall
-1: jmp hypercall_iret
-ENDPATCH(xen_sysret64)
-RELOC(xen_sysret64, 1b+1)
-
-ENTRY(xen_sysret32)
- /*
- * We're already on the usermode stack at this point, but
- * still with the kernel gs, so we can easily switch back
- */
- movq %rsp, PER_CPU_VAR(old_rsp)
- movq PER_CPU_VAR(kernel_stack), %rsp
-
- pushq $__USER32_DS
- pushq PER_CPU_VAR(old_rsp)
- pushq %r11
- pushq $__USER32_CS
- pushq %rcx
-
- pushq $0
-1: jmp hypercall_iret
-ENDPATCH(xen_sysret32)
-RELOC(xen_sysret32, 1b+1)
-
-/*
- * Xen handles syscall callbacks much like ordinary exceptions, which
- * means we have:
- * - kernel gs
- * - kernel rsp
- * - an iret-like stack frame on the stack (including rcx and r11):
- * ss
- * rsp
- * rflags
- * cs
- * rip
- * r11
- * rsp->rcx
- *
- * In all the entrypoints, we undo all that to make it look like a
- * CPU-generated syscall/sysenter and jump to the normal entrypoint.
- */
-
-.macro undo_xen_syscall
- mov 0*8(%rsp), %rcx
- mov 1*8(%rsp), %r11
- mov 5*8(%rsp), %rsp
-.endm
-
-/* Normal 64-bit system call target */
-ENTRY(xen_syscall_target)
- undo_xen_syscall
- jmp system_call_after_swapgs
-ENDPROC(xen_syscall_target)
-
-#ifdef CONFIG_IA32_EMULATION
-
-/* 32-bit compat syscall target */
-ENTRY(xen_syscall32_target)
- undo_xen_syscall
- jmp ia32_cstar_target
-ENDPROC(xen_syscall32_target)
-
-/* 32-bit compat sysenter target */
-ENTRY(xen_sysenter_target)
- undo_xen_syscall
- jmp ia32_sysenter_target
-ENDPROC(xen_sysenter_target)
-
-#else /* !CONFIG_IA32_EMULATION */
-
-ENTRY(xen_syscall32_target)
-ENTRY(xen_sysenter_target)
- lea 16(%rsp), %rsp /* strip %rcx, %r11 */
- mov $-ENOSYS, %rax
- pushq $0
- jmp hypercall_iret
-ENDPROC(xen_syscall32_target)
-ENDPROC(xen_sysenter_target)
-
-#endif /* CONFIG_IA32_EMULATION */
diff --git a/ANDROID_3.4.5/arch/x86/xen/xen-head.S b/ANDROID_3.4.5/arch/x86/xen/xen-head.S
deleted file mode 100644
index aaa7291c..00000000
--- a/ANDROID_3.4.5/arch/x86/xen/xen-head.S
+++ /dev/null
@@ -1,55 +0,0 @@
-/* Xen-specific pieces of head.S, intended to be included in the right
- place in head.S */
-
-#ifdef CONFIG_XEN
-
-#include <linux/elfnote.h>
-#include <linux/init.h>
-
-#include <asm/boot.h>
-#include <asm/asm.h>
-#include <asm/page_types.h>
-
-#include <xen/interface/elfnote.h>
-#include <asm/xen/interface.h>
-
- __INIT
-ENTRY(startup_xen)
- cld
-#ifdef CONFIG_X86_32
- mov %esi,xen_start_info
- mov $init_thread_union+THREAD_SIZE,%esp
-#else
- mov %rsi,xen_start_info
- mov $init_thread_union+THREAD_SIZE,%rsp
-#endif
- jmp xen_start_kernel
-
- __FINIT
-
-.pushsection .text
- .align PAGE_SIZE
-ENTRY(hypercall_page)
- .skip PAGE_SIZE
-.popsection
-
- ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
- ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz "2.6")
- ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz "xen-3.0")
-#ifdef CONFIG_X86_32
- ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, _ASM_PTR __PAGE_OFFSET)
-#else
- ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, _ASM_PTR __START_KERNEL_map)
-#endif
- ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen)
- ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page)
- ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb")
- ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes")
- ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
- ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
- .quad _PAGE_PRESENT; .quad _PAGE_PRESENT)
- ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1)
- ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, _ASM_PTR __HYPERVISOR_VIRT_START)
- ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, _ASM_PTR 0)
-
-#endif /*CONFIG_XEN */
diff --git a/ANDROID_3.4.5/arch/x86/xen/xen-ops.h b/ANDROID_3.4.5/arch/x86/xen/xen-ops.h
deleted file mode 100644
index b095739c..00000000
--- a/ANDROID_3.4.5/arch/x86/xen/xen-ops.h
+++ /dev/null
@@ -1,123 +0,0 @@
-#ifndef XEN_OPS_H
-#define XEN_OPS_H
-
-#include <linux/init.h>
-#include <linux/clocksource.h>
-#include <linux/irqreturn.h>
-#include <xen/xen-ops.h>
-
-/* These are code, but not functions. Defined in entry.S */
-extern const char xen_hypervisor_callback[];
-extern const char xen_failsafe_callback[];
-
-extern void *xen_initial_gdt;
-
-struct trap_info;
-void xen_copy_trap_info(struct trap_info *traps);
-
-DECLARE_PER_CPU(struct vcpu_info, xen_vcpu_info);
-DECLARE_PER_CPU(unsigned long, xen_cr3);
-DECLARE_PER_CPU(unsigned long, xen_current_cr3);
-
-extern struct start_info *xen_start_info;
-extern struct shared_info xen_dummy_shared_info;
-extern struct shared_info *HYPERVISOR_shared_info;
-
-void xen_setup_mfn_list_list(void);
-void xen_setup_shared_info(void);
-void xen_build_mfn_list_list(void);
-void xen_setup_machphys_mapping(void);
-pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
-void xen_ident_map_ISA(void);
-void xen_reserve_top(void);
-extern unsigned long xen_max_p2m_pfn;
-
-void xen_set_pat(u64);
-
-char * __init xen_memory_setup(void);
-void __init xen_arch_setup(void);
-void __init xen_init_IRQ(void);
-void xen_enable_sysenter(void);
-void xen_enable_syscall(void);
-void xen_vcpu_restore(void);
-
-void xen_callback_vector(void);
-void xen_hvm_init_shared_info(void);
-void xen_unplug_emulated_devices(void);
-
-void __init xen_build_dynamic_phys_to_machine(void);
-
-void xen_init_irq_ops(void);
-void xen_setup_timer(int cpu);
-void xen_setup_runstate_info(int cpu);
-void xen_teardown_timer(int cpu);
-cycle_t xen_clocksource_read(void);
-void xen_setup_cpu_clockevents(void);
-void __init xen_init_time_ops(void);
-void __init xen_hvm_init_time_ops(void);
-
-irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
-
-bool xen_vcpu_stolen(int vcpu);
-
-void xen_setup_vcpu_info_placement(void);
-
-#ifdef CONFIG_SMP
-void xen_smp_init(void);
-void __init xen_hvm_smp_init(void);
-
-extern cpumask_var_t xen_cpu_initialized_map;
-#else
-static inline void xen_smp_init(void) {}
-static inline void xen_hvm_smp_init(void) {}
-#endif
-
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
-void __init xen_init_spinlocks(void);
-void __cpuinit xen_init_lock_cpu(int cpu);
-void xen_uninit_lock_cpu(int cpu);
-#else
-static inline void xen_init_spinlocks(void)
-{
-}
-static inline void xen_init_lock_cpu(int cpu)
-{
-}
-static inline void xen_uninit_lock_cpu(int cpu)
-{
-}
-#endif
-
-struct dom0_vga_console_info;
-
-#ifdef CONFIG_XEN_DOM0
-void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size);
-#else
-static inline void __init xen_init_vga(const struct dom0_vga_console_info *info,
- size_t size)
-{
-}
-#endif
-
-/* Declare an asm function, along with symbols needed to make it
- inlineable */
-#define DECL_ASM(ret, name, ...) \
- ret name(__VA_ARGS__); \
- extern char name##_end[]; \
- extern char name##_reloc[] \
-
-DECL_ASM(void, xen_irq_enable_direct, void);
-DECL_ASM(void, xen_irq_disable_direct, void);
-DECL_ASM(unsigned long, xen_save_fl_direct, void);
-DECL_ASM(void, xen_restore_fl_direct, unsigned long);
-
-/* These are not functions, and cannot be called normally */
-void xen_iret(void);
-void xen_sysexit(void);
-void xen_sysret32(void);
-void xen_sysret64(void);
-void xen_adjust_exception_frame(void);
-
-extern int xen_panic_handler_init(void);
-
-#endif /* XEN_OPS_H */