diff options
Diffstat (limited to 'ANDROID_3.4.5/arch/x86/xen')
28 files changed, 0 insertions, 8792 deletions
diff --git a/ANDROID_3.4.5/arch/x86/xen/Kconfig b/ANDROID_3.4.5/arch/x86/xen/Kconfig deleted file mode 100644 index fdce49c7..00000000 --- a/ANDROID_3.4.5/arch/x86/xen/Kconfig +++ /dev/null @@ -1,52 +0,0 @@ -# -# This Kconfig describes xen options -# - -config XEN - bool "Xen guest support" - select PARAVIRT - select PARAVIRT_CLOCK - depends on X86_64 || (X86_32 && X86_PAE && !X86_VISWS) - depends on X86_CMPXCHG && X86_TSC - help - This is the Linux Xen port. Enabling this will allow the - kernel to boot in a paravirtualized environment under the - Xen hypervisor. - -config XEN_DOM0 - def_bool y - depends on XEN && PCI_XEN && SWIOTLB_XEN - depends on X86_LOCAL_APIC && X86_IO_APIC && ACPI && PCI - -# Dummy symbol since people have come to rely on the PRIVILEGED_GUEST -# name in tools. -config XEN_PRIVILEGED_GUEST - def_bool XEN_DOM0 - -config XEN_PVHVM - def_bool y - depends on XEN && PCI && X86_LOCAL_APIC - -config XEN_MAX_DOMAIN_MEMORY - int - default 500 if X86_64 - default 64 if X86_32 - depends on XEN - help - This only affects the sizing of some bss arrays, the unused - portions of which are freed. - -config XEN_SAVE_RESTORE - bool - depends on XEN - select HIBERNATE_CALLBACKS - default y - -config XEN_DEBUG_FS - bool "Enable Xen debug and tuning parameters in debugfs" - depends on XEN && DEBUG_FS - default n - help - Enable statistics output and various tuning options in debugfs. - Enabling this option may incur a significant performance overhead. - diff --git a/ANDROID_3.4.5/arch/x86/xen/Makefile b/ANDROID_3.4.5/arch/x86/xen/Makefile deleted file mode 100644 index add2c2d7..00000000 --- a/ANDROID_3.4.5/arch/x86/xen/Makefile +++ /dev/null @@ -1,24 +0,0 @@ -ifdef CONFIG_FUNCTION_TRACER -# Do not profile debug and lowlevel utilities -CFLAGS_REMOVE_spinlock.o = -pg -CFLAGS_REMOVE_time.o = -pg -CFLAGS_REMOVE_irq.o = -pg -endif - -# Make sure early boot has no stackprotector -nostackp := $(call cc-option, -fno-stack-protector) -CFLAGS_enlighten.o := $(nostackp) -CFLAGS_mmu.o := $(nostackp) - -obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ - time.o xen-asm.o xen-asm_$(BITS).o \ - grant-table.o suspend.o platform-pci-unplug.o \ - p2m.o - -obj-$(CONFIG_EVENT_TRACING) += trace.o - -obj-$(CONFIG_SMP) += smp.o -obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o -obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o -obj-$(CONFIG_XEN_DOM0) += vga.o -obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o diff --git a/ANDROID_3.4.5/arch/x86/xen/debugfs.c b/ANDROID_3.4.5/arch/x86/xen/debugfs.c deleted file mode 100644 index ef1db190..00000000 --- a/ANDROID_3.4.5/arch/x86/xen/debugfs.c +++ /dev/null @@ -1,125 +0,0 @@ -#include <linux/init.h> -#include <linux/debugfs.h> -#include <linux/slab.h> -#include <linux/module.h> - -#include "debugfs.h" - -static struct dentry *d_xen_debug; - -struct dentry * __init xen_init_debugfs(void) -{ - if (!d_xen_debug) { - d_xen_debug = debugfs_create_dir("xen", NULL); - - if (!d_xen_debug) - pr_warning("Could not create 'xen' debugfs directory\n"); - } - - return d_xen_debug; -} - -struct array_data -{ - void *array; - unsigned elements; -}; - -static int u32_array_open(struct inode *inode, struct file *file) -{ - file->private_data = NULL; - return nonseekable_open(inode, file); -} - -static size_t format_array(char *buf, size_t bufsize, const char *fmt, - u32 *array, unsigned array_size) -{ - size_t ret = 0; - unsigned i; - - for(i = 0; i < array_size; i++) { - size_t len; - - len = snprintf(buf, bufsize, fmt, array[i]); - len++; /* ' ' or '\n' */ - ret += len; - - if (buf) { - buf += len; - bufsize -= len; - buf[-1] = (i == array_size-1) ? '\n' : ' '; - } - } - - ret++; /* \0 */ - if (buf) - *buf = '\0'; - - return ret; -} - -static char *format_array_alloc(const char *fmt, u32 *array, unsigned array_size) -{ - size_t len = format_array(NULL, 0, fmt, array, array_size); - char *ret; - - ret = kmalloc(len, GFP_KERNEL); - if (ret == NULL) - return NULL; - - format_array(ret, len, fmt, array, array_size); - return ret; -} - -static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len, - loff_t *ppos) -{ - struct inode *inode = file->f_path.dentry->d_inode; - struct array_data *data = inode->i_private; - size_t size; - - if (*ppos == 0) { - if (file->private_data) { - kfree(file->private_data); - file->private_data = NULL; - } - - file->private_data = format_array_alloc("%u", data->array, data->elements); - } - - size = 0; - if (file->private_data) - size = strlen(file->private_data); - - return simple_read_from_buffer(buf, len, ppos, file->private_data, size); -} - -static int xen_array_release(struct inode *inode, struct file *file) -{ - kfree(file->private_data); - - return 0; -} - -static const struct file_operations u32_array_fops = { - .owner = THIS_MODULE, - .open = u32_array_open, - .release= xen_array_release, - .read = u32_array_read, - .llseek = no_llseek, -}; - -struct dentry *xen_debugfs_create_u32_array(const char *name, umode_t mode, - struct dentry *parent, - u32 *array, unsigned elements) -{ - struct array_data *data = kmalloc(sizeof(*data), GFP_KERNEL); - - if (data == NULL) - return NULL; - - data->array = array; - data->elements = elements; - - return debugfs_create_file(name, mode, parent, data, &u32_array_fops); -} diff --git a/ANDROID_3.4.5/arch/x86/xen/debugfs.h b/ANDROID_3.4.5/arch/x86/xen/debugfs.h deleted file mode 100644 index 78d25499..00000000 --- a/ANDROID_3.4.5/arch/x86/xen/debugfs.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _XEN_DEBUGFS_H -#define _XEN_DEBUGFS_H - -struct dentry * __init xen_init_debugfs(void); - -struct dentry *xen_debugfs_create_u32_array(const char *name, umode_t mode, - struct dentry *parent, - u32 *array, unsigned elements); - -#endif /* _XEN_DEBUGFS_H */ diff --git a/ANDROID_3.4.5/arch/x86/xen/enlighten.c b/ANDROID_3.4.5/arch/x86/xen/enlighten.c deleted file mode 100644 index 40edfc37..00000000 --- a/ANDROID_3.4.5/arch/x86/xen/enlighten.c +++ /dev/null @@ -1,1557 +0,0 @@ -/* - * Core of Xen paravirt_ops implementation. - * - * This file contains the xen_paravirt_ops structure itself, and the - * implementations for: - * - privileged instructions - * - interrupt flags - * - segment operations - * - booting and setup - * - * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 - */ - -#include <linux/cpu.h> -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/smp.h> -#include <linux/preempt.h> -#include <linux/hardirq.h> -#include <linux/percpu.h> -#include <linux/delay.h> -#include <linux/start_kernel.h> -#include <linux/sched.h> -#include <linux/kprobes.h> -#include <linux/bootmem.h> -#include <linux/module.h> -#include <linux/mm.h> -#include <linux/page-flags.h> -#include <linux/highmem.h> -#include <linux/console.h> -#include <linux/pci.h> -#include <linux/gfp.h> -#include <linux/memblock.h> - -#include <xen/xen.h> -#include <xen/interface/xen.h> -#include <xen/interface/version.h> -#include <xen/interface/physdev.h> -#include <xen/interface/vcpu.h> -#include <xen/interface/memory.h> -#include <xen/features.h> -#include <xen/page.h> -#include <xen/hvm.h> -#include <xen/hvc-console.h> - -#include <asm/paravirt.h> -#include <asm/apic.h> -#include <asm/page.h> -#include <asm/xen/pci.h> -#include <asm/xen/hypercall.h> -#include <asm/xen/hypervisor.h> -#include <asm/fixmap.h> -#include <asm/processor.h> -#include <asm/proto.h> -#include <asm/msr-index.h> -#include <asm/traps.h> -#include <asm/setup.h> -#include <asm/desc.h> -#include <asm/pgalloc.h> -#include <asm/pgtable.h> -#include <asm/tlbflush.h> -#include <asm/reboot.h> -#include <asm/stackprotector.h> -#include <asm/hypervisor.h> -#include <asm/mwait.h> -#include <asm/pci_x86.h> - -#ifdef CONFIG_ACPI -#include <linux/acpi.h> -#include <asm/acpi.h> -#include <acpi/pdc_intel.h> -#include <acpi/processor.h> -#include <xen/interface/platform.h> -#endif - -#include "xen-ops.h" -#include "mmu.h" -#include "multicalls.h" - -EXPORT_SYMBOL_GPL(hypercall_page); - -DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); -DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); - -enum xen_domain_type xen_domain_type = XEN_NATIVE; -EXPORT_SYMBOL_GPL(xen_domain_type); - -unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START; -EXPORT_SYMBOL(machine_to_phys_mapping); -unsigned long machine_to_phys_nr; -EXPORT_SYMBOL(machine_to_phys_nr); - -struct start_info *xen_start_info; -EXPORT_SYMBOL_GPL(xen_start_info); - -struct shared_info xen_dummy_shared_info; - -void *xen_initial_gdt; - -RESERVE_BRK(shared_info_page_brk, PAGE_SIZE); -__read_mostly int xen_have_vector_callback; -EXPORT_SYMBOL_GPL(xen_have_vector_callback); - -/* - * Point at some empty memory to start with. We map the real shared_info - * page as soon as fixmap is up and running. - */ -struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; - -/* - * Flag to determine whether vcpu info placement is available on all - * VCPUs. We assume it is to start with, and then set it to zero on - * the first failure. This is because it can succeed on some VCPUs - * and not others, since it can involve hypervisor memory allocation, - * or because the guest failed to guarantee all the appropriate - * constraints on all VCPUs (ie buffer can't cross a page boundary). - * - * Note that any particular CPU may be using a placed vcpu structure, - * but we can only optimise if the all are. - * - * 0: not available, 1: available - */ -static int have_vcpu_info_placement = 1; - -static void clamp_max_cpus(void) -{ -#ifdef CONFIG_SMP - if (setup_max_cpus > MAX_VIRT_CPUS) - setup_max_cpus = MAX_VIRT_CPUS; -#endif -} - -static void xen_vcpu_setup(int cpu) -{ - struct vcpu_register_vcpu_info info; - int err; - struct vcpu_info *vcpup; - - BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); - - if (cpu < MAX_VIRT_CPUS) - per_cpu(xen_vcpu,cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; - - if (!have_vcpu_info_placement) { - if (cpu >= MAX_VIRT_CPUS) - clamp_max_cpus(); - return; - } - - vcpup = &per_cpu(xen_vcpu_info, cpu); - info.mfn = arbitrary_virt_to_mfn(vcpup); - info.offset = offset_in_page(vcpup); - - /* Check to see if the hypervisor will put the vcpu_info - structure where we want it, which allows direct access via - a percpu-variable. */ - err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info); - - if (err) { - printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err); - have_vcpu_info_placement = 0; - clamp_max_cpus(); - } else { - /* This cpu is using the registered vcpu info, even if - later ones fail to. */ - per_cpu(xen_vcpu, cpu) = vcpup; - } -} - -/* - * On restore, set the vcpu placement up again. - * If it fails, then we're in a bad state, since - * we can't back out from using it... - */ -void xen_vcpu_restore(void) -{ - int cpu; - - for_each_online_cpu(cpu) { - bool other_cpu = (cpu != smp_processor_id()); - - if (other_cpu && - HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL)) - BUG(); - - xen_setup_runstate_info(cpu); - - if (have_vcpu_info_placement) - xen_vcpu_setup(cpu); - - if (other_cpu && - HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) - BUG(); - } -} - -static void __init xen_banner(void) -{ - unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL); - struct xen_extraversion extra; - HYPERVISOR_xen_version(XENVER_extraversion, &extra); - - printk(KERN_INFO "Booting paravirtualized kernel on %s\n", - pv_info.name); - printk(KERN_INFO "Xen version: %d.%d%s%s\n", - version >> 16, version & 0xffff, extra.extraversion, - xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); -} - -#define CPUID_THERM_POWER_LEAF 6 -#define APERFMPERF_PRESENT 0 - -static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0; -static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; - -static __read_mostly unsigned int cpuid_leaf1_ecx_set_mask; -static __read_mostly unsigned int cpuid_leaf5_ecx_val; -static __read_mostly unsigned int cpuid_leaf5_edx_val; - -static void xen_cpuid(unsigned int *ax, unsigned int *bx, - unsigned int *cx, unsigned int *dx) -{ - unsigned maskebx = ~0; - unsigned maskecx = ~0; - unsigned maskedx = ~0; - unsigned setecx = 0; - /* - * Mask out inconvenient features, to try and disable as many - * unsupported kernel subsystems as possible. - */ - switch (*ax) { - case 1: - maskecx = cpuid_leaf1_ecx_mask; - setecx = cpuid_leaf1_ecx_set_mask; - maskedx = cpuid_leaf1_edx_mask; - break; - - case CPUID_MWAIT_LEAF: - /* Synthesize the values.. */ - *ax = 0; - *bx = 0; - *cx = cpuid_leaf5_ecx_val; - *dx = cpuid_leaf5_edx_val; - return; - - case CPUID_THERM_POWER_LEAF: - /* Disabling APERFMPERF for kernel usage */ - maskecx = ~(1 << APERFMPERF_PRESENT); - break; - - case 0xb: - /* Suppress extended topology stuff */ - maskebx = 0; - break; - } - - asm(XEN_EMULATE_PREFIX "cpuid" - : "=a" (*ax), - "=b" (*bx), - "=c" (*cx), - "=d" (*dx) - : "0" (*ax), "2" (*cx)); - - *bx &= maskebx; - *cx &= maskecx; - *cx |= setecx; - *dx &= maskedx; - -} - -static bool __init xen_check_mwait(void) -{ -#if defined(CONFIG_ACPI) && !defined(CONFIG_ACPI_PROCESSOR_AGGREGATOR) && \ - !defined(CONFIG_ACPI_PROCESSOR_AGGREGATOR_MODULE) - struct xen_platform_op op = { - .cmd = XENPF_set_processor_pminfo, - .u.set_pminfo.id = -1, - .u.set_pminfo.type = XEN_PM_PDC, - }; - uint32_t buf[3]; - unsigned int ax, bx, cx, dx; - unsigned int mwait_mask; - - /* We need to determine whether it is OK to expose the MWAIT - * capability to the kernel to harvest deeper than C3 states from ACPI - * _CST using the processor_harvest_xen.c module. For this to work, we - * need to gather the MWAIT_LEAF values (which the cstate.c code - * checks against). The hypervisor won't expose the MWAIT flag because - * it would break backwards compatibility; so we will find out directly - * from the hardware and hypercall. - */ - if (!xen_initial_domain()) - return false; - - ax = 1; - cx = 0; - - native_cpuid(&ax, &bx, &cx, &dx); - - mwait_mask = (1 << (X86_FEATURE_EST % 32)) | - (1 << (X86_FEATURE_MWAIT % 32)); - - if ((cx & mwait_mask) != mwait_mask) - return false; - - /* We need to emulate the MWAIT_LEAF and for that we need both - * ecx and edx. The hypercall provides only partial information. - */ - - ax = CPUID_MWAIT_LEAF; - bx = 0; - cx = 0; - dx = 0; - - native_cpuid(&ax, &bx, &cx, &dx); - - /* Ask the Hypervisor whether to clear ACPI_PDC_C_C2C3_FFH. If so, - * don't expose MWAIT_LEAF and let ACPI pick the IOPORT version of C3. - */ - buf[0] = ACPI_PDC_REVISION_ID; - buf[1] = 1; - buf[2] = (ACPI_PDC_C_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_SWSMP); - - set_xen_guest_handle(op.u.set_pminfo.pdc, buf); - - if ((HYPERVISOR_dom0_op(&op) == 0) && - (buf[2] & (ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH))) { - cpuid_leaf5_ecx_val = cx; - cpuid_leaf5_edx_val = dx; - } - return true; -#else - return false; -#endif -} -static void __init xen_init_cpuid_mask(void) -{ - unsigned int ax, bx, cx, dx; - unsigned int xsave_mask; - - cpuid_leaf1_edx_mask = - ~((1 << X86_FEATURE_MCE) | /* disable MCE */ - (1 << X86_FEATURE_MCA) | /* disable MCA */ - (1 << X86_FEATURE_MTRR) | /* disable MTRR */ - (1 << X86_FEATURE_ACC)); /* thermal monitoring */ - - if (!xen_initial_domain()) - cpuid_leaf1_edx_mask &= - ~((1 << X86_FEATURE_APIC) | /* disable local APIC */ - (1 << X86_FEATURE_ACPI)); /* disable ACPI */ - ax = 1; - cx = 0; - xen_cpuid(&ax, &bx, &cx, &dx); - - xsave_mask = - (1 << (X86_FEATURE_XSAVE % 32)) | - (1 << (X86_FEATURE_OSXSAVE % 32)); - - /* Xen will set CR4.OSXSAVE if supported and not disabled by force */ - if ((cx & xsave_mask) != xsave_mask) - cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */ - if (xen_check_mwait()) - cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32)); -} - -static void xen_set_debugreg(int reg, unsigned long val) -{ - HYPERVISOR_set_debugreg(reg, val); -} - -static unsigned long xen_get_debugreg(int reg) -{ - return HYPERVISOR_get_debugreg(reg); -} - -static void xen_end_context_switch(struct task_struct *next) -{ - xen_mc_flush(); - paravirt_end_context_switch(next); -} - -static unsigned long xen_store_tr(void) -{ - return 0; -} - -/* - * Set the page permissions for a particular virtual address. If the - * address is a vmalloc mapping (or other non-linear mapping), then - * find the linear mapping of the page and also set its protections to - * match. - */ -static void set_aliased_prot(void *v, pgprot_t prot) -{ - int level; - pte_t *ptep; - pte_t pte; - unsigned long pfn; - struct page *page; - - ptep = lookup_address((unsigned long)v, &level); - BUG_ON(ptep == NULL); - - pfn = pte_pfn(*ptep); - page = pfn_to_page(pfn); - - pte = pfn_pte(pfn, prot); - - if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0)) - BUG(); - - if (!PageHighMem(page)) { - void *av = __va(PFN_PHYS(pfn)); - - if (av != v) - if (HYPERVISOR_update_va_mapping((unsigned long)av, pte, 0)) - BUG(); - } else - kmap_flush_unused(); -} - -static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries) -{ - const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE; - int i; - - for(i = 0; i < entries; i += entries_per_page) - set_aliased_prot(ldt + i, PAGE_KERNEL_RO); -} - -static void xen_free_ldt(struct desc_struct *ldt, unsigned entries) -{ - const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE; - int i; - - for(i = 0; i < entries; i += entries_per_page) - set_aliased_prot(ldt + i, PAGE_KERNEL); -} - -static void xen_set_ldt(const void *addr, unsigned entries) -{ - struct mmuext_op *op; - struct multicall_space mcs = xen_mc_entry(sizeof(*op)); - - trace_xen_cpu_set_ldt(addr, entries); - - op = mcs.args; - op->cmd = MMUEXT_SET_LDT; - op->arg1.linear_addr = (unsigned long)addr; - op->arg2.nr_ents = entries; - - MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - - xen_mc_issue(PARAVIRT_LAZY_CPU); -} - -static void xen_load_gdt(const struct desc_ptr *dtr) -{ - unsigned long va = dtr->address; - unsigned int size = dtr->size + 1; - unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; - unsigned long frames[pages]; - int f; - - /* - * A GDT can be up to 64k in size, which corresponds to 8192 - * 8-byte entries, or 16 4k pages.. - */ - - BUG_ON(size > 65536); - BUG_ON(va & ~PAGE_MASK); - - for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { - int level; - pte_t *ptep; - unsigned long pfn, mfn; - void *virt; - - /* - * The GDT is per-cpu and is in the percpu data area. - * That can be virtually mapped, so we need to do a - * page-walk to get the underlying MFN for the - * hypercall. The page can also be in the kernel's - * linear range, so we need to RO that mapping too. - */ - ptep = lookup_address(va, &level); - BUG_ON(ptep == NULL); - - pfn = pte_pfn(*ptep); - mfn = pfn_to_mfn(pfn); - virt = __va(PFN_PHYS(pfn)); - - frames[f] = mfn; - - make_lowmem_page_readonly((void *)va); - make_lowmem_page_readonly(virt); - } - - if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct))) - BUG(); -} - -/* - * load_gdt for early boot, when the gdt is only mapped once - */ -static void __init xen_load_gdt_boot(const struct desc_ptr *dtr) -{ - unsigned long va = dtr->address; - unsigned int size = dtr->size + 1; - unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; - unsigned long frames[pages]; - int f; - - /* - * A GDT can be up to 64k in size, which corresponds to 8192 - * 8-byte entries, or 16 4k pages.. - */ - - BUG_ON(size > 65536); - BUG_ON(va & ~PAGE_MASK); - - for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { - pte_t pte; - unsigned long pfn, mfn; - - pfn = virt_to_pfn(va); - mfn = pfn_to_mfn(pfn); - - pte = pfn_pte(pfn, PAGE_KERNEL_RO); - - if (HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0)) - BUG(); - - frames[f] = mfn; - } - - if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct))) - BUG(); -} - -static void load_TLS_descriptor(struct thread_struct *t, - unsigned int cpu, unsigned int i) -{ - struct desc_struct *gdt = get_cpu_gdt_table(cpu); - xmaddr_t maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]); - struct multicall_space mc = __xen_mc_entry(0); - - MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]); -} - -static void xen_load_tls(struct thread_struct *t, unsigned int cpu) -{ - /* - * XXX sleazy hack: If we're being called in a lazy-cpu zone - * and lazy gs handling is enabled, it means we're in a - * context switch, and %gs has just been saved. This means we - * can zero it out to prevent faults on exit from the - * hypervisor if the next process has no %gs. Either way, it - * has been saved, and the new value will get loaded properly. - * This will go away as soon as Xen has been modified to not - * save/restore %gs for normal hypercalls. - * - * On x86_64, this hack is not used for %gs, because gs points - * to KERNEL_GS_BASE (and uses it for PDA references), so we - * must not zero %gs on x86_64 - * - * For x86_64, we need to zero %fs, otherwise we may get an - * exception between the new %fs descriptor being loaded and - * %fs being effectively cleared at __switch_to(). - */ - if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { -#ifdef CONFIG_X86_32 - lazy_load_gs(0); -#else - loadsegment(fs, 0); -#endif - } - - xen_mc_batch(); - - load_TLS_descriptor(t, cpu, 0); - load_TLS_descriptor(t, cpu, 1); - load_TLS_descriptor(t, cpu, 2); - - xen_mc_issue(PARAVIRT_LAZY_CPU); -} - -#ifdef CONFIG_X86_64 -static void xen_load_gs_index(unsigned int idx) -{ - if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx)) - BUG(); -} -#endif - -static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, - const void *ptr) -{ - xmaddr_t mach_lp = arbitrary_virt_to_machine(&dt[entrynum]); - u64 entry = *(u64 *)ptr; - - trace_xen_cpu_write_ldt_entry(dt, entrynum, entry); - - preempt_disable(); - - xen_mc_flush(); - if (HYPERVISOR_update_descriptor(mach_lp.maddr, entry)) - BUG(); - - preempt_enable(); -} - -static int cvt_gate_to_trap(int vector, const gate_desc *val, - struct trap_info *info) -{ - unsigned long addr; - - if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT) - return 0; - - info->vector = vector; - - addr = gate_offset(*val); -#ifdef CONFIG_X86_64 - /* - * Look for known traps using IST, and substitute them - * appropriately. The debugger ones are the only ones we care - * about. Xen will handle faults like double_fault and - * machine_check, so we should never see them. Warn if - * there's an unexpected IST-using fault handler. - */ - if (addr == (unsigned long)debug) - addr = (unsigned long)xen_debug; - else if (addr == (unsigned long)int3) - addr = (unsigned long)xen_int3; - else if (addr == (unsigned long)stack_segment) - addr = (unsigned long)xen_stack_segment; - else if (addr == (unsigned long)double_fault || - addr == (unsigned long)nmi) { - /* Don't need to handle these */ - return 0; -#ifdef CONFIG_X86_MCE - } else if (addr == (unsigned long)machine_check) { - return 0; -#endif - } else { - /* Some other trap using IST? */ - if (WARN_ON(val->ist != 0)) - return 0; - } -#endif /* CONFIG_X86_64 */ - info->address = addr; - - info->cs = gate_segment(*val); - info->flags = val->dpl; - /* interrupt gates clear IF */ - if (val->type == GATE_INTERRUPT) - info->flags |= 1 << 2; - - return 1; -} - -/* Locations of each CPU's IDT */ -static DEFINE_PER_CPU(struct desc_ptr, idt_desc); - -/* Set an IDT entry. If the entry is part of the current IDT, then - also update Xen. */ -static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g) -{ - unsigned long p = (unsigned long)&dt[entrynum]; - unsigned long start, end; - - trace_xen_cpu_write_idt_entry(dt, entrynum, g); - - preempt_disable(); - - start = __this_cpu_read(idt_desc.address); - end = start + __this_cpu_read(idt_desc.size) + 1; - - xen_mc_flush(); - - native_write_idt_entry(dt, entrynum, g); - - if (p >= start && (p + 8) <= end) { - struct trap_info info[2]; - - info[1].address = 0; - - if (cvt_gate_to_trap(entrynum, g, &info[0])) - if (HYPERVISOR_set_trap_table(info)) - BUG(); - } - - preempt_enable(); -} - -static void xen_convert_trap_info(const struct desc_ptr *desc, - struct trap_info *traps) -{ - unsigned in, out, count; - - count = (desc->size+1) / sizeof(gate_desc); - BUG_ON(count > 256); - - for (in = out = 0; in < count; in++) { - gate_desc *entry = (gate_desc*)(desc->address) + in; - - if (cvt_gate_to_trap(in, entry, &traps[out])) - out++; - } - traps[out].address = 0; -} - -void xen_copy_trap_info(struct trap_info *traps) -{ - const struct desc_ptr *desc = &__get_cpu_var(idt_desc); - - xen_convert_trap_info(desc, traps); -} - -/* Load a new IDT into Xen. In principle this can be per-CPU, so we - hold a spinlock to protect the static traps[] array (static because - it avoids allocation, and saves stack space). */ -static void xen_load_idt(const struct desc_ptr *desc) -{ - static DEFINE_SPINLOCK(lock); - static struct trap_info traps[257]; - - trace_xen_cpu_load_idt(desc); - - spin_lock(&lock); - - __get_cpu_var(idt_desc) = *desc; - - xen_convert_trap_info(desc, traps); - - xen_mc_flush(); - if (HYPERVISOR_set_trap_table(traps)) - BUG(); - - spin_unlock(&lock); -} - -/* Write a GDT descriptor entry. Ignore LDT descriptors, since - they're handled differently. */ -static void xen_write_gdt_entry(struct desc_struct *dt, int entry, - const void *desc, int type) -{ - trace_xen_cpu_write_gdt_entry(dt, entry, desc, type); - - preempt_disable(); - - switch (type) { - case DESC_LDT: - case DESC_TSS: - /* ignore */ - break; - - default: { - xmaddr_t maddr = arbitrary_virt_to_machine(&dt[entry]); - - xen_mc_flush(); - if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc)) - BUG(); - } - - } - - preempt_enable(); -} - -/* - * Version of write_gdt_entry for use at early boot-time needed to - * update an entry as simply as possible. - */ -static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry, - const void *desc, int type) -{ - trace_xen_cpu_write_gdt_entry(dt, entry, desc, type); - - switch (type) { - case DESC_LDT: - case DESC_TSS: - /* ignore */ - break; - - default: { - xmaddr_t maddr = virt_to_machine(&dt[entry]); - - if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc)) - dt[entry] = *(struct desc_struct *)desc; - } - - } -} - -static void xen_load_sp0(struct tss_struct *tss, - struct thread_struct *thread) -{ - struct multicall_space mcs; - - mcs = xen_mc_entry(0); - MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0); - xen_mc_issue(PARAVIRT_LAZY_CPU); -} - -static void xen_set_iopl_mask(unsigned mask) -{ - struct physdev_set_iopl set_iopl; - - /* Force the change at ring 0. */ - set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3; - HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); -} - -static void xen_io_delay(void) -{ -} - -#ifdef CONFIG_X86_LOCAL_APIC -static unsigned long xen_set_apic_id(unsigned int x) -{ - WARN_ON(1); - return x; -} -static unsigned int xen_get_apic_id(unsigned long x) -{ - return ((x)>>24) & 0xFFu; -} -static u32 xen_apic_read(u32 reg) -{ - struct xen_platform_op op = { - .cmd = XENPF_get_cpuinfo, - .interface_version = XENPF_INTERFACE_VERSION, - .u.pcpu_info.xen_cpuid = 0, - }; - int ret = 0; - - /* Shouldn't need this as APIC is turned off for PV, and we only - * get called on the bootup processor. But just in case. */ - if (!xen_initial_domain() || smp_processor_id()) - return 0; - - if (reg == APIC_LVR) - return 0x10; - - if (reg != APIC_ID) - return 0; - - ret = HYPERVISOR_dom0_op(&op); - if (ret) - return 0; - - return op.u.pcpu_info.apic_id << 24; -} - -static void xen_apic_write(u32 reg, u32 val) -{ - /* Warn to see if there's any stray references */ - WARN_ON(1); -} - -static u64 xen_apic_icr_read(void) -{ - return 0; -} - -static void xen_apic_icr_write(u32 low, u32 id) -{ - /* Warn to see if there's any stray references */ - WARN_ON(1); -} - -static void xen_apic_wait_icr_idle(void) -{ - return; -} - -static u32 xen_safe_apic_wait_icr_idle(void) -{ - return 0; -} - -static void set_xen_basic_apic_ops(void) -{ - apic->read = xen_apic_read; - apic->write = xen_apic_write; - apic->icr_read = xen_apic_icr_read; - apic->icr_write = xen_apic_icr_write; - apic->wait_icr_idle = xen_apic_wait_icr_idle; - apic->safe_wait_icr_idle = xen_safe_apic_wait_icr_idle; - apic->set_apic_id = xen_set_apic_id; - apic->get_apic_id = xen_get_apic_id; -} - -#endif - -static void xen_clts(void) -{ - struct multicall_space mcs; - - mcs = xen_mc_entry(0); - - MULTI_fpu_taskswitch(mcs.mc, 0); - - xen_mc_issue(PARAVIRT_LAZY_CPU); -} - -static DEFINE_PER_CPU(unsigned long, xen_cr0_value); - -static unsigned long xen_read_cr0(void) -{ - unsigned long cr0 = this_cpu_read(xen_cr0_value); - - if (unlikely(cr0 == 0)) { - cr0 = native_read_cr0(); - this_cpu_write(xen_cr0_value, cr0); - } - - return cr0; -} - -static void xen_write_cr0(unsigned long cr0) -{ - struct multicall_space mcs; - - this_cpu_write(xen_cr0_value, cr0); - - /* Only pay attention to cr0.TS; everything else is - ignored. */ - mcs = xen_mc_entry(0); - - MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0); - - xen_mc_issue(PARAVIRT_LAZY_CPU); -} - -static void xen_write_cr4(unsigned long cr4) -{ - cr4 &= ~X86_CR4_PGE; - cr4 &= ~X86_CR4_PSE; - - native_write_cr4(cr4); -} - -static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) -{ - int ret; - - ret = 0; - - switch (msr) { -#ifdef CONFIG_X86_64 - unsigned which; - u64 base; - - case MSR_FS_BASE: which = SEGBASE_FS; goto set; - case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set; - case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set; - - set: - base = ((u64)high << 32) | low; - if (HYPERVISOR_set_segment_base(which, base) != 0) - ret = -EIO; - break; -#endif - - case MSR_STAR: - case MSR_CSTAR: - case MSR_LSTAR: - case MSR_SYSCALL_MASK: - case MSR_IA32_SYSENTER_CS: - case MSR_IA32_SYSENTER_ESP: - case MSR_IA32_SYSENTER_EIP: - /* Fast syscall setup is all done in hypercalls, so - these are all ignored. Stub them out here to stop - Xen console noise. */ - break; - - case MSR_IA32_CR_PAT: - if (smp_processor_id() == 0) - xen_set_pat(((u64)high << 32) | low); - break; - - default: - ret = native_write_msr_safe(msr, low, high); - } - - return ret; -} - -void xen_setup_shared_info(void) -{ - if (!xen_feature(XENFEAT_auto_translated_physmap)) { - set_fixmap(FIX_PARAVIRT_BOOTMAP, - xen_start_info->shared_info); - - HYPERVISOR_shared_info = - (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP); - } else - HYPERVISOR_shared_info = - (struct shared_info *)__va(xen_start_info->shared_info); - -#ifndef CONFIG_SMP - /* In UP this is as good a place as any to set up shared info */ - xen_setup_vcpu_info_placement(); -#endif - - xen_setup_mfn_list_list(); -} - -/* This is called once we have the cpu_possible_mask */ -void xen_setup_vcpu_info_placement(void) -{ - int cpu; - - for_each_possible_cpu(cpu) - xen_vcpu_setup(cpu); - - /* xen_vcpu_setup managed to place the vcpu_info within the - percpu area for all cpus, so make use of it */ - if (have_vcpu_info_placement) { - pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); - pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); - pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); - pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(xen_irq_enable_direct); - pv_mmu_ops.read_cr2 = xen_read_cr2_direct; - } -} - -static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, - unsigned long addr, unsigned len) -{ - char *start, *end, *reloc; - unsigned ret; - - start = end = reloc = NULL; - -#define SITE(op, x) \ - case PARAVIRT_PATCH(op.x): \ - if (have_vcpu_info_placement) { \ - start = (char *)xen_##x##_direct; \ - end = xen_##x##_direct_end; \ - reloc = xen_##x##_direct_reloc; \ - } \ - goto patch_site - - switch (type) { - SITE(pv_irq_ops, irq_enable); - SITE(pv_irq_ops, irq_disable); - SITE(pv_irq_ops, save_fl); - SITE(pv_irq_ops, restore_fl); -#undef SITE - - patch_site: - if (start == NULL || (end-start) > len) - goto default_patch; - - ret = paravirt_patch_insns(insnbuf, len, start, end); - - /* Note: because reloc is assigned from something that - appears to be an array, gcc assumes it's non-null, - but doesn't know its relationship with start and - end. */ - if (reloc > start && reloc < end) { - int reloc_off = reloc - start; - long *relocp = (long *)(insnbuf + reloc_off); - long delta = start - (char *)addr; - - *relocp += delta; - } - break; - - default_patch: - default: - ret = paravirt_patch_default(type, clobbers, insnbuf, - addr, len); - break; - } - - return ret; -} - -static const struct pv_info xen_info __initconst = { - .paravirt_enabled = 1, - .shared_kernel_pmd = 0, - -#ifdef CONFIG_X86_64 - .extra_user_64bit_cs = FLAT_USER_CS64, -#endif - - .name = "Xen", -}; - -static const struct pv_init_ops xen_init_ops __initconst = { - .patch = xen_patch, -}; - -static const struct pv_cpu_ops xen_cpu_ops __initconst = { - .cpuid = xen_cpuid, - - .set_debugreg = xen_set_debugreg, - .get_debugreg = xen_get_debugreg, - - .clts = xen_clts, - - .read_cr0 = xen_read_cr0, - .write_cr0 = xen_write_cr0, - - .read_cr4 = native_read_cr4, - .read_cr4_safe = native_read_cr4_safe, - .write_cr4 = xen_write_cr4, - - .wbinvd = native_wbinvd, - - .read_msr = native_read_msr_safe, - .rdmsr_regs = native_rdmsr_safe_regs, - .write_msr = xen_write_msr_safe, - .wrmsr_regs = native_wrmsr_safe_regs, - - .read_tsc = native_read_tsc, - .read_pmc = native_read_pmc, - - .iret = xen_iret, - .irq_enable_sysexit = xen_sysexit, -#ifdef CONFIG_X86_64 - .usergs_sysret32 = xen_sysret32, - .usergs_sysret64 = xen_sysret64, -#endif - - .load_tr_desc = paravirt_nop, - .set_ldt = xen_set_ldt, - .load_gdt = xen_load_gdt, - .load_idt = xen_load_idt, - .load_tls = xen_load_tls, -#ifdef CONFIG_X86_64 - .load_gs_index = xen_load_gs_index, -#endif - - .alloc_ldt = xen_alloc_ldt, - .free_ldt = xen_free_ldt, - - .store_gdt = native_store_gdt, - .store_idt = native_store_idt, - .store_tr = xen_store_tr, - - .write_ldt_entry = xen_write_ldt_entry, - .write_gdt_entry = xen_write_gdt_entry, - .write_idt_entry = xen_write_idt_entry, - .load_sp0 = xen_load_sp0, - - .set_iopl_mask = xen_set_iopl_mask, - .io_delay = xen_io_delay, - - /* Xen takes care of %gs when switching to usermode for us */ - .swapgs = paravirt_nop, - - .start_context_switch = paravirt_start_context_switch, - .end_context_switch = xen_end_context_switch, -}; - -static const struct pv_apic_ops xen_apic_ops __initconst = { -#ifdef CONFIG_X86_LOCAL_APIC - .startup_ipi_hook = paravirt_nop, -#endif -}; - -static void xen_reboot(int reason) -{ - struct sched_shutdown r = { .reason = reason }; - - if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r)) - BUG(); -} - -static void xen_restart(char *msg) -{ - xen_reboot(SHUTDOWN_reboot); -} - -static void xen_emergency_restart(void) -{ - xen_reboot(SHUTDOWN_reboot); -} - -static void xen_machine_halt(void) -{ - xen_reboot(SHUTDOWN_poweroff); -} - -static void xen_machine_power_off(void) -{ - if (pm_power_off) - pm_power_off(); - xen_reboot(SHUTDOWN_poweroff); -} - -static void xen_crash_shutdown(struct pt_regs *regs) -{ - xen_reboot(SHUTDOWN_crash); -} - -static int -xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr) -{ - xen_reboot(SHUTDOWN_crash); - return NOTIFY_DONE; -} - -static struct notifier_block xen_panic_block = { - .notifier_call= xen_panic_event, -}; - -int xen_panic_handler_init(void) -{ - atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block); - return 0; -} - -static const struct machine_ops xen_machine_ops __initconst = { - .restart = xen_restart, - .halt = xen_machine_halt, - .power_off = xen_machine_power_off, - .shutdown = xen_machine_halt, - .crash_shutdown = xen_crash_shutdown, - .emergency_restart = xen_emergency_restart, -}; - -/* - * Set up the GDT and segment registers for -fstack-protector. Until - * we do this, we have to be careful not to call any stack-protected - * function, which is most of the kernel. - */ -static void __init xen_setup_stackprotector(void) -{ - pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot; - pv_cpu_ops.load_gdt = xen_load_gdt_boot; - - setup_stack_canary_segment(0); - switch_to_new_gdt(0); - - pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry; - pv_cpu_ops.load_gdt = xen_load_gdt; -} - -/* First C function to be called on Xen boot */ -asmlinkage void __init xen_start_kernel(void) -{ - struct physdev_set_iopl set_iopl; - int rc; - pgd_t *pgd; - - if (!xen_start_info) - return; - - xen_domain_type = XEN_PV_DOMAIN; - - xen_setup_machphys_mapping(); - - /* Install Xen paravirt ops */ - pv_info = xen_info; - pv_init_ops = xen_init_ops; - pv_cpu_ops = xen_cpu_ops; - pv_apic_ops = xen_apic_ops; - - x86_init.resources.memory_setup = xen_memory_setup; - x86_init.oem.arch_setup = xen_arch_setup; - x86_init.oem.banner = xen_banner; - - xen_init_time_ops(); - - /* - * Set up some pagetable state before starting to set any ptes. - */ - - xen_init_mmu_ops(); - - /* Prevent unwanted bits from being set in PTEs. */ - __supported_pte_mask &= ~_PAGE_GLOBAL; -#if 0 - if (!xen_initial_domain()) -#endif - __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); - - __supported_pte_mask |= _PAGE_IOMAP; - - /* - * Prevent page tables from being allocated in highmem, even - * if CONFIG_HIGHPTE is enabled. - */ - __userpte_alloc_gfp &= ~__GFP_HIGHMEM; - - /* Work out if we support NX */ - x86_configure_nx(); - - xen_setup_features(); - - /* Get mfn list */ - if (!xen_feature(XENFEAT_auto_translated_physmap)) - xen_build_dynamic_phys_to_machine(); - - /* - * Set up kernel GDT and segment registers, mainly so that - * -fstack-protector code can be executed. - */ - xen_setup_stackprotector(); - - xen_init_irq_ops(); - xen_init_cpuid_mask(); - -#ifdef CONFIG_X86_LOCAL_APIC - /* - * set up the basic apic ops. - */ - set_xen_basic_apic_ops(); -#endif - - if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { - pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; - pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; - } - - machine_ops = xen_machine_ops; - - /* - * The only reliable way to retain the initial address of the - * percpu gdt_page is to remember it here, so we can go and - * mark it RW later, when the initial percpu area is freed. - */ - xen_initial_gdt = &per_cpu(gdt_page, 0); - - xen_smp_init(); - -#ifdef CONFIG_ACPI_NUMA - /* - * The pages we from Xen are not related to machine pages, so - * any NUMA information the kernel tries to get from ACPI will - * be meaningless. Prevent it from trying. - */ - acpi_numa = -1; -#endif - - pgd = (pgd_t *)xen_start_info->pt_base; - - /* Don't do the full vcpu_info placement stuff until we have a - possible map and a non-dummy shared_info. */ - per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; - - local_irq_disable(); - early_boot_irqs_disabled = true; - - xen_raw_console_write("mapping kernel into physical memory\n"); - pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); - xen_ident_map_ISA(); - - /* Allocate and initialize top and mid mfn levels for p2m structure */ - xen_build_mfn_list_list(); - - /* keep using Xen gdt for now; no urgent need to change it */ - -#ifdef CONFIG_X86_32 - pv_info.kernel_rpl = 1; - if (xen_feature(XENFEAT_supervisor_mode_kernel)) - pv_info.kernel_rpl = 0; -#else - pv_info.kernel_rpl = 0; -#endif - /* set the limit of our address space */ - xen_reserve_top(); - - /* We used to do this in xen_arch_setup, but that is too late on AMD - * were early_cpu_init (run before ->arch_setup()) calls early_amd_init - * which pokes 0xcf8 port. - */ - set_iopl.iopl = 1; - rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); - if (rc != 0) - xen_raw_printk("physdev_op failed %d\n", rc); - -#ifdef CONFIG_X86_32 - /* set up basic CPUID stuff */ - cpu_detect(&new_cpu_data); - new_cpu_data.hard_math = 1; - new_cpu_data.wp_works_ok = 1; - new_cpu_data.x86_capability[0] = cpuid_edx(1); -#endif - - /* Poke various useful things into boot_params */ - boot_params.hdr.type_of_loader = (9 << 4) | 0; - boot_params.hdr.ramdisk_image = xen_start_info->mod_start - ? __pa(xen_start_info->mod_start) : 0; - boot_params.hdr.ramdisk_size = xen_start_info->mod_len; - boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line); - - if (!xen_initial_domain()) { - add_preferred_console("xenboot", 0, NULL); - add_preferred_console("tty", 0, NULL); - add_preferred_console("hvc", 0, NULL); - if (pci_xen) - x86_init.pci.arch_init = pci_xen_init; - } else { - const struct dom0_vga_console_info *info = - (void *)((char *)xen_start_info + - xen_start_info->console.dom0.info_off); - - xen_init_vga(info, xen_start_info->console.dom0.info_size); - xen_start_info->console.domU.mfn = 0; - xen_start_info->console.domU.evtchn = 0; - - /* Make sure ACS will be enabled */ - pci_request_acs(); - } -#ifdef CONFIG_PCI - /* PCI BIOS service won't work from a PV guest. */ - pci_probe &= ~PCI_PROBE_BIOS; -#endif - xen_raw_console_write("about to get started...\n"); - - xen_setup_runstate_info(0); - - /* Start the world */ -#ifdef CONFIG_X86_32 - i386_start_kernel(); -#else - x86_64_start_reservations((char *)__pa_symbol(&boot_params)); -#endif -} - -static int init_hvm_pv_info(int *major, int *minor) -{ - uint32_t eax, ebx, ecx, edx, pages, msr, base; - u64 pfn; - - base = xen_cpuid_base(); - cpuid(base + 1, &eax, &ebx, &ecx, &edx); - - *major = eax >> 16; - *minor = eax & 0xffff; - printk(KERN_INFO "Xen version %d.%d.\n", *major, *minor); - - cpuid(base + 2, &pages, &msr, &ecx, &edx); - - pfn = __pa(hypercall_page); - wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); - - xen_setup_features(); - - pv_info.name = "Xen HVM"; - - xen_domain_type = XEN_HVM_DOMAIN; - - return 0; -} - -void __ref xen_hvm_init_shared_info(void) -{ - int cpu; - struct xen_add_to_physmap xatp; - static struct shared_info *shared_info_page = 0; - - if (!shared_info_page) - shared_info_page = (struct shared_info *) - extend_brk(PAGE_SIZE, PAGE_SIZE); - xatp.domid = DOMID_SELF; - xatp.idx = 0; - xatp.space = XENMAPSPACE_shared_info; - xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT; - if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) - BUG(); - - HYPERVISOR_shared_info = (struct shared_info *)shared_info_page; - - /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info - * page, we use it in the event channel upcall and in some pvclock - * related functions. We don't need the vcpu_info placement - * optimizations because we don't use any pv_mmu or pv_irq op on - * HVM. - * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is - * online but xen_hvm_init_shared_info is run at resume time too and - * in that case multiple vcpus might be online. */ - for_each_online_cpu(cpu) { - per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; - } -} - -#ifdef CONFIG_XEN_PVHVM -static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - int cpu = (long)hcpu; - switch (action) { - case CPU_UP_PREPARE: - xen_vcpu_setup(cpu); - if (xen_have_vector_callback) - xen_init_lock_cpu(cpu); - break; - default: - break; - } - return NOTIFY_OK; -} - -static struct notifier_block xen_hvm_cpu_notifier __cpuinitdata = { - .notifier_call = xen_hvm_cpu_notify, -}; - -static void __init xen_hvm_guest_init(void) -{ - int r; - int major, minor; - - r = init_hvm_pv_info(&major, &minor); - if (r < 0) - return; - - xen_hvm_init_shared_info(); - - if (xen_feature(XENFEAT_hvm_callback_vector)) - xen_have_vector_callback = 1; - xen_hvm_smp_init(); - register_cpu_notifier(&xen_hvm_cpu_notifier); - xen_unplug_emulated_devices(); - x86_init.irqs.intr_init = xen_init_IRQ; - xen_hvm_init_time_ops(); - xen_hvm_init_mmu_ops(); -} - -static bool __init xen_hvm_platform(void) -{ - if (xen_pv_domain()) - return false; - - if (!xen_cpuid_base()) - return false; - - return true; -} - -bool xen_hvm_need_lapic(void) -{ - if (xen_pv_domain()) - return false; - if (!xen_hvm_domain()) - return false; - if (xen_feature(XENFEAT_hvm_pirqs) && xen_have_vector_callback) - return false; - return true; -} -EXPORT_SYMBOL_GPL(xen_hvm_need_lapic); - -const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = { - .name = "Xen HVM", - .detect = xen_hvm_platform, - .init_platform = xen_hvm_guest_init, -}; -EXPORT_SYMBOL(x86_hyper_xen_hvm); -#endif diff --git a/ANDROID_3.4.5/arch/x86/xen/grant-table.c b/ANDROID_3.4.5/arch/x86/xen/grant-table.c deleted file mode 100644 index 3a5f55d5..00000000 --- a/ANDROID_3.4.5/arch/x86/xen/grant-table.c +++ /dev/null @@ -1,127 +0,0 @@ -/****************************************************************************** - * grant_table.c - * x86 specific part - * - * Granting foreign access to our memory reservation. - * - * Copyright (c) 2005-2006, Christopher Clark - * Copyright (c) 2004-2005, K A Fraser - * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp> - * VA Linux Systems Japan. Split out x86 specific part. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation; or, when distributed - * separately from the Linux kernel or incorporated into other - * software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/vmalloc.h> - -#include <xen/interface/xen.h> -#include <xen/page.h> -#include <xen/grant_table.h> - -#include <asm/pgtable.h> - -static int map_pte_fn(pte_t *pte, struct page *pmd_page, - unsigned long addr, void *data) -{ - unsigned long **frames = (unsigned long **)data; - - set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL)); - (*frames)++; - return 0; -} - -/* - * This function is used to map shared frames to store grant status. It is - * different from map_pte_fn above, the frames type here is uint64_t. - */ -static int map_pte_fn_status(pte_t *pte, struct page *pmd_page, - unsigned long addr, void *data) -{ - uint64_t **frames = (uint64_t **)data; - - set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL)); - (*frames)++; - return 0; -} - -static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, - unsigned long addr, void *data) -{ - - set_pte_at(&init_mm, addr, pte, __pte(0)); - return 0; -} - -int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, - unsigned long max_nr_gframes, - void **__shared) -{ - int rc; - void *shared = *__shared; - - if (shared == NULL) { - struct vm_struct *area = - alloc_vm_area(PAGE_SIZE * max_nr_gframes, NULL); - BUG_ON(area == NULL); - shared = area->addr; - *__shared = shared; - } - - rc = apply_to_page_range(&init_mm, (unsigned long)shared, - PAGE_SIZE * nr_gframes, - map_pte_fn, &frames); - return rc; -} - -int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes, - unsigned long max_nr_gframes, - grant_status_t **__shared) -{ - int rc; - grant_status_t *shared = *__shared; - - if (shared == NULL) { - /* No need to pass in PTE as we are going to do it - * in apply_to_page_range anyhow. */ - struct vm_struct *area = - alloc_vm_area(PAGE_SIZE * max_nr_gframes, NULL); - BUG_ON(area == NULL); - shared = area->addr; - *__shared = shared; - } - - rc = apply_to_page_range(&init_mm, (unsigned long)shared, - PAGE_SIZE * nr_gframes, - map_pte_fn_status, &frames); - return rc; -} - -void arch_gnttab_unmap(void *shared, unsigned long nr_gframes) -{ - apply_to_page_range(&init_mm, (unsigned long)shared, - PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL); -} diff --git a/ANDROID_3.4.5/arch/x86/xen/irq.c b/ANDROID_3.4.5/arch/x86/xen/irq.c deleted file mode 100644 index 15733765..00000000 --- a/ANDROID_3.4.5/arch/x86/xen/irq.c +++ /dev/null @@ -1,133 +0,0 @@ -#include <linux/hardirq.h> - -#include <asm/x86_init.h> - -#include <xen/interface/xen.h> -#include <xen/interface/sched.h> -#include <xen/interface/vcpu.h> - -#include <asm/xen/hypercall.h> -#include <asm/xen/hypervisor.h> - -#include "xen-ops.h" - -/* - * Force a proper event-channel callback from Xen after clearing the - * callback mask. We do this in a very simple manner, by making a call - * down into Xen. The pending flag will be checked by Xen on return. - */ -void xen_force_evtchn_callback(void) -{ - (void)HYPERVISOR_xen_version(0, NULL); -} - -static unsigned long xen_save_fl(void) -{ - struct vcpu_info *vcpu; - unsigned long flags; - - vcpu = this_cpu_read(xen_vcpu); - - /* flag has opposite sense of mask */ - flags = !vcpu->evtchn_upcall_mask; - - /* convert to IF type flag - -0 -> 0x00000000 - -1 -> 0xffffffff - */ - return (-flags) & X86_EFLAGS_IF; -} -PV_CALLEE_SAVE_REGS_THUNK(xen_save_fl); - -static void xen_restore_fl(unsigned long flags) -{ - struct vcpu_info *vcpu; - - /* convert from IF type flag */ - flags = !(flags & X86_EFLAGS_IF); - - /* There's a one instruction preempt window here. We need to - make sure we're don't switch CPUs between getting the vcpu - pointer and updating the mask. */ - preempt_disable(); - vcpu = this_cpu_read(xen_vcpu); - vcpu->evtchn_upcall_mask = flags; - preempt_enable_no_resched(); - - /* Doesn't matter if we get preempted here, because any - pending event will get dealt with anyway. */ - - if (flags == 0) { - preempt_check_resched(); - barrier(); /* unmask then check (avoid races) */ - if (unlikely(vcpu->evtchn_upcall_pending)) - xen_force_evtchn_callback(); - } -} -PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl); - -static void xen_irq_disable(void) -{ - /* There's a one instruction preempt window here. We need to - make sure we're don't switch CPUs between getting the vcpu - pointer and updating the mask. */ - preempt_disable(); - this_cpu_read(xen_vcpu)->evtchn_upcall_mask = 1; - preempt_enable_no_resched(); -} -PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable); - -static void xen_irq_enable(void) -{ - struct vcpu_info *vcpu; - - /* We don't need to worry about being preempted here, since - either a) interrupts are disabled, so no preemption, or b) - the caller is confused and is trying to re-enable interrupts - on an indeterminate processor. */ - - vcpu = this_cpu_read(xen_vcpu); - vcpu->evtchn_upcall_mask = 0; - - /* Doesn't matter if we get preempted here, because any - pending event will get dealt with anyway. */ - - barrier(); /* unmask then check (avoid races) */ - if (unlikely(vcpu->evtchn_upcall_pending)) - xen_force_evtchn_callback(); -} -PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable); - -static void xen_safe_halt(void) -{ - /* Blocking includes an implicit local_irq_enable(). */ - if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0) - BUG(); -} - -static void xen_halt(void) -{ - if (irqs_disabled()) - HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); - else - xen_safe_halt(); -} - -static const struct pv_irq_ops xen_irq_ops __initconst = { - .save_fl = PV_CALLEE_SAVE(xen_save_fl), - .restore_fl = PV_CALLEE_SAVE(xen_restore_fl), - .irq_disable = PV_CALLEE_SAVE(xen_irq_disable), - .irq_enable = PV_CALLEE_SAVE(xen_irq_enable), - - .safe_halt = xen_safe_halt, - .halt = xen_halt, -#ifdef CONFIG_X86_64 - .adjust_exception_frame = xen_adjust_exception_frame, -#endif -}; - -void __init xen_init_irq_ops(void) -{ - pv_irq_ops = xen_irq_ops; - x86_init.irqs.intr_init = xen_init_IRQ; -} diff --git a/ANDROID_3.4.5/arch/x86/xen/mmu.c b/ANDROID_3.4.5/arch/x86/xen/mmu.c deleted file mode 100644 index 69f58576..00000000 --- a/ANDROID_3.4.5/arch/x86/xen/mmu.c +++ /dev/null @@ -1,2371 +0,0 @@ -/* - * Xen mmu operations - * - * This file contains the various mmu fetch and update operations. - * The most important job they must perform is the mapping between the - * domain's pfn and the overall machine mfns. - * - * Xen allows guests to directly update the pagetable, in a controlled - * fashion. In other words, the guest modifies the same pagetable - * that the CPU actually uses, which eliminates the overhead of having - * a separate shadow pagetable. - * - * In order to allow this, it falls on the guest domain to map its - * notion of a "physical" pfn - which is just a domain-local linear - * address - into a real "machine address" which the CPU's MMU can - * use. - * - * A pgd_t/pmd_t/pte_t will typically contain an mfn, and so can be - * inserted directly into the pagetable. When creating a new - * pte/pmd/pgd, it converts the passed pfn into an mfn. Conversely, - * when reading the content back with __(pgd|pmd|pte)_val, it converts - * the mfn back into a pfn. - * - * The other constraint is that all pages which make up a pagetable - * must be mapped read-only in the guest. This prevents uncontrolled - * guest updates to the pagetable. Xen strictly enforces this, and - * will disallow any pagetable update which will end up mapping a - * pagetable page RW, and will disallow using any writable page as a - * pagetable. - * - * Naively, when loading %cr3 with the base of a new pagetable, Xen - * would need to validate the whole pagetable before going on. - * Naturally, this is quite slow. The solution is to "pin" a - * pagetable, which enforces all the constraints on the pagetable even - * when it is not actively in use. This menas that Xen can be assured - * that it is still valid when you do load it into %cr3, and doesn't - * need to revalidate it. - * - * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 - */ -#include <linux/sched.h> -#include <linux/highmem.h> -#include <linux/debugfs.h> -#include <linux/bug.h> -#include <linux/vmalloc.h> -#include <linux/module.h> -#include <linux/gfp.h> -#include <linux/memblock.h> -#include <linux/seq_file.h> - -#include <trace/events/xen.h> - -#include <asm/pgtable.h> -#include <asm/tlbflush.h> -#include <asm/fixmap.h> -#include <asm/mmu_context.h> -#include <asm/setup.h> -#include <asm/paravirt.h> -#include <asm/e820.h> -#include <asm/linkage.h> -#include <asm/page.h> -#include <asm/init.h> -#include <asm/pat.h> -#include <asm/smp.h> - -#include <asm/xen/hypercall.h> -#include <asm/xen/hypervisor.h> - -#include <xen/xen.h> -#include <xen/page.h> -#include <xen/interface/xen.h> -#include <xen/interface/hvm/hvm_op.h> -#include <xen/interface/version.h> -#include <xen/interface/memory.h> -#include <xen/hvc-console.h> - -#include "multicalls.h" -#include "mmu.h" -#include "debugfs.h" - -/* - * Protects atomic reservation decrease/increase against concurrent increases. - * Also protects non-atomic updates of current_pages and balloon lists. - */ -DEFINE_SPINLOCK(xen_reservation_lock); - -/* - * Identity map, in addition to plain kernel map. This needs to be - * large enough to allocate page table pages to allocate the rest. - * Each page can map 2MB. - */ -#define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4) -static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES); - -#ifdef CONFIG_X86_64 -/* l3 pud for userspace vsyscall mapping */ -static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; -#endif /* CONFIG_X86_64 */ - -/* - * Note about cr3 (pagetable base) values: - * - * xen_cr3 contains the current logical cr3 value; it contains the - * last set cr3. This may not be the current effective cr3, because - * its update may be being lazily deferred. However, a vcpu looking - * at its own cr3 can use this value knowing that it everything will - * be self-consistent. - * - * xen_current_cr3 contains the actual vcpu cr3; it is set once the - * hypercall to set the vcpu cr3 is complete (so it may be a little - * out of date, but it will never be set early). If one vcpu is - * looking at another vcpu's cr3 value, it should use this variable. - */ -DEFINE_PER_CPU(unsigned long, xen_cr3); /* cr3 stored as physaddr */ -DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ - - -/* - * Just beyond the highest usermode address. STACK_TOP_MAX has a - * redzone above it, so round it up to a PGD boundary. - */ -#define USER_LIMIT ((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK) - -unsigned long arbitrary_virt_to_mfn(void *vaddr) -{ - xmaddr_t maddr = arbitrary_virt_to_machine(vaddr); - - return PFN_DOWN(maddr.maddr); -} - -xmaddr_t arbitrary_virt_to_machine(void *vaddr) -{ - unsigned long address = (unsigned long)vaddr; - unsigned int level; - pte_t *pte; - unsigned offset; - - /* - * if the PFN is in the linear mapped vaddr range, we can just use - * the (quick) virt_to_machine() p2m lookup - */ - if (virt_addr_valid(vaddr)) - return virt_to_machine(vaddr); - - /* otherwise we have to do a (slower) full page-table walk */ - - pte = lookup_address(address, &level); - BUG_ON(pte == NULL); - offset = address & ~PAGE_MASK; - return XMADDR(((phys_addr_t)pte_mfn(*pte) << PAGE_SHIFT) + offset); -} -EXPORT_SYMBOL_GPL(arbitrary_virt_to_machine); - -void make_lowmem_page_readonly(void *vaddr) -{ - pte_t *pte, ptev; - unsigned long address = (unsigned long)vaddr; - unsigned int level; - - pte = lookup_address(address, &level); - if (pte == NULL) - return; /* vaddr missing */ - - ptev = pte_wrprotect(*pte); - - if (HYPERVISOR_update_va_mapping(address, ptev, 0)) - BUG(); -} - -void make_lowmem_page_readwrite(void *vaddr) -{ - pte_t *pte, ptev; - unsigned long address = (unsigned long)vaddr; - unsigned int level; - - pte = lookup_address(address, &level); - if (pte == NULL) - return; /* vaddr missing */ - - ptev = pte_mkwrite(*pte); - - if (HYPERVISOR_update_va_mapping(address, ptev, 0)) - BUG(); -} - - -static bool xen_page_pinned(void *ptr) -{ - struct page *page = virt_to_page(ptr); - - return PagePinned(page); -} - -void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid) -{ - struct multicall_space mcs; - struct mmu_update *u; - - trace_xen_mmu_set_domain_pte(ptep, pteval, domid); - - mcs = xen_mc_entry(sizeof(*u)); - u = mcs.args; - - /* ptep might be kmapped when using 32-bit HIGHPTE */ - u->ptr = virt_to_machine(ptep).maddr; - u->val = pte_val_ma(pteval); - - MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, domid); - - xen_mc_issue(PARAVIRT_LAZY_MMU); -} -EXPORT_SYMBOL_GPL(xen_set_domain_pte); - -static void xen_extend_mmu_update(const struct mmu_update *update) -{ - struct multicall_space mcs; - struct mmu_update *u; - - mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u)); - - if (mcs.mc != NULL) { - mcs.mc->args[1]++; - } else { - mcs = __xen_mc_entry(sizeof(*u)); - MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF); - } - - u = mcs.args; - *u = *update; -} - -static void xen_extend_mmuext_op(const struct mmuext_op *op) -{ - struct multicall_space mcs; - struct mmuext_op *u; - - mcs = xen_mc_extend_args(__HYPERVISOR_mmuext_op, sizeof(*u)); - - if (mcs.mc != NULL) { - mcs.mc->args[1]++; - } else { - mcs = __xen_mc_entry(sizeof(*u)); - MULTI_mmuext_op(mcs.mc, mcs.args, 1, NULL, DOMID_SELF); - } - - u = mcs.args; - *u = *op; -} - -static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) -{ - struct mmu_update u; - - preempt_disable(); - - xen_mc_batch(); - - /* ptr may be ioremapped for 64-bit pagetable setup */ - u.ptr = arbitrary_virt_to_machine(ptr).maddr; - u.val = pmd_val_ma(val); - xen_extend_mmu_update(&u); - - xen_mc_issue(PARAVIRT_LAZY_MMU); - - preempt_enable(); -} - -static void xen_set_pmd(pmd_t *ptr, pmd_t val) -{ - trace_xen_mmu_set_pmd(ptr, val); - - /* If page is not pinned, we can just update the entry - directly */ - if (!xen_page_pinned(ptr)) { - *ptr = val; - return; - } - - xen_set_pmd_hyper(ptr, val); -} - -/* - * Associate a virtual page frame with a given physical page frame - * and protection flags for that frame. - */ -void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags) -{ - set_pte_vaddr(vaddr, mfn_pte(mfn, flags)); -} - -static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval) -{ - struct mmu_update u; - - if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU) - return false; - - xen_mc_batch(); - - u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE; - u.val = pte_val_ma(pteval); - xen_extend_mmu_update(&u); - - xen_mc_issue(PARAVIRT_LAZY_MMU); - - return true; -} - -static inline void __xen_set_pte(pte_t *ptep, pte_t pteval) -{ - if (!xen_batched_set_pte(ptep, pteval)) - native_set_pte(ptep, pteval); -} - -static void xen_set_pte(pte_t *ptep, pte_t pteval) -{ - trace_xen_mmu_set_pte(ptep, pteval); - __xen_set_pte(ptep, pteval); -} - -static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pteval) -{ - trace_xen_mmu_set_pte_at(mm, addr, ptep, pteval); - __xen_set_pte(ptep, pteval); -} - -pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - /* Just return the pte as-is. We preserve the bits on commit */ - trace_xen_mmu_ptep_modify_prot_start(mm, addr, ptep, *ptep); - return *ptep; -} - -void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) -{ - struct mmu_update u; - - trace_xen_mmu_ptep_modify_prot_commit(mm, addr, ptep, pte); - xen_mc_batch(); - - u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; - u.val = pte_val_ma(pte); - xen_extend_mmu_update(&u); - - xen_mc_issue(PARAVIRT_LAZY_MMU); -} - -/* Assume pteval_t is equivalent to all the other *val_t types. */ -static pteval_t pte_mfn_to_pfn(pteval_t val) -{ - if (val & _PAGE_PRESENT) { - unsigned long mfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; - unsigned long pfn = mfn_to_pfn(mfn); - - pteval_t flags = val & PTE_FLAGS_MASK; - if (unlikely(pfn == ~0)) - val = flags & ~_PAGE_PRESENT; - else - val = ((pteval_t)pfn << PAGE_SHIFT) | flags; - } - - return val; -} - -static pteval_t pte_pfn_to_mfn(pteval_t val) -{ - if (val & _PAGE_PRESENT) { - unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; - pteval_t flags = val & PTE_FLAGS_MASK; - unsigned long mfn; - - if (!xen_feature(XENFEAT_auto_translated_physmap)) - mfn = get_phys_to_machine(pfn); - else - mfn = pfn; - /* - * If there's no mfn for the pfn, then just create an - * empty non-present pte. Unfortunately this loses - * information about the original pfn, so - * pte_mfn_to_pfn is asymmetric. - */ - if (unlikely(mfn == INVALID_P2M_ENTRY)) { - mfn = 0; - flags = 0; - } else { - /* - * Paramount to do this test _after_ the - * INVALID_P2M_ENTRY as INVALID_P2M_ENTRY & - * IDENTITY_FRAME_BIT resolves to true. - */ - mfn &= ~FOREIGN_FRAME_BIT; - if (mfn & IDENTITY_FRAME_BIT) { - mfn &= ~IDENTITY_FRAME_BIT; - flags |= _PAGE_IOMAP; - } - } - val = ((pteval_t)mfn << PAGE_SHIFT) | flags; - } - - return val; -} - -static pteval_t iomap_pte(pteval_t val) -{ - if (val & _PAGE_PRESENT) { - unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; - pteval_t flags = val & PTE_FLAGS_MASK; - - /* We assume the pte frame number is a MFN, so - just use it as-is. */ - val = ((pteval_t)pfn << PAGE_SHIFT) | flags; - } - - return val; -} - -static pteval_t xen_pte_val(pte_t pte) -{ - pteval_t pteval = pte.pte; -#if 0 - /* If this is a WC pte, convert back from Xen WC to Linux WC */ - if ((pteval & (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)) == _PAGE_PAT) { - WARN_ON(!pat_enabled); - pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT; - } -#endif - if (xen_initial_domain() && (pteval & _PAGE_IOMAP)) - return pteval; - - return pte_mfn_to_pfn(pteval); -} -PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val); - -static pgdval_t xen_pgd_val(pgd_t pgd) -{ - return pte_mfn_to_pfn(pgd.pgd); -} -PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val); - -/* - * Xen's PAT setup is part of its ABI, though I assume entries 6 & 7 - * are reserved for now, to correspond to the Intel-reserved PAT - * types. - * - * We expect Linux's PAT set as follows: - * - * Idx PTE flags Linux Xen Default - * 0 WB WB WB - * 1 PWT WC WT WT - * 2 PCD UC- UC- UC- - * 3 PCD PWT UC UC UC - * 4 PAT WB WC WB - * 5 PAT PWT WC WP WT - * 6 PAT PCD UC- UC UC- - * 7 PAT PCD PWT UC UC UC - */ - -void xen_set_pat(u64 pat) -{ - /* We expect Linux to use a PAT setting of - * UC UC- WC WB (ignoring the PAT flag) */ - WARN_ON(pat != 0x0007010600070106ull); -} - -static pte_t xen_make_pte(pteval_t pte) -{ - phys_addr_t addr = (pte & PTE_PFN_MASK); -#if 0 - /* If Linux is trying to set a WC pte, then map to the Xen WC. - * If _PAGE_PAT is set, then it probably means it is really - * _PAGE_PSE, so avoid fiddling with the PAT mapping and hope - * things work out OK... - * - * (We should never see kernel mappings with _PAGE_PSE set, - * but we could see hugetlbfs mappings, I think.). - */ - if (pat_enabled && !WARN_ON(pte & _PAGE_PAT)) { - if ((pte & (_PAGE_PCD | _PAGE_PWT)) == _PAGE_PWT) - pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT; - } -#endif - /* - * Unprivileged domains are allowed to do IOMAPpings for - * PCI passthrough, but not map ISA space. The ISA - * mappings are just dummy local mappings to keep other - * parts of the kernel happy. - */ - if (unlikely(pte & _PAGE_IOMAP) && - (xen_initial_domain() || addr >= ISA_END_ADDRESS)) { - pte = iomap_pte(pte); - } else { - pte &= ~_PAGE_IOMAP; - pte = pte_pfn_to_mfn(pte); - } - - return native_make_pte(pte); -} -PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte); - -static pgd_t xen_make_pgd(pgdval_t pgd) -{ - pgd = pte_pfn_to_mfn(pgd); - return native_make_pgd(pgd); -} -PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd); - -static pmdval_t xen_pmd_val(pmd_t pmd) -{ - return pte_mfn_to_pfn(pmd.pmd); -} -PV_CALLEE_SAVE_REGS_THUNK(xen_pmd_val); - -static void xen_set_pud_hyper(pud_t *ptr, pud_t val) -{ - struct mmu_update u; - - preempt_disable(); - - xen_mc_batch(); - - /* ptr may be ioremapped for 64-bit pagetable setup */ - u.ptr = arbitrary_virt_to_machine(ptr).maddr; - u.val = pud_val_ma(val); - xen_extend_mmu_update(&u); - - xen_mc_issue(PARAVIRT_LAZY_MMU); - - preempt_enable(); -} - -static void xen_set_pud(pud_t *ptr, pud_t val) -{ - trace_xen_mmu_set_pud(ptr, val); - - /* If page is not pinned, we can just update the entry - directly */ - if (!xen_page_pinned(ptr)) { - *ptr = val; - return; - } - - xen_set_pud_hyper(ptr, val); -} - -#ifdef CONFIG_X86_PAE -static void xen_set_pte_atomic(pte_t *ptep, pte_t pte) -{ - trace_xen_mmu_set_pte_atomic(ptep, pte); - set_64bit((u64 *)ptep, native_pte_val(pte)); -} - -static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) -{ - trace_xen_mmu_pte_clear(mm, addr, ptep); - if (!xen_batched_set_pte(ptep, native_make_pte(0))) - native_pte_clear(mm, addr, ptep); -} - -static void xen_pmd_clear(pmd_t *pmdp) -{ - trace_xen_mmu_pmd_clear(pmdp); - set_pmd(pmdp, __pmd(0)); -} -#endif /* CONFIG_X86_PAE */ - -static pmd_t xen_make_pmd(pmdval_t pmd) -{ - pmd = pte_pfn_to_mfn(pmd); - return native_make_pmd(pmd); -} -PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); - -#if PAGETABLE_LEVELS == 4 -static pudval_t xen_pud_val(pud_t pud) -{ - return pte_mfn_to_pfn(pud.pud); -} -PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val); - -static pud_t xen_make_pud(pudval_t pud) -{ - pud = pte_pfn_to_mfn(pud); - - return native_make_pud(pud); -} -PV_CALLEE_SAVE_REGS_THUNK(xen_make_pud); - -static pgd_t *xen_get_user_pgd(pgd_t *pgd) -{ - pgd_t *pgd_page = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK); - unsigned offset = pgd - pgd_page; - pgd_t *user_ptr = NULL; - - if (offset < pgd_index(USER_LIMIT)) { - struct page *page = virt_to_page(pgd_page); - user_ptr = (pgd_t *)page->private; - if (user_ptr) - user_ptr += offset; - } - - return user_ptr; -} - -static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val) -{ - struct mmu_update u; - - u.ptr = virt_to_machine(ptr).maddr; - u.val = pgd_val_ma(val); - xen_extend_mmu_update(&u); -} - -/* - * Raw hypercall-based set_pgd, intended for in early boot before - * there's a page structure. This implies: - * 1. The only existing pagetable is the kernel's - * 2. It is always pinned - * 3. It has no user pagetable attached to it - */ -static void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val) -{ - preempt_disable(); - - xen_mc_batch(); - - __xen_set_pgd_hyper(ptr, val); - - xen_mc_issue(PARAVIRT_LAZY_MMU); - - preempt_enable(); -} - -static void xen_set_pgd(pgd_t *ptr, pgd_t val) -{ - pgd_t *user_ptr = xen_get_user_pgd(ptr); - - trace_xen_mmu_set_pgd(ptr, user_ptr, val); - - /* If page is not pinned, we can just update the entry - directly */ - if (!xen_page_pinned(ptr)) { - *ptr = val; - if (user_ptr) { - WARN_ON(xen_page_pinned(user_ptr)); - *user_ptr = val; - } - return; - } - - /* If it's pinned, then we can at least batch the kernel and - user updates together. */ - xen_mc_batch(); - - __xen_set_pgd_hyper(ptr, val); - if (user_ptr) - __xen_set_pgd_hyper(user_ptr, val); - - xen_mc_issue(PARAVIRT_LAZY_MMU); -} -#endif /* PAGETABLE_LEVELS == 4 */ - -/* - * (Yet another) pagetable walker. This one is intended for pinning a - * pagetable. This means that it walks a pagetable and calls the - * callback function on each page it finds making up the page table, - * at every level. It walks the entire pagetable, but it only bothers - * pinning pte pages which are below limit. In the normal case this - * will be STACK_TOP_MAX, but at boot we need to pin up to - * FIXADDR_TOP. - * - * For 32-bit the important bit is that we don't pin beyond there, - * because then we start getting into Xen's ptes. - * - * For 64-bit, we must skip the Xen hole in the middle of the address - * space, just after the big x86-64 virtual hole. - */ -static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd, - int (*func)(struct mm_struct *mm, struct page *, - enum pt_level), - unsigned long limit) -{ - int flush = 0; - unsigned hole_low, hole_high; - unsigned pgdidx_limit, pudidx_limit, pmdidx_limit; - unsigned pgdidx, pudidx, pmdidx; - - /* The limit is the last byte to be touched */ - limit--; - BUG_ON(limit >= FIXADDR_TOP); - - if (xen_feature(XENFEAT_auto_translated_physmap)) - return 0; - - /* - * 64-bit has a great big hole in the middle of the address - * space, which contains the Xen mappings. On 32-bit these - * will end up making a zero-sized hole and so is a no-op. - */ - hole_low = pgd_index(USER_LIMIT); - hole_high = pgd_index(PAGE_OFFSET); - - pgdidx_limit = pgd_index(limit); -#if PTRS_PER_PUD > 1 - pudidx_limit = pud_index(limit); -#else - pudidx_limit = 0; -#endif -#if PTRS_PER_PMD > 1 - pmdidx_limit = pmd_index(limit); -#else - pmdidx_limit = 0; -#endif - - for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) { - pud_t *pud; - - if (pgdidx >= hole_low && pgdidx < hole_high) - continue; - - if (!pgd_val(pgd[pgdidx])) - continue; - - pud = pud_offset(&pgd[pgdidx], 0); - - if (PTRS_PER_PUD > 1) /* not folded */ - flush |= (*func)(mm, virt_to_page(pud), PT_PUD); - - for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) { - pmd_t *pmd; - - if (pgdidx == pgdidx_limit && - pudidx > pudidx_limit) - goto out; - - if (pud_none(pud[pudidx])) - continue; - - pmd = pmd_offset(&pud[pudidx], 0); - - if (PTRS_PER_PMD > 1) /* not folded */ - flush |= (*func)(mm, virt_to_page(pmd), PT_PMD); - - for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) { - struct page *pte; - - if (pgdidx == pgdidx_limit && - pudidx == pudidx_limit && - pmdidx > pmdidx_limit) - goto out; - - if (pmd_none(pmd[pmdidx])) - continue; - - pte = pmd_page(pmd[pmdidx]); - flush |= (*func)(mm, pte, PT_PTE); - } - } - } - -out: - /* Do the top level last, so that the callbacks can use it as - a cue to do final things like tlb flushes. */ - flush |= (*func)(mm, virt_to_page(pgd), PT_PGD); - - return flush; -} - -static int xen_pgd_walk(struct mm_struct *mm, - int (*func)(struct mm_struct *mm, struct page *, - enum pt_level), - unsigned long limit) -{ - return __xen_pgd_walk(mm, mm->pgd, func, limit); -} - -/* If we're using split pte locks, then take the page's lock and - return a pointer to it. Otherwise return NULL. */ -static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm) -{ - spinlock_t *ptl = NULL; - -#if USE_SPLIT_PTLOCKS - ptl = __pte_lockptr(page); - spin_lock_nest_lock(ptl, &mm->page_table_lock); -#endif - - return ptl; -} - -static void xen_pte_unlock(void *v) -{ - spinlock_t *ptl = v; - spin_unlock(ptl); -} - -static void xen_do_pin(unsigned level, unsigned long pfn) -{ - struct mmuext_op op; - - op.cmd = level; - op.arg1.mfn = pfn_to_mfn(pfn); - - xen_extend_mmuext_op(&op); -} - -static int xen_pin_page(struct mm_struct *mm, struct page *page, - enum pt_level level) -{ - unsigned pgfl = TestSetPagePinned(page); - int flush; - - if (pgfl) - flush = 0; /* already pinned */ - else if (PageHighMem(page)) - /* kmaps need flushing if we found an unpinned - highpage */ - flush = 1; - else { - void *pt = lowmem_page_address(page); - unsigned long pfn = page_to_pfn(page); - struct multicall_space mcs = __xen_mc_entry(0); - spinlock_t *ptl; - - flush = 0; - - /* - * We need to hold the pagetable lock between the time - * we make the pagetable RO and when we actually pin - * it. If we don't, then other users may come in and - * attempt to update the pagetable by writing it, - * which will fail because the memory is RO but not - * pinned, so Xen won't do the trap'n'emulate. - * - * If we're using split pte locks, we can't hold the - * entire pagetable's worth of locks during the - * traverse, because we may wrap the preempt count (8 - * bits). The solution is to mark RO and pin each PTE - * page while holding the lock. This means the number - * of locks we end up holding is never more than a - * batch size (~32 entries, at present). - * - * If we're not using split pte locks, we needn't pin - * the PTE pages independently, because we're - * protected by the overall pagetable lock. - */ - ptl = NULL; - if (level == PT_PTE) - ptl = xen_pte_lock(page, mm); - - MULTI_update_va_mapping(mcs.mc, (unsigned long)pt, - pfn_pte(pfn, PAGE_KERNEL_RO), - level == PT_PGD ? UVMF_TLB_FLUSH : 0); - - if (ptl) { - xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn); - - /* Queue a deferred unlock for when this batch - is completed. */ - xen_mc_callback(xen_pte_unlock, ptl); - } - } - - return flush; -} - -/* This is called just after a mm has been created, but it has not - been used yet. We need to make sure that its pagetable is all - read-only, and can be pinned. */ -static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd) -{ - trace_xen_mmu_pgd_pin(mm, pgd); - - xen_mc_batch(); - - if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) { - /* re-enable interrupts for flushing */ - xen_mc_issue(0); - - kmap_flush_unused(); - - xen_mc_batch(); - } - -#ifdef CONFIG_X86_64 - { - pgd_t *user_pgd = xen_get_user_pgd(pgd); - - xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd))); - - if (user_pgd) { - xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD); - xen_do_pin(MMUEXT_PIN_L4_TABLE, - PFN_DOWN(__pa(user_pgd))); - } - } -#else /* CONFIG_X86_32 */ -#ifdef CONFIG_X86_PAE - /* Need to make sure unshared kernel PMD is pinnable */ - xen_pin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]), - PT_PMD); -#endif - xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd))); -#endif /* CONFIG_X86_64 */ - xen_mc_issue(0); -} - -static void xen_pgd_pin(struct mm_struct *mm) -{ - __xen_pgd_pin(mm, mm->pgd); -} - -/* - * On save, we need to pin all pagetables to make sure they get their - * mfns turned into pfns. Search the list for any unpinned pgds and pin - * them (unpinned pgds are not currently in use, probably because the - * process is under construction or destruction). - * - * Expected to be called in stop_machine() ("equivalent to taking - * every spinlock in the system"), so the locking doesn't really - * matter all that much. - */ -void xen_mm_pin_all(void) -{ - struct page *page; - - spin_lock(&pgd_lock); - - list_for_each_entry(page, &pgd_list, lru) { - if (!PagePinned(page)) { - __xen_pgd_pin(&init_mm, (pgd_t *)page_address(page)); - SetPageSavePinned(page); - } - } - - spin_unlock(&pgd_lock); -} - -/* - * The init_mm pagetable is really pinned as soon as its created, but - * that's before we have page structures to store the bits. So do all - * the book-keeping now. - */ -static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page, - enum pt_level level) -{ - SetPagePinned(page); - return 0; -} - -static void __init xen_mark_init_mm_pinned(void) -{ - xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP); -} - -static int xen_unpin_page(struct mm_struct *mm, struct page *page, - enum pt_level level) -{ - unsigned pgfl = TestClearPagePinned(page); - - if (pgfl && !PageHighMem(page)) { - void *pt = lowmem_page_address(page); - unsigned long pfn = page_to_pfn(page); - spinlock_t *ptl = NULL; - struct multicall_space mcs; - - /* - * Do the converse to pin_page. If we're using split - * pte locks, we must be holding the lock for while - * the pte page is unpinned but still RO to prevent - * concurrent updates from seeing it in this - * partially-pinned state. - */ - if (level == PT_PTE) { - ptl = xen_pte_lock(page, mm); - - if (ptl) - xen_do_pin(MMUEXT_UNPIN_TABLE, pfn); - } - - mcs = __xen_mc_entry(0); - - MULTI_update_va_mapping(mcs.mc, (unsigned long)pt, - pfn_pte(pfn, PAGE_KERNEL), - level == PT_PGD ? UVMF_TLB_FLUSH : 0); - - if (ptl) { - /* unlock when batch completed */ - xen_mc_callback(xen_pte_unlock, ptl); - } - } - - return 0; /* never need to flush on unpin */ -} - -/* Release a pagetables pages back as normal RW */ -static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd) -{ - trace_xen_mmu_pgd_unpin(mm, pgd); - - xen_mc_batch(); - - xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); - -#ifdef CONFIG_X86_64 - { - pgd_t *user_pgd = xen_get_user_pgd(pgd); - - if (user_pgd) { - xen_do_pin(MMUEXT_UNPIN_TABLE, - PFN_DOWN(__pa(user_pgd))); - xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD); - } - } -#endif - -#ifdef CONFIG_X86_PAE - /* Need to make sure unshared kernel PMD is unpinned */ - xen_unpin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]), - PT_PMD); -#endif - - __xen_pgd_walk(mm, pgd, xen_unpin_page, USER_LIMIT); - - xen_mc_issue(0); -} - -static void xen_pgd_unpin(struct mm_struct *mm) -{ - __xen_pgd_unpin(mm, mm->pgd); -} - -/* - * On resume, undo any pinning done at save, so that the rest of the - * kernel doesn't see any unexpected pinned pagetables. - */ -void xen_mm_unpin_all(void) -{ - struct page *page; - - spin_lock(&pgd_lock); - - list_for_each_entry(page, &pgd_list, lru) { - if (PageSavePinned(page)) { - BUG_ON(!PagePinned(page)); - __xen_pgd_unpin(&init_mm, (pgd_t *)page_address(page)); - ClearPageSavePinned(page); - } - } - - spin_unlock(&pgd_lock); -} - -static void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) -{ - spin_lock(&next->page_table_lock); - xen_pgd_pin(next); - spin_unlock(&next->page_table_lock); -} - -static void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) -{ - spin_lock(&mm->page_table_lock); - xen_pgd_pin(mm); - spin_unlock(&mm->page_table_lock); -} - - -#ifdef CONFIG_SMP -/* Another cpu may still have their %cr3 pointing at the pagetable, so - we need to repoint it somewhere else before we can unpin it. */ -static void drop_other_mm_ref(void *info) -{ - struct mm_struct *mm = info; - struct mm_struct *active_mm; - - active_mm = this_cpu_read(cpu_tlbstate.active_mm); - - if (active_mm == mm && this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) - leave_mm(smp_processor_id()); - - /* If this cpu still has a stale cr3 reference, then make sure - it has been flushed. */ - if (this_cpu_read(xen_current_cr3) == __pa(mm->pgd)) - load_cr3(swapper_pg_dir); -} - -static void xen_drop_mm_ref(struct mm_struct *mm) -{ - cpumask_var_t mask; - unsigned cpu; - - if (current->active_mm == mm) { - if (current->mm == mm) - load_cr3(swapper_pg_dir); - else - leave_mm(smp_processor_id()); - } - - /* Get the "official" set of cpus referring to our pagetable. */ - if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) { - for_each_online_cpu(cpu) { - if (!cpumask_test_cpu(cpu, mm_cpumask(mm)) - && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd)) - continue; - smp_call_function_single(cpu, drop_other_mm_ref, mm, 1); - } - return; - } - cpumask_copy(mask, mm_cpumask(mm)); - - /* It's possible that a vcpu may have a stale reference to our - cr3, because its in lazy mode, and it hasn't yet flushed - its set of pending hypercalls yet. In this case, we can - look at its actual current cr3 value, and force it to flush - if needed. */ - for_each_online_cpu(cpu) { - if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd)) - cpumask_set_cpu(cpu, mask); - } - - if (!cpumask_empty(mask)) - smp_call_function_many(mask, drop_other_mm_ref, mm, 1); - free_cpumask_var(mask); -} -#else -static void xen_drop_mm_ref(struct mm_struct *mm) -{ - if (current->active_mm == mm) - load_cr3(swapper_pg_dir); -} -#endif - -/* - * While a process runs, Xen pins its pagetables, which means that the - * hypervisor forces it to be read-only, and it controls all updates - * to it. This means that all pagetable updates have to go via the - * hypervisor, which is moderately expensive. - * - * Since we're pulling the pagetable down, we switch to use init_mm, - * unpin old process pagetable and mark it all read-write, which - * allows further operations on it to be simple memory accesses. - * - * The only subtle point is that another CPU may be still using the - * pagetable because of lazy tlb flushing. This means we need need to - * switch all CPUs off this pagetable before we can unpin it. - */ -static void xen_exit_mmap(struct mm_struct *mm) -{ - get_cpu(); /* make sure we don't move around */ - xen_drop_mm_ref(mm); - put_cpu(); - - spin_lock(&mm->page_table_lock); - - /* pgd may not be pinned in the error exit path of execve */ - if (xen_page_pinned(mm->pgd)) - xen_pgd_unpin(mm); - - spin_unlock(&mm->page_table_lock); -} - -static void __init xen_pagetable_setup_start(pgd_t *base) -{ -} - -static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) -{ - /* reserve the range used */ - native_pagetable_reserve(start, end); - - /* set as RW the rest */ - printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end, - PFN_PHYS(pgt_buf_top)); - while (end < PFN_PHYS(pgt_buf_top)) { - make_lowmem_page_readwrite(__va(end)); - end += PAGE_SIZE; - } -} - -static void xen_post_allocator_init(void); - -static void __init xen_pagetable_setup_done(pgd_t *base) -{ - xen_setup_shared_info(); - xen_post_allocator_init(); -} - -static void xen_write_cr2(unsigned long cr2) -{ - this_cpu_read(xen_vcpu)->arch.cr2 = cr2; -} - -static unsigned long xen_read_cr2(void) -{ - return this_cpu_read(xen_vcpu)->arch.cr2; -} - -unsigned long xen_read_cr2_direct(void) -{ - return this_cpu_read(xen_vcpu_info.arch.cr2); -} - -static void xen_flush_tlb(void) -{ - struct mmuext_op *op; - struct multicall_space mcs; - - trace_xen_mmu_flush_tlb(0); - - preempt_disable(); - - mcs = xen_mc_entry(sizeof(*op)); - - op = mcs.args; - op->cmd = MMUEXT_TLB_FLUSH_LOCAL; - MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - - xen_mc_issue(PARAVIRT_LAZY_MMU); - - preempt_enable(); -} - -static void xen_flush_tlb_single(unsigned long addr) -{ - struct mmuext_op *op; - struct multicall_space mcs; - - trace_xen_mmu_flush_tlb_single(addr); - - preempt_disable(); - - mcs = xen_mc_entry(sizeof(*op)); - op = mcs.args; - op->cmd = MMUEXT_INVLPG_LOCAL; - op->arg1.linear_addr = addr & PAGE_MASK; - MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - - xen_mc_issue(PARAVIRT_LAZY_MMU); - - preempt_enable(); -} - -static void xen_flush_tlb_others(const struct cpumask *cpus, - struct mm_struct *mm, unsigned long va) -{ - struct { - struct mmuext_op op; -#ifdef CONFIG_SMP - DECLARE_BITMAP(mask, num_processors); -#else - DECLARE_BITMAP(mask, NR_CPUS); -#endif - } *args; - struct multicall_space mcs; - - trace_xen_mmu_flush_tlb_others(cpus, mm, va); - - if (cpumask_empty(cpus)) - return; /* nothing to do */ - - mcs = xen_mc_entry(sizeof(*args)); - args = mcs.args; - args->op.arg2.vcpumask = to_cpumask(args->mask); - - /* Remove us, and any offline CPUS. */ - cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask); - cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask)); - - if (va == TLB_FLUSH_ALL) { - args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; - } else { - args->op.cmd = MMUEXT_INVLPG_MULTI; - args->op.arg1.linear_addr = va; - } - - MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); - - xen_mc_issue(PARAVIRT_LAZY_MMU); -} - -static unsigned long xen_read_cr3(void) -{ - return this_cpu_read(xen_cr3); -} - -static void set_current_cr3(void *v) -{ - this_cpu_write(xen_current_cr3, (unsigned long)v); -} - -static void __xen_write_cr3(bool kernel, unsigned long cr3) -{ - struct mmuext_op op; - unsigned long mfn; - - trace_xen_mmu_write_cr3(kernel, cr3); - - if (cr3) - mfn = pfn_to_mfn(PFN_DOWN(cr3)); - else - mfn = 0; - - WARN_ON(mfn == 0 && kernel); - - op.cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR; - op.arg1.mfn = mfn; - - xen_extend_mmuext_op(&op); - - if (kernel) { - this_cpu_write(xen_cr3, cr3); - - /* Update xen_current_cr3 once the batch has actually - been submitted. */ - xen_mc_callback(set_current_cr3, (void *)cr3); - } -} - -static void xen_write_cr3(unsigned long cr3) -{ - BUG_ON(preemptible()); - - xen_mc_batch(); /* disables interrupts */ - - /* Update while interrupts are disabled, so its atomic with - respect to ipis */ - this_cpu_write(xen_cr3, cr3); - - __xen_write_cr3(true, cr3); - -#ifdef CONFIG_X86_64 - { - pgd_t *user_pgd = xen_get_user_pgd(__va(cr3)); - if (user_pgd) - __xen_write_cr3(false, __pa(user_pgd)); - else - __xen_write_cr3(false, 0); - } -#endif - - xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ -} - -static int xen_pgd_alloc(struct mm_struct *mm) -{ - pgd_t *pgd = mm->pgd; - int ret = 0; - - BUG_ON(PagePinned(virt_to_page(pgd))); - -#ifdef CONFIG_X86_64 - { - struct page *page = virt_to_page(pgd); - pgd_t *user_pgd; - - BUG_ON(page->private != 0); - - ret = -ENOMEM; - - user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); - page->private = (unsigned long)user_pgd; - - if (user_pgd != NULL) { - user_pgd[pgd_index(VSYSCALL_START)] = - __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE); - ret = 0; - } - - BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd)))); - } -#endif - - return ret; -} - -static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) -{ -#ifdef CONFIG_X86_64 - pgd_t *user_pgd = xen_get_user_pgd(pgd); - - if (user_pgd) - free_page((unsigned long)user_pgd); -#endif -} - -#ifdef CONFIG_X86_32 -static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) -{ - /* If there's an existing pte, then don't allow _PAGE_RW to be set */ - if (pte_val_ma(*ptep) & _PAGE_PRESENT) - pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & - pte_val_ma(pte)); - - return pte; -} -#else /* CONFIG_X86_64 */ -static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) -{ - unsigned long pfn = pte_pfn(pte); - - /* - * If the new pfn is within the range of the newly allocated - * kernel pagetable, and it isn't being mapped into an - * early_ioremap fixmap slot as a freshly allocated page, make sure - * it is RO. - */ - if (((!is_early_ioremap_ptep(ptep) && - pfn >= pgt_buf_start && pfn < pgt_buf_top)) || - (is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1))) - pte = pte_wrprotect(pte); - - return pte; -} -#endif /* CONFIG_X86_64 */ - -/* Init-time set_pte while constructing initial pagetables, which - doesn't allow RO pagetable pages to be remapped RW */ -static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) -{ - pte = mask_rw_pte(ptep, pte); - - xen_set_pte(ptep, pte); -} - -static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) -{ - struct mmuext_op op; - op.cmd = cmd; - op.arg1.mfn = pfn_to_mfn(pfn); - if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) - BUG(); -} - -/* Early in boot, while setting up the initial pagetable, assume - everything is pinned. */ -static void __init xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) -{ -#ifdef CONFIG_FLATMEM - BUG_ON(mem_map); /* should only be used early */ -#endif - make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); - pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); -} - -/* Used for pmd and pud */ -static void __init xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn) -{ -#ifdef CONFIG_FLATMEM - BUG_ON(mem_map); /* should only be used early */ -#endif - make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); -} - -/* Early release_pte assumes that all pts are pinned, since there's - only init_mm and anything attached to that is pinned. */ -static void __init xen_release_pte_init(unsigned long pfn) -{ - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); - make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); -} - -static void __init xen_release_pmd_init(unsigned long pfn) -{ - make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); -} - -static inline void __pin_pagetable_pfn(unsigned cmd, unsigned long pfn) -{ - struct multicall_space mcs; - struct mmuext_op *op; - - mcs = __xen_mc_entry(sizeof(*op)); - op = mcs.args; - op->cmd = cmd; - op->arg1.mfn = pfn_to_mfn(pfn); - - MULTI_mmuext_op(mcs.mc, mcs.args, 1, NULL, DOMID_SELF); -} - -static inline void __set_pfn_prot(unsigned long pfn, pgprot_t prot) -{ - struct multicall_space mcs; - unsigned long addr = (unsigned long)__va(pfn << PAGE_SHIFT); - - mcs = __xen_mc_entry(0); - MULTI_update_va_mapping(mcs.mc, (unsigned long)addr, - pfn_pte(pfn, prot), 0); -} - -/* This needs to make sure the new pte page is pinned iff its being - attached to a pinned pagetable. */ -static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, - unsigned level) -{ - bool pinned = PagePinned(virt_to_page(mm->pgd)); - - trace_xen_mmu_alloc_ptpage(mm, pfn, level, pinned); - - if (pinned) { - struct page *page = pfn_to_page(pfn); - - SetPagePinned(page); - - if (!PageHighMem(page)) { - xen_mc_batch(); - - __set_pfn_prot(pfn, PAGE_KERNEL_RO); - - if (level == PT_PTE && USE_SPLIT_PTLOCKS) - __pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); - - xen_mc_issue(PARAVIRT_LAZY_MMU); - } else { - /* make sure there are no stray mappings of - this page */ - kmap_flush_unused(); - } - } -} - -static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn) -{ - xen_alloc_ptpage(mm, pfn, PT_PTE); -} - -static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn) -{ - xen_alloc_ptpage(mm, pfn, PT_PMD); -} - -/* This should never happen until we're OK to use struct page */ -static inline void xen_release_ptpage(unsigned long pfn, unsigned level) -{ - struct page *page = pfn_to_page(pfn); - bool pinned = PagePinned(page); - - trace_xen_mmu_release_ptpage(pfn, level, pinned); - - if (pinned) { - if (!PageHighMem(page)) { - xen_mc_batch(); - - if (level == PT_PTE && USE_SPLIT_PTLOCKS) - __pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); - - __set_pfn_prot(pfn, PAGE_KERNEL); - - xen_mc_issue(PARAVIRT_LAZY_MMU); - } - ClearPagePinned(page); - } -} - -static void xen_release_pte(unsigned long pfn) -{ - xen_release_ptpage(pfn, PT_PTE); -} - -static void xen_release_pmd(unsigned long pfn) -{ - xen_release_ptpage(pfn, PT_PMD); -} - -#if PAGETABLE_LEVELS == 4 -static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) -{ - xen_alloc_ptpage(mm, pfn, PT_PUD); -} - -static void xen_release_pud(unsigned long pfn) -{ - xen_release_ptpage(pfn, PT_PUD); -} -#endif - -void __init xen_reserve_top(void) -{ -#ifdef CONFIG_X86_32 - unsigned long top = HYPERVISOR_VIRT_START; - struct xen_platform_parameters pp; - - if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) - top = pp.virt_start; - - reserve_top_address(-top); -#endif /* CONFIG_X86_32 */ -} - -/* - * Like __va(), but returns address in the kernel mapping (which is - * all we have until the physical memory mapping has been set up. - */ -static void *__ka(phys_addr_t paddr) -{ -#ifdef CONFIG_X86_64 - return (void *)(paddr + __START_KERNEL_map); -#else - return __va(paddr); -#endif -} - -/* Convert a machine address to physical address */ -static unsigned long m2p(phys_addr_t maddr) -{ - phys_addr_t paddr; - - maddr &= PTE_PFN_MASK; - paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT; - - return paddr; -} - -/* Convert a machine address to kernel virtual */ -static void *m2v(phys_addr_t maddr) -{ - return __ka(m2p(maddr)); -} - -/* Set the page permissions on an identity-mapped pages */ -static void set_page_prot(void *addr, pgprot_t prot) -{ - unsigned long pfn = __pa(addr) >> PAGE_SHIFT; - pte_t pte = pfn_pte(pfn, prot); - - if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) - BUG(); -} - -static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) -{ - unsigned pmdidx, pteidx; - unsigned ident_pte; - unsigned long pfn; - - level1_ident_pgt = extend_brk(sizeof(pte_t) * LEVEL1_IDENT_ENTRIES, - PAGE_SIZE); - - ident_pte = 0; - pfn = 0; - for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { - pte_t *pte_page; - - /* Reuse or allocate a page of ptes */ - if (pmd_present(pmd[pmdidx])) - pte_page = m2v(pmd[pmdidx].pmd); - else { - /* Check for free pte pages */ - if (ident_pte == LEVEL1_IDENT_ENTRIES) - break; - - pte_page = &level1_ident_pgt[ident_pte]; - ident_pte += PTRS_PER_PTE; - - pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE); - } - - /* Install mappings */ - for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { - pte_t pte; - -#ifdef CONFIG_X86_32 - if (pfn > max_pfn_mapped) - max_pfn_mapped = pfn; -#endif - - if (!pte_none(pte_page[pteidx])) - continue; - - pte = pfn_pte(pfn, PAGE_KERNEL_EXEC); - pte_page[pteidx] = pte; - } - } - - for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE) - set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO); - - set_page_prot(pmd, PAGE_KERNEL_RO); -} - -void __init xen_setup_machphys_mapping(void) -{ - struct xen_machphys_mapping mapping; - - if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) { - machine_to_phys_mapping = (unsigned long *)mapping.v_start; - machine_to_phys_nr = mapping.max_mfn + 1; - } else { - machine_to_phys_nr = MACH2PHYS_NR_ENTRIES; - } -#ifdef CONFIG_X86_32 - WARN_ON((machine_to_phys_mapping + (machine_to_phys_nr - 1)) - < machine_to_phys_mapping); -#endif -} - -#ifdef CONFIG_X86_64 -static void convert_pfn_mfn(void *v) -{ - pte_t *pte = v; - int i; - - /* All levels are converted the same way, so just treat them - as ptes. */ - for (i = 0; i < PTRS_PER_PTE; i++) - pte[i] = xen_make_pte(pte[i].pte); -} - -/* - * Set up the initial kernel pagetable. - * - * We can construct this by grafting the Xen provided pagetable into - * head_64.S's preconstructed pagetables. We copy the Xen L2's into - * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This - * means that only the kernel has a physical mapping to start with - - * but that's enough to get __va working. We need to fill in the rest - * of the physical mapping once some sort of allocator has been set - * up. - */ -pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, - unsigned long max_pfn) -{ - pud_t *l3; - pmd_t *l2; - - /* max_pfn_mapped is the last pfn mapped in the initial memory - * mappings. Considering that on Xen after the kernel mappings we - * have the mappings of some pages that don't exist in pfn space, we - * set max_pfn_mapped to the last real pfn mapped. */ - max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list)); - - /* Zap identity mapping */ - init_level4_pgt[0] = __pgd(0); - - /* Pre-constructed entries are in pfn, so convert to mfn */ - convert_pfn_mfn(init_level4_pgt); - convert_pfn_mfn(level3_ident_pgt); - convert_pfn_mfn(level3_kernel_pgt); - - l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); - l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); - - memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); - memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); - - l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); - l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); - memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); - - /* Set up identity map */ - xen_map_identity_early(level2_ident_pgt, max_pfn); - - /* Make pagetable pieces RO */ - set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); - set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); - set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); - set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); - set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); - set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); - - /* Pin down new L4 */ - pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, - PFN_DOWN(__pa_symbol(init_level4_pgt))); - - /* Unpin Xen-provided one */ - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); - - /* Switch over */ - pgd = init_level4_pgt; - - /* - * At this stage there can be no user pgd, and no page - * structure to attach it to, so make sure we just set kernel - * pgd. - */ - xen_mc_batch(); - __xen_write_cr3(true, __pa(pgd)); - xen_mc_issue(PARAVIRT_LAZY_CPU); - - memblock_reserve(__pa(xen_start_info->pt_base), - xen_start_info->nr_pt_frames * PAGE_SIZE); - - return pgd; -} -#else /* !CONFIG_X86_64 */ -static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); -static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD); - -static void __init xen_write_cr3_init(unsigned long cr3) -{ - unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir)); - - BUG_ON(read_cr3() != __pa(initial_page_table)); - BUG_ON(cr3 != __pa(swapper_pg_dir)); - - /* - * We are switching to swapper_pg_dir for the first time (from - * initial_page_table) and therefore need to mark that page - * read-only and then pin it. - * - * Xen disallows sharing of kernel PMDs for PAE - * guests. Therefore we must copy the kernel PMD from - * initial_page_table into a new kernel PMD to be used in - * swapper_pg_dir. - */ - swapper_kernel_pmd = - extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); - memcpy(swapper_kernel_pmd, initial_kernel_pmd, - sizeof(pmd_t) * PTRS_PER_PMD); - swapper_pg_dir[KERNEL_PGD_BOUNDARY] = - __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT); - set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO); - - set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); - xen_write_cr3(cr3); - pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, pfn); - - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, - PFN_DOWN(__pa(initial_page_table))); - set_page_prot(initial_page_table, PAGE_KERNEL); - set_page_prot(initial_kernel_pmd, PAGE_KERNEL); - - pv_mmu_ops.write_cr3 = &xen_write_cr3; -} - -pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, - unsigned long max_pfn) -{ - pmd_t *kernel_pmd; - - initial_kernel_pmd = - extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); - - max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) + - xen_start_info->nr_pt_frames * PAGE_SIZE + - 512*1024); - - kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); - memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); - - xen_map_identity_early(initial_kernel_pmd, max_pfn); - - memcpy(initial_page_table, pgd, sizeof(pgd_t) * PTRS_PER_PGD); - initial_page_table[KERNEL_PGD_BOUNDARY] = - __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT); - - set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO); - set_page_prot(initial_page_table, PAGE_KERNEL_RO); - set_page_prot(empty_zero_page, PAGE_KERNEL_RO); - - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); - - pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, - PFN_DOWN(__pa(initial_page_table))); - xen_write_cr3(__pa(initial_page_table)); - - memblock_reserve(__pa(xen_start_info->pt_base), - xen_start_info->nr_pt_frames * PAGE_SIZE); - - return initial_page_table; -} -#endif /* CONFIG_X86_64 */ - -static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss; -static unsigned char fake_ioapic_mapping[PAGE_SIZE] __page_aligned_bss; - -static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) -{ - pte_t pte; - - phys >>= PAGE_SHIFT; - - switch (idx) { - case FIX_BTMAP_END ... FIX_BTMAP_BEGIN: -#ifdef CONFIG_X86_F00F_BUG - case FIX_F00F_IDT: -#endif -#ifdef CONFIG_X86_32 - case FIX_WP_TEST: - case FIX_VDSO: -# ifdef CONFIG_HIGHMEM - case FIX_KMAP_BEGIN ... FIX_KMAP_END: -# endif -#else - case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: - case VVAR_PAGE: -#endif - case FIX_TEXT_POKE0: - case FIX_TEXT_POKE1: - /* All local page mappings */ - pte = pfn_pte(phys, prot); - break; - -#ifdef CONFIG_X86_LOCAL_APIC - case FIX_APIC_BASE: /* maps dummy local APIC */ - pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL); - break; -#endif - -#ifdef CONFIG_X86_IO_APIC - case FIX_IO_APIC_BASE_0 ... FIX_IO_APIC_BASE_END: - /* - * We just don't map the IO APIC - all access is via - * hypercalls. Keep the address in the pte for reference. - */ - pte = pfn_pte(PFN_DOWN(__pa(fake_ioapic_mapping)), PAGE_KERNEL); - break; -#endif - - case FIX_PARAVIRT_BOOTMAP: - /* This is an MFN, but it isn't an IO mapping from the - IO domain */ - pte = mfn_pte(phys, prot); - break; - - default: - /* By default, set_fixmap is used for hardware mappings */ - pte = mfn_pte(phys, __pgprot(pgprot_val(prot) | _PAGE_IOMAP)); - break; - } - - __native_set_fixmap(idx, pte); - -#ifdef CONFIG_X86_64 - /* Replicate changes to map the vsyscall page into the user - pagetable vsyscall mapping. */ - if ((idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) || - idx == VVAR_PAGE) { - unsigned long vaddr = __fix_to_virt(idx); - set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte); - } -#endif -} - -void __init xen_ident_map_ISA(void) -{ - unsigned long pa; - - /* - * If we're dom0, then linear map the ISA machine addresses into - * the kernel's address space. - */ - if (!xen_initial_domain()) - return; - - xen_raw_printk("Xen: setup ISA identity maps\n"); - - for (pa = ISA_START_ADDRESS; pa < ISA_END_ADDRESS; pa += PAGE_SIZE) { - pte_t pte = mfn_pte(PFN_DOWN(pa), PAGE_KERNEL_IO); - - if (HYPERVISOR_update_va_mapping(PAGE_OFFSET + pa, pte, 0)) - BUG(); - } - - xen_flush_tlb(); -} - -static void __init xen_post_allocator_init(void) -{ - pv_mmu_ops.set_pte = xen_set_pte; - pv_mmu_ops.set_pmd = xen_set_pmd; - pv_mmu_ops.set_pud = xen_set_pud; -#if PAGETABLE_LEVELS == 4 - pv_mmu_ops.set_pgd = xen_set_pgd; -#endif - - /* This will work as long as patching hasn't happened yet - (which it hasn't) */ - pv_mmu_ops.alloc_pte = xen_alloc_pte; - pv_mmu_ops.alloc_pmd = xen_alloc_pmd; - pv_mmu_ops.release_pte = xen_release_pte; - pv_mmu_ops.release_pmd = xen_release_pmd; -#if PAGETABLE_LEVELS == 4 - pv_mmu_ops.alloc_pud = xen_alloc_pud; - pv_mmu_ops.release_pud = xen_release_pud; -#endif - -#ifdef CONFIG_X86_64 - SetPagePinned(virt_to_page(level3_user_vsyscall)); -#endif - xen_mark_init_mm_pinned(); -} - -static void xen_leave_lazy_mmu(void) -{ - preempt_disable(); - xen_mc_flush(); - paravirt_leave_lazy_mmu(); - preempt_enable(); -} - -static const struct pv_mmu_ops xen_mmu_ops __initconst = { - .read_cr2 = xen_read_cr2, - .write_cr2 = xen_write_cr2, - - .read_cr3 = xen_read_cr3, -#ifdef CONFIG_X86_32 - .write_cr3 = xen_write_cr3_init, -#else - .write_cr3 = xen_write_cr3, -#endif - - .flush_tlb_user = xen_flush_tlb, - .flush_tlb_kernel = xen_flush_tlb, - .flush_tlb_single = xen_flush_tlb_single, - .flush_tlb_others = xen_flush_tlb_others, - - .pte_update = paravirt_nop, - .pte_update_defer = paravirt_nop, - - .pgd_alloc = xen_pgd_alloc, - .pgd_free = xen_pgd_free, - - .alloc_pte = xen_alloc_pte_init, - .release_pte = xen_release_pte_init, - .alloc_pmd = xen_alloc_pmd_init, - .release_pmd = xen_release_pmd_init, - - .set_pte = xen_set_pte_init, - .set_pte_at = xen_set_pte_at, - .set_pmd = xen_set_pmd_hyper, - - .ptep_modify_prot_start = __ptep_modify_prot_start, - .ptep_modify_prot_commit = __ptep_modify_prot_commit, - - .pte_val = PV_CALLEE_SAVE(xen_pte_val), - .pgd_val = PV_CALLEE_SAVE(xen_pgd_val), - - .make_pte = PV_CALLEE_SAVE(xen_make_pte), - .make_pgd = PV_CALLEE_SAVE(xen_make_pgd), - -#ifdef CONFIG_X86_PAE - .set_pte_atomic = xen_set_pte_atomic, - .pte_clear = xen_pte_clear, - .pmd_clear = xen_pmd_clear, -#endif /* CONFIG_X86_PAE */ - .set_pud = xen_set_pud_hyper, - - .make_pmd = PV_CALLEE_SAVE(xen_make_pmd), - .pmd_val = PV_CALLEE_SAVE(xen_pmd_val), - -#if PAGETABLE_LEVELS == 4 - .pud_val = PV_CALLEE_SAVE(xen_pud_val), - .make_pud = PV_CALLEE_SAVE(xen_make_pud), - .set_pgd = xen_set_pgd_hyper, - - .alloc_pud = xen_alloc_pmd_init, - .release_pud = xen_release_pmd_init, -#endif /* PAGETABLE_LEVELS == 4 */ - - .activate_mm = xen_activate_mm, - .dup_mmap = xen_dup_mmap, - .exit_mmap = xen_exit_mmap, - - .lazy_mode = { - .enter = paravirt_enter_lazy_mmu, - .leave = xen_leave_lazy_mmu, - }, - - .set_fixmap = xen_set_fixmap, -}; - -void __init xen_init_mmu_ops(void) -{ - x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve; - x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start; - x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done; - pv_mmu_ops = xen_mmu_ops; - - memset(dummy_mapping, 0xff, PAGE_SIZE); - memset(fake_ioapic_mapping, 0xfd, PAGE_SIZE); -} - -/* Protected by xen_reservation_lock. */ -#define MAX_CONTIG_ORDER 9 /* 2MB */ -static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER]; - -#define VOID_PTE (mfn_pte(0, __pgprot(0))) -static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order, - unsigned long *in_frames, - unsigned long *out_frames) -{ - int i; - struct multicall_space mcs; - - xen_mc_batch(); - for (i = 0; i < (1UL<<order); i++, vaddr += PAGE_SIZE) { - mcs = __xen_mc_entry(0); - - if (in_frames) - in_frames[i] = virt_to_mfn(vaddr); - - MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0); - __set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY); - - if (out_frames) - out_frames[i] = virt_to_pfn(vaddr); - } - xen_mc_issue(0); -} - -/* - * Update the pfn-to-mfn mappings for a virtual address range, either to - * point to an array of mfns, or contiguously from a single starting - * mfn. - */ -static void xen_remap_exchanged_ptes(unsigned long vaddr, int order, - unsigned long *mfns, - unsigned long first_mfn) -{ - unsigned i, limit; - unsigned long mfn; - - xen_mc_batch(); - - limit = 1u << order; - for (i = 0; i < limit; i++, vaddr += PAGE_SIZE) { - struct multicall_space mcs; - unsigned flags; - - mcs = __xen_mc_entry(0); - if (mfns) - mfn = mfns[i]; - else - mfn = first_mfn + i; - - if (i < (limit - 1)) - flags = 0; - else { - if (order == 0) - flags = UVMF_INVLPG | UVMF_ALL; - else - flags = UVMF_TLB_FLUSH | UVMF_ALL; - } - - MULTI_update_va_mapping(mcs.mc, vaddr, - mfn_pte(mfn, PAGE_KERNEL), flags); - - set_phys_to_machine(virt_to_pfn(vaddr), mfn); - } - - xen_mc_issue(0); -} - -/* - * Perform the hypercall to exchange a region of our pfns to point to - * memory with the required contiguous alignment. Takes the pfns as - * input, and populates mfns as output. - * - * Returns a success code indicating whether the hypervisor was able to - * satisfy the request or not. - */ -static int xen_exchange_memory(unsigned long extents_in, unsigned int order_in, - unsigned long *pfns_in, - unsigned long extents_out, - unsigned int order_out, - unsigned long *mfns_out, - unsigned int address_bits) -{ - long rc; - int success; - - struct xen_memory_exchange exchange = { - .in = { - .nr_extents = extents_in, - .extent_order = order_in, - .extent_start = pfns_in, - .domid = DOMID_SELF - }, - .out = { - .nr_extents = extents_out, - .extent_order = order_out, - .extent_start = mfns_out, - .address_bits = address_bits, - .domid = DOMID_SELF - } - }; - - BUG_ON(extents_in << order_in != extents_out << order_out); - - rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange); - success = (exchange.nr_exchanged == extents_in); - - BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0))); - BUG_ON(success && (rc != 0)); - - return success; -} - -int xen_create_contiguous_region(unsigned long vstart, unsigned int order, - unsigned int address_bits) -{ - unsigned long *in_frames = discontig_frames, out_frame; - unsigned long flags; - int success; - - /* - * Currently an auto-translated guest will not perform I/O, nor will - * it require PAE page directories below 4GB. Therefore any calls to - * this function are redundant and can be ignored. - */ - - if (xen_feature(XENFEAT_auto_translated_physmap)) - return 0; - - if (unlikely(order > MAX_CONTIG_ORDER)) - return -ENOMEM; - - memset((void *) vstart, 0, PAGE_SIZE << order); - - spin_lock_irqsave(&xen_reservation_lock, flags); - - /* 1. Zap current PTEs, remembering MFNs. */ - xen_zap_pfn_range(vstart, order, in_frames, NULL); - - /* 2. Get a new contiguous memory extent. */ - out_frame = virt_to_pfn(vstart); - success = xen_exchange_memory(1UL << order, 0, in_frames, - 1, order, &out_frame, - address_bits); - - /* 3. Map the new extent in place of old pages. */ - if (success) - xen_remap_exchanged_ptes(vstart, order, NULL, out_frame); - else - xen_remap_exchanged_ptes(vstart, order, in_frames, 0); - - spin_unlock_irqrestore(&xen_reservation_lock, flags); - - return success ? 0 : -ENOMEM; -} -EXPORT_SYMBOL_GPL(xen_create_contiguous_region); - -void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order) -{ - unsigned long *out_frames = discontig_frames, in_frame; - unsigned long flags; - int success; - - if (xen_feature(XENFEAT_auto_translated_physmap)) - return; - - if (unlikely(order > MAX_CONTIG_ORDER)) - return; - - memset((void *) vstart, 0, PAGE_SIZE << order); - - spin_lock_irqsave(&xen_reservation_lock, flags); - - /* 1. Find start MFN of contiguous extent. */ - in_frame = virt_to_mfn(vstart); - - /* 2. Zap current PTEs. */ - xen_zap_pfn_range(vstart, order, NULL, out_frames); - - /* 3. Do the exchange for non-contiguous MFNs. */ - success = xen_exchange_memory(1, order, &in_frame, 1UL << order, - 0, out_frames, 0); - - /* 4. Map new pages in place of old pages. */ - if (success) - xen_remap_exchanged_ptes(vstart, order, out_frames, 0); - else - xen_remap_exchanged_ptes(vstart, order, NULL, in_frame); - - spin_unlock_irqrestore(&xen_reservation_lock, flags); -} -EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region); - -#ifdef CONFIG_XEN_PVHVM -static void xen_hvm_exit_mmap(struct mm_struct *mm) -{ - struct xen_hvm_pagetable_dying a; - int rc; - - a.domid = DOMID_SELF; - a.gpa = __pa(mm->pgd); - rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a); - WARN_ON_ONCE(rc < 0); -} - -static int is_pagetable_dying_supported(void) -{ - struct xen_hvm_pagetable_dying a; - int rc = 0; - - a.domid = DOMID_SELF; - a.gpa = 0x00; - rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a); - if (rc < 0) { - printk(KERN_DEBUG "HVMOP_pagetable_dying not supported\n"); - return 0; - } - return 1; -} - -void __init xen_hvm_init_mmu_ops(void) -{ - if (is_pagetable_dying_supported()) - pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap; -} -#endif - -#define REMAP_BATCH_SIZE 16 - -struct remap_data { - unsigned long mfn; - pgprot_t prot; - struct mmu_update *mmu_update; -}; - -static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, - unsigned long addr, void *data) -{ - struct remap_data *rmd = data; - pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot)); - - rmd->mmu_update->ptr = virt_to_machine(ptep).maddr; - rmd->mmu_update->val = pte_val_ma(pte); - rmd->mmu_update++; - - return 0; -} - -int xen_remap_domain_mfn_range(struct vm_area_struct *vma, - unsigned long addr, - unsigned long mfn, int nr, - pgprot_t prot, unsigned domid) -{ - struct remap_data rmd; - struct mmu_update mmu_update[REMAP_BATCH_SIZE]; - int batch; - unsigned long range; - int err = 0; - - prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); - - BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) == - (VM_PFNMAP | VM_RESERVED | VM_IO))); - - rmd.mfn = mfn; - rmd.prot = prot; - - while (nr) { - batch = min(REMAP_BATCH_SIZE, nr); - range = (unsigned long)batch << PAGE_SHIFT; - - rmd.mmu_update = mmu_update; - err = apply_to_page_range(vma->vm_mm, addr, range, - remap_area_mfn_pte_fn, &rmd); - if (err) - goto out; - - err = -EFAULT; - if (HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid) < 0) - goto out; - - nr -= batch; - addr += range; - } - - err = 0; -out: - - flush_tlb_all(); - - return err; -} -EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range); diff --git a/ANDROID_3.4.5/arch/x86/xen/mmu.h b/ANDROID_3.4.5/arch/x86/xen/mmu.h deleted file mode 100644 index 73809bb9..00000000 --- a/ANDROID_3.4.5/arch/x86/xen/mmu.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef _XEN_MMU_H - -#include <linux/linkage.h> -#include <asm/page.h> - -enum pt_level { - PT_PGD, - PT_PUD, - PT_PMD, - PT_PTE -}; - - -bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); - -void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); - -pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep); -void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte); - -unsigned long xen_read_cr2_direct(void); - -extern void xen_init_mmu_ops(void); -extern void xen_hvm_init_mmu_ops(void); -#endif /* _XEN_MMU_H */ diff --git a/ANDROID_3.4.5/arch/x86/xen/multicalls.c b/ANDROID_3.4.5/arch/x86/xen/multicalls.c deleted file mode 100644 index 0d82003e..00000000 --- a/ANDROID_3.4.5/arch/x86/xen/multicalls.c +++ /dev/null @@ -1,208 +0,0 @@ -/* - * Xen hypercall batching. - * - * Xen allows multiple hypercalls to be issued at once, using the - * multicall interface. This allows the cost of trapping into the - * hypervisor to be amortized over several calls. - * - * This file implements a simple interface for multicalls. There's a - * per-cpu buffer of outstanding multicalls. When you want to queue a - * multicall for issuing, you can allocate a multicall slot for the - * call and its arguments, along with storage for space which is - * pointed to by the arguments (for passing pointers to structures, - * etc). When the multicall is actually issued, all the space for the - * commands and allocated memory is freed for reuse. - * - * Multicalls are flushed whenever any of the buffers get full, or - * when explicitly requested. There's no way to get per-multicall - * return results back. It will BUG if any of the multicalls fail. - * - * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 - */ -#include <linux/percpu.h> -#include <linux/hardirq.h> -#include <linux/debugfs.h> - -#include <asm/xen/hypercall.h> - -#include "multicalls.h" -#include "debugfs.h" - -#define MC_BATCH 32 - -#define MC_DEBUG 0 - -#define MC_ARGS (MC_BATCH * 16) - - -struct mc_buffer { - unsigned mcidx, argidx, cbidx; - struct multicall_entry entries[MC_BATCH]; -#if MC_DEBUG - struct multicall_entry debug[MC_BATCH]; - void *caller[MC_BATCH]; -#endif - unsigned char args[MC_ARGS]; - struct callback { - void (*fn)(void *); - void *data; - } callbacks[MC_BATCH]; -}; - -static DEFINE_PER_CPU(struct mc_buffer, mc_buffer); -DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags); - -void xen_mc_flush(void) -{ - struct mc_buffer *b = &__get_cpu_var(mc_buffer); - struct multicall_entry *mc; - int ret = 0; - unsigned long flags; - int i; - - BUG_ON(preemptible()); - - /* Disable interrupts in case someone comes in and queues - something in the middle */ - local_irq_save(flags); - - trace_xen_mc_flush(b->mcidx, b->argidx, b->cbidx); - - switch (b->mcidx) { - case 0: - /* no-op */ - BUG_ON(b->argidx != 0); - break; - - case 1: - /* Singleton multicall - bypass multicall machinery - and just do the call directly. */ - mc = &b->entries[0]; - - mc->result = privcmd_call(mc->op, - mc->args[0], mc->args[1], mc->args[2], - mc->args[3], mc->args[4]); - ret = mc->result < 0; - break; - - default: -#if MC_DEBUG - memcpy(b->debug, b->entries, - b->mcidx * sizeof(struct multicall_entry)); -#endif - - if (HYPERVISOR_multicall(b->entries, b->mcidx) != 0) - BUG(); - for (i = 0; i < b->mcidx; i++) - if (b->entries[i].result < 0) - ret++; - -#if MC_DEBUG - if (ret) { - printk(KERN_ERR "%d multicall(s) failed: cpu %d\n", - ret, smp_processor_id()); - dump_stack(); - for (i = 0; i < b->mcidx; i++) { - printk(KERN_DEBUG " call %2d/%d: op=%lu arg=[%lx] result=%ld\t%pF\n", - i+1, b->mcidx, - b->debug[i].op, - b->debug[i].args[0], - b->entries[i].result, - b->caller[i]); - } - } -#endif - } - - b->mcidx = 0; - b->argidx = 0; - - for (i = 0; i < b->cbidx; i++) { - struct callback *cb = &b->callbacks[i]; - - (*cb->fn)(cb->data); - } - b->cbidx = 0; - - local_irq_restore(flags); - - WARN_ON(ret); -} - -struct multicall_space __xen_mc_entry(size_t args) -{ - struct mc_buffer *b = &__get_cpu_var(mc_buffer); - struct multicall_space ret; - unsigned argidx = roundup(b->argidx, sizeof(u64)); - - trace_xen_mc_entry_alloc(args); - - BUG_ON(preemptible()); - BUG_ON(b->argidx >= MC_ARGS); - - if (unlikely(b->mcidx == MC_BATCH || - (argidx + args) >= MC_ARGS)) { - trace_xen_mc_flush_reason((b->mcidx == MC_BATCH) ? - XEN_MC_FL_BATCH : XEN_MC_FL_ARGS); - xen_mc_flush(); - argidx = roundup(b->argidx, sizeof(u64)); - } - - ret.mc = &b->entries[b->mcidx]; -#if MC_DEBUG - b->caller[b->mcidx] = __builtin_return_address(0); -#endif - b->mcidx++; - ret.args = &b->args[argidx]; - b->argidx = argidx + args; - - BUG_ON(b->argidx >= MC_ARGS); - return ret; -} - -struct multicall_space xen_mc_extend_args(unsigned long op, size_t size) -{ - struct mc_buffer *b = &__get_cpu_var(mc_buffer); - struct multicall_space ret = { NULL, NULL }; - - BUG_ON(preemptible()); - BUG_ON(b->argidx >= MC_ARGS); - - if (unlikely(b->mcidx == 0 || - b->entries[b->mcidx - 1].op != op)) { - trace_xen_mc_extend_args(op, size, XEN_MC_XE_BAD_OP); - goto out; - } - - if (unlikely((b->argidx + size) >= MC_ARGS)) { - trace_xen_mc_extend_args(op, size, XEN_MC_XE_NO_SPACE); - goto out; - } - - ret.mc = &b->entries[b->mcidx - 1]; - ret.args = &b->args[b->argidx]; - b->argidx += size; - - BUG_ON(b->argidx >= MC_ARGS); - - trace_xen_mc_extend_args(op, size, XEN_MC_XE_OK); -out: - return ret; -} - -void xen_mc_callback(void (*fn)(void *), void *data) -{ - struct mc_buffer *b = &__get_cpu_var(mc_buffer); - struct callback *cb; - - if (b->cbidx == MC_BATCH) { - trace_xen_mc_flush_reason(XEN_MC_FL_CALLBACK); - xen_mc_flush(); - } - - trace_xen_mc_callback(fn, data); - - cb = &b->callbacks[b->cbidx++]; - cb->fn = fn; - cb->data = data; -} diff --git a/ANDROID_3.4.5/arch/x86/xen/multicalls.h b/ANDROID_3.4.5/arch/x86/xen/multicalls.h deleted file mode 100644 index 9c2e74f9..00000000 --- a/ANDROID_3.4.5/arch/x86/xen/multicalls.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef _XEN_MULTICALLS_H -#define _XEN_MULTICALLS_H - -#include <trace/events/xen.h> - -#include "xen-ops.h" - -/* Multicalls */ -struct multicall_space -{ - struct multicall_entry *mc; - void *args; -}; - -/* Allocate room for a multicall and its args */ -struct multicall_space __xen_mc_entry(size_t args); - -DECLARE_PER_CPU(unsigned long, xen_mc_irq_flags); - -/* Call to start a batch of multiple __xen_mc_entry()s. Must be - paired with xen_mc_issue() */ -static inline void xen_mc_batch(void) -{ - unsigned long flags; - - /* need to disable interrupts until this entry is complete */ - local_irq_save(flags); - trace_xen_mc_batch(paravirt_get_lazy_mode()); - __this_cpu_write(xen_mc_irq_flags, flags); -} - -static inline struct multicall_space xen_mc_entry(size_t args) -{ - xen_mc_batch(); - return __xen_mc_entry(args); -} - -/* Flush all pending multicalls */ -void xen_mc_flush(void); - -/* Issue a multicall if we're not in a lazy mode */ -static inline void xen_mc_issue(unsigned mode) -{ - trace_xen_mc_issue(mode); - - if ((paravirt_get_lazy_mode() & mode) == 0) - xen_mc_flush(); - - /* restore flags saved in xen_mc_batch */ - local_irq_restore(this_cpu_read(xen_mc_irq_flags)); -} - -/* Set up a callback to be called when the current batch is flushed */ -void xen_mc_callback(void (*fn)(void *), void *data); - -/* - * Try to extend the arguments of the previous multicall command. The - * previous command's op must match. If it does, then it attempts to - * extend the argument space allocated to the multicall entry by - * arg_size bytes. - * - * The returned multicall_space will return with mc pointing to the - * command on success, or NULL on failure, and args pointing to the - * newly allocated space. - */ -struct multicall_space xen_mc_extend_args(unsigned long op, size_t arg_size); - -#endif /* _XEN_MULTICALLS_H */ diff --git a/ANDROID_3.4.5/arch/x86/xen/p2m.c b/ANDROID_3.4.5/arch/x86/xen/p2m.c deleted file mode 100644 index 1b267e75..00000000 --- a/ANDROID_3.4.5/arch/x86/xen/p2m.c +++ /dev/null @@ -1,949 +0,0 @@ -/* - * Xen leaves the responsibility for maintaining p2m mappings to the - * guests themselves, but it must also access and update the p2m array - * during suspend/resume when all the pages are reallocated. - * - * The p2m table is logically a flat array, but we implement it as a - * three-level tree to allow the address space to be sparse. - * - * Xen - * | - * p2m_top p2m_top_mfn - * / \ / \ - * p2m_mid p2m_mid p2m_mid_mfn p2m_mid_mfn - * / \ / \ / / - * p2m p2m p2m p2m p2m p2m p2m ... - * - * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p. - * - * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the - * maximum representable pseudo-physical address space is: - * P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages - * - * P2M_PER_PAGE depends on the architecture, as a mfn is always - * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to - * 512 and 1024 entries respectively. - * - * In short, these structures contain the Machine Frame Number (MFN) of the PFN. - * - * However not all entries are filled with MFNs. Specifically for all other - * leaf entries, or for the top root, or middle one, for which there is a void - * entry, we assume it is "missing". So (for example) - * pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY. - * - * We also have the possibility of setting 1-1 mappings on certain regions, so - * that: - * pfn_to_mfn(0xc0000)=0xc0000 - * - * The benefit of this is, that we can assume for non-RAM regions (think - * PCI BARs, or ACPI spaces), we can create mappings easily b/c we - * get the PFN value to match the MFN. - * - * For this to work efficiently we have one new page p2m_identity and - * allocate (via reserved_brk) any other pages we need to cover the sides - * (1GB or 4MB boundary violations). All entries in p2m_identity are set to - * INVALID_P2M_ENTRY type (Xen toolstack only recognizes that and MFNs, - * no other fancy value). - * - * On lookup we spot that the entry points to p2m_identity and return the - * identity value instead of dereferencing and returning INVALID_P2M_ENTRY. - * If the entry points to an allocated page, we just proceed as before and - * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in - * appropriate functions (pfn_to_mfn). - * - * The reason for having the IDENTITY_FRAME_BIT instead of just returning the - * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a - * non-identity pfn. To protect ourselves against we elect to set (and get) the - * IDENTITY_FRAME_BIT on all identity mapped PFNs. - * - * This simplistic diagram is used to explain the more subtle piece of code. - * There is also a digram of the P2M at the end that can help. - * Imagine your E820 looking as so: - * - * 1GB 2GB - * /-------------------+---------\/----\ /----------\ /---+-----\ - * | System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM | - * \-------------------+---------/\----/ \----------/ \---+-----/ - * ^- 1029MB ^- 2001MB - * - * [1029MB = 263424 (0x40500), 2001MB = 512256 (0x7D100), - * 2048MB = 524288 (0x80000)] - * - * And dom0_mem=max:3GB,1GB is passed in to the guest, meaning memory past 1GB - * is actually not present (would have to kick the balloon driver to put it in). - * - * When we are told to set the PFNs for identity mapping (see patch: "xen/setup: - * Set identity mapping for non-RAM E820 and E820 gaps.") we pass in the start - * of the PFN and the end PFN (263424 and 512256 respectively). The first step - * is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page - * covers 512^2 of page estate (1GB) and in case the start or end PFN is not - * aligned on 512^2*PAGE_SIZE (1GB) we loop on aligned 1GB PFNs from start pfn - * to end pfn. We reserve_brk top leaf pages if they are missing (means they - * point to p2m_mid_missing). - * - * With the E820 example above, 263424 is not 1GB aligned so we allocate a - * reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000. - * Each entry in the allocate page is "missing" (points to p2m_missing). - * - * Next stage is to determine if we need to do a more granular boundary check - * on the 4MB (or 2MB depending on architecture) off the start and end pfn's. - * We check if the start pfn and end pfn violate that boundary check, and if - * so reserve_brk a middle (p2m[x][y]) leaf page. This way we have a much finer - * granularity of setting which PFNs are missing and which ones are identity. - * In our example 263424 and 512256 both fail the check so we reserve_brk two - * pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing" - * values) and assign them to p2m[1][2] and p2m[1][488] respectively. - * - * At this point we would at minimum reserve_brk one page, but could be up to - * three. Each call to set_phys_range_identity has at maximum a three page - * cost. If we were to query the P2M at this stage, all those entries from - * start PFN through end PFN (so 1029MB -> 2001MB) would return - * INVALID_P2M_ENTRY ("missing"). - * - * The next step is to walk from the start pfn to the end pfn setting - * the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity. - * If we find that the middle leaf is pointing to p2m_missing we can swap it - * over to p2m_identity - this way covering 4MB (or 2MB) PFN space. At this - * point we do not need to worry about boundary aligment (so no need to - * reserve_brk a middle page, figure out which PFNs are "missing" and which - * ones are identity), as that has been done earlier. If we find that the - * middle leaf is not occupied by p2m_identity or p2m_missing, we dereference - * that page (which covers 512 PFNs) and set the appropriate PFN with - * IDENTITY_FRAME_BIT. In our example 263424 and 512256 end up there, and we - * set from p2m[1][2][256->511] and p2m[1][488][0->256] with - * IDENTITY_FRAME_BIT set. - * - * All other regions that are void (or not filled) either point to p2m_missing - * (considered missing) or have the default value of INVALID_P2M_ENTRY (also - * considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511] - * contain the INVALID_P2M_ENTRY value and are considered "missing." - * - * This is what the p2m ends up looking (for the E820 above) with this - * fabulous drawing: - * - * p2m /--------------\ - * /-----\ | &mfn_list[0],| /-----------------\ - * | 0 |------>| &mfn_list[1],| /---------------\ | ~0, ~0, .. | - * |-----| | ..., ~0, ~0 | | ~0, ~0, [x]---+----->| IDENTITY [@256] | - * | 1 |---\ \--------------/ | [p2m_identity]+\ | IDENTITY [@257] | - * |-----| \ | [p2m_identity]+\\ | .... | - * | 2 |--\ \-------------------->| ... | \\ \----------------/ - * |-----| \ \---------------/ \\ - * | 3 |\ \ \\ p2m_identity - * |-----| \ \-------------------->/---------------\ /-----------------\ - * | .. +->+ | [p2m_identity]+-->| ~0, ~0, ~0, ... | - * \-----/ / | [p2m_identity]+-->| ..., ~0 | - * / /---------------\ | .... | \-----------------/ - * / | IDENTITY[@0] | /-+-[x], ~0, ~0.. | - * / | IDENTITY[@256]|<----/ \---------------/ - * / | ~0, ~0, .... | - * | \---------------/ - * | - * p2m_missing p2m_missing - * /------------------\ /------------\ - * | [p2m_mid_missing]+---->| ~0, ~0, ~0 | - * | [p2m_mid_missing]+---->| ..., ~0 | - * \------------------/ \------------/ - * - * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT) - */ - -#include <linux/init.h> -#include <linux/module.h> -#include <linux/list.h> -#include <linux/hash.h> -#include <linux/sched.h> -#include <linux/seq_file.h> - -#include <asm/cache.h> -#include <asm/setup.h> - -#include <asm/xen/page.h> -#include <asm/xen/hypercall.h> -#include <asm/xen/hypervisor.h> -#include <xen/grant_table.h> - -#include "multicalls.h" -#include "xen-ops.h" - -static void __init m2p_override_init(void); - -unsigned long xen_max_p2m_pfn __read_mostly; - -#define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) -#define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *)) -#define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **)) - -#define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE) - -/* Placeholders for holes in the address space */ -static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE); -static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE); -static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_missing_mfn, P2M_MID_PER_PAGE); - -static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE); -static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE); -static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE); - -static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE); - -RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); -RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); - -/* We might hit two boundary violations at the start and end, at max each - * boundary violation will require three middle nodes. */ -RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3); - -static inline unsigned p2m_top_index(unsigned long pfn) -{ - BUG_ON(pfn >= MAX_P2M_PFN); - return pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE); -} - -static inline unsigned p2m_mid_index(unsigned long pfn) -{ - return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE; -} - -static inline unsigned p2m_index(unsigned long pfn) -{ - return pfn % P2M_PER_PAGE; -} - -static void p2m_top_init(unsigned long ***top) -{ - unsigned i; - - for (i = 0; i < P2M_TOP_PER_PAGE; i++) - top[i] = p2m_mid_missing; -} - -static void p2m_top_mfn_init(unsigned long *top) -{ - unsigned i; - - for (i = 0; i < P2M_TOP_PER_PAGE; i++) - top[i] = virt_to_mfn(p2m_mid_missing_mfn); -} - -static void p2m_top_mfn_p_init(unsigned long **top) -{ - unsigned i; - - for (i = 0; i < P2M_TOP_PER_PAGE; i++) - top[i] = p2m_mid_missing_mfn; -} - -static void p2m_mid_init(unsigned long **mid) -{ - unsigned i; - - for (i = 0; i < P2M_MID_PER_PAGE; i++) - mid[i] = p2m_missing; -} - -static void p2m_mid_mfn_init(unsigned long *mid) -{ - unsigned i; - - for (i = 0; i < P2M_MID_PER_PAGE; i++) - mid[i] = virt_to_mfn(p2m_missing); -} - -static void p2m_init(unsigned long *p2m) -{ - unsigned i; - - for (i = 0; i < P2M_MID_PER_PAGE; i++) - p2m[i] = INVALID_P2M_ENTRY; -} - -/* - * Build the parallel p2m_top_mfn and p2m_mid_mfn structures - * - * This is called both at boot time, and after resuming from suspend: - * - At boot time we're called very early, and must use extend_brk() - * to allocate memory. - * - * - After resume we're called from within stop_machine, but the mfn - * tree should alreay be completely allocated. - */ -void __ref xen_build_mfn_list_list(void) -{ - unsigned long pfn; - - /* Pre-initialize p2m_top_mfn to be completely missing */ - if (p2m_top_mfn == NULL) { - p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_mid_mfn_init(p2m_mid_missing_mfn); - - p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_top_mfn_p_init(p2m_top_mfn_p); - - p2m_top_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_top_mfn_init(p2m_top_mfn); - } else { - /* Reinitialise, mfn's all change after migration */ - p2m_mid_mfn_init(p2m_mid_missing_mfn); - } - - for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { - unsigned topidx = p2m_top_index(pfn); - unsigned mididx = p2m_mid_index(pfn); - unsigned long **mid; - unsigned long *mid_mfn_p; - - mid = p2m_top[topidx]; - mid_mfn_p = p2m_top_mfn_p[topidx]; - - /* Don't bother allocating any mfn mid levels if - * they're just missing, just update the stored mfn, - * since all could have changed over a migrate. - */ - if (mid == p2m_mid_missing) { - BUG_ON(mididx); - BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); - p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn); - pfn += (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE; - continue; - } - - if (mid_mfn_p == p2m_mid_missing_mfn) { - /* - * XXX boot-time only! We should never find - * missing parts of the mfn tree after - * runtime. extend_brk() will BUG if we call - * it too late. - */ - mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_mid_mfn_init(mid_mfn_p); - - p2m_top_mfn_p[topidx] = mid_mfn_p; - } - - p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); - mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]); - } -} - -void xen_setup_mfn_list_list(void) -{ - BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); - - HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = - virt_to_mfn(p2m_top_mfn); - HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn; -} - -/* Set up p2m_top to point to the domain-builder provided p2m pages */ -void __init xen_build_dynamic_phys_to_machine(void) -{ - unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list; - unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); - unsigned long pfn; - - xen_max_p2m_pfn = max_pfn; - - p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_init(p2m_missing); - - p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_mid_init(p2m_mid_missing); - - p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_top_init(p2m_top); - - p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_init(p2m_identity); - - /* - * The domain builder gives us a pre-constructed p2m array in - * mfn_list for all the pages initially given to us, so we just - * need to graft that into our tree structure. - */ - for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) { - unsigned topidx = p2m_top_index(pfn); - unsigned mididx = p2m_mid_index(pfn); - - if (p2m_top[topidx] == p2m_mid_missing) { - unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_mid_init(mid); - - p2m_top[topidx] = mid; - } - - /* - * As long as the mfn_list has enough entries to completely - * fill a p2m page, pointing into the array is ok. But if - * not the entries beyond the last pfn will be undefined. - */ - if (unlikely(pfn + P2M_PER_PAGE > max_pfn)) { - unsigned long p2midx; - - p2midx = max_pfn % P2M_PER_PAGE; - for ( ; p2midx < P2M_PER_PAGE; p2midx++) - mfn_list[pfn + p2midx] = INVALID_P2M_ENTRY; - } - p2m_top[topidx][mididx] = &mfn_list[pfn]; - } - - m2p_override_init(); -} - -unsigned long get_phys_to_machine(unsigned long pfn) -{ - unsigned topidx, mididx, idx; - - if (unlikely(pfn >= MAX_P2M_PFN)) - return INVALID_P2M_ENTRY; - - topidx = p2m_top_index(pfn); - mididx = p2m_mid_index(pfn); - idx = p2m_index(pfn); - - /* - * The INVALID_P2M_ENTRY is filled in both p2m_*identity - * and in p2m_*missing, so returning the INVALID_P2M_ENTRY - * would be wrong. - */ - if (p2m_top[topidx][mididx] == p2m_identity) - return IDENTITY_FRAME(pfn); - - return p2m_top[topidx][mididx][idx]; -} -EXPORT_SYMBOL_GPL(get_phys_to_machine); - -static void *alloc_p2m_page(void) -{ - return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT); -} - -static void free_p2m_page(void *p) -{ - free_page((unsigned long)p); -} - -/* - * Fully allocate the p2m structure for a given pfn. We need to check - * that both the top and mid levels are allocated, and make sure the - * parallel mfn tree is kept in sync. We may race with other cpus, so - * the new pages are installed with cmpxchg; if we lose the race then - * simply free the page we allocated and use the one that's there. - */ -static bool alloc_p2m(unsigned long pfn) -{ - unsigned topidx, mididx; - unsigned long ***top_p, **mid; - unsigned long *top_mfn_p, *mid_mfn; - - topidx = p2m_top_index(pfn); - mididx = p2m_mid_index(pfn); - - top_p = &p2m_top[topidx]; - mid = *top_p; - - if (mid == p2m_mid_missing) { - /* Mid level is missing, allocate a new one */ - mid = alloc_p2m_page(); - if (!mid) - return false; - - p2m_mid_init(mid); - - if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing) - free_p2m_page(mid); - } - - top_mfn_p = &p2m_top_mfn[topidx]; - mid_mfn = p2m_top_mfn_p[topidx]; - - BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); - - if (mid_mfn == p2m_mid_missing_mfn) { - /* Separately check the mid mfn level */ - unsigned long missing_mfn; - unsigned long mid_mfn_mfn; - - mid_mfn = alloc_p2m_page(); - if (!mid_mfn) - return false; - - p2m_mid_mfn_init(mid_mfn); - - missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); - mid_mfn_mfn = virt_to_mfn(mid_mfn); - if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn) - free_p2m_page(mid_mfn); - else - p2m_top_mfn_p[topidx] = mid_mfn; - } - - if (p2m_top[topidx][mididx] == p2m_identity || - p2m_top[topidx][mididx] == p2m_missing) { - /* p2m leaf page is missing */ - unsigned long *p2m; - unsigned long *p2m_orig = p2m_top[topidx][mididx]; - - p2m = alloc_p2m_page(); - if (!p2m) - return false; - - p2m_init(p2m); - - if (cmpxchg(&mid[mididx], p2m_orig, p2m) != p2m_orig) - free_p2m_page(p2m); - else - mid_mfn[mididx] = virt_to_mfn(p2m); - } - - return true; -} - -static bool __init __early_alloc_p2m(unsigned long pfn) -{ - unsigned topidx, mididx, idx; - - topidx = p2m_top_index(pfn); - mididx = p2m_mid_index(pfn); - idx = p2m_index(pfn); - - /* Pfff.. No boundary cross-over, lets get out. */ - if (!idx) - return false; - - WARN(p2m_top[topidx][mididx] == p2m_identity, - "P2M[%d][%d] == IDENTITY, should be MISSING (or alloced)!\n", - topidx, mididx); - - /* - * Could be done by xen_build_dynamic_phys_to_machine.. - */ - if (p2m_top[topidx][mididx] != p2m_missing) - return false; - - /* Boundary cross-over for the edges: */ - if (idx) { - unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE); - unsigned long *mid_mfn_p; - - p2m_init(p2m); - - p2m_top[topidx][mididx] = p2m; - - /* For save/restore we need to MFN of the P2M saved */ - - mid_mfn_p = p2m_top_mfn_p[topidx]; - WARN(mid_mfn_p[mididx] != virt_to_mfn(p2m_missing), - "P2M_TOP_P[%d][%d] != MFN of p2m_missing!\n", - topidx, mididx); - mid_mfn_p[mididx] = virt_to_mfn(p2m); - - } - return idx != 0; -} -unsigned long __init set_phys_range_identity(unsigned long pfn_s, - unsigned long pfn_e) -{ - unsigned long pfn; - - if (unlikely(pfn_s >= MAX_P2M_PFN || pfn_e >= MAX_P2M_PFN)) - return 0; - - if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) - return pfn_e - pfn_s; - - if (pfn_s > pfn_e) - return 0; - - for (pfn = (pfn_s & ~(P2M_MID_PER_PAGE * P2M_PER_PAGE - 1)); - pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE)); - pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE) - { - unsigned topidx = p2m_top_index(pfn); - unsigned long *mid_mfn_p; - unsigned long **mid; - - mid = p2m_top[topidx]; - mid_mfn_p = p2m_top_mfn_p[topidx]; - if (mid == p2m_mid_missing) { - mid = extend_brk(PAGE_SIZE, PAGE_SIZE); - - p2m_mid_init(mid); - - p2m_top[topidx] = mid; - - BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); - } - /* And the save/restore P2M tables.. */ - if (mid_mfn_p == p2m_mid_missing_mfn) { - mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_mid_mfn_init(mid_mfn_p); - - p2m_top_mfn_p[topidx] = mid_mfn_p; - p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); - /* Note: we don't set mid_mfn_p[midix] here, - * look in __early_alloc_p2m */ - } - } - - __early_alloc_p2m(pfn_s); - __early_alloc_p2m(pfn_e); - - for (pfn = pfn_s; pfn < pfn_e; pfn++) - if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn))) - break; - - if (!WARN((pfn - pfn_s) != (pfn_e - pfn_s), - "Identity mapping failed. We are %ld short of 1-1 mappings!\n", - (pfn_e - pfn_s) - (pfn - pfn_s))) - printk(KERN_DEBUG "1-1 mapping on %lx->%lx\n", pfn_s, pfn); - - return pfn - pfn_s; -} - -/* Try to install p2m mapping; fail if intermediate bits missing */ -bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) -{ - unsigned topidx, mididx, idx; - - if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { - BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); - return true; - } - if (unlikely(pfn >= MAX_P2M_PFN)) { - BUG_ON(mfn != INVALID_P2M_ENTRY); - return true; - } - - topidx = p2m_top_index(pfn); - mididx = p2m_mid_index(pfn); - idx = p2m_index(pfn); - - /* For sparse holes were the p2m leaf has real PFN along with - * PCI holes, stick in the PFN as the MFN value. - */ - if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) { - if (p2m_top[topidx][mididx] == p2m_identity) - return true; - - /* Swap over from MISSING to IDENTITY if needed. */ - if (p2m_top[topidx][mididx] == p2m_missing) { - WARN_ON(cmpxchg(&p2m_top[topidx][mididx], p2m_missing, - p2m_identity) != p2m_missing); - return true; - } - } - - if (p2m_top[topidx][mididx] == p2m_missing) - return mfn == INVALID_P2M_ENTRY; - - p2m_top[topidx][mididx][idx] = mfn; - - return true; -} - -bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) -{ - if (unlikely(!__set_phys_to_machine(pfn, mfn))) { - if (!alloc_p2m(pfn)) - return false; - - if (!__set_phys_to_machine(pfn, mfn)) - return false; - } - - return true; -} - -#define M2P_OVERRIDE_HASH_SHIFT 10 -#define M2P_OVERRIDE_HASH (1 << M2P_OVERRIDE_HASH_SHIFT) - -static RESERVE_BRK_ARRAY(struct list_head, m2p_overrides, M2P_OVERRIDE_HASH); -static DEFINE_SPINLOCK(m2p_override_lock); - -static void __init m2p_override_init(void) -{ - unsigned i; - - m2p_overrides = extend_brk(sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH, - sizeof(unsigned long)); - - for (i = 0; i < M2P_OVERRIDE_HASH; i++) - INIT_LIST_HEAD(&m2p_overrides[i]); -} - -static unsigned long mfn_hash(unsigned long mfn) -{ - return hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT); -} - -/* Add an MFN override for a particular page */ -int m2p_add_override(unsigned long mfn, struct page *page, - struct gnttab_map_grant_ref *kmap_op) -{ - unsigned long flags; - unsigned long pfn; - unsigned long uninitialized_var(address); - unsigned level; - pte_t *ptep = NULL; - - pfn = page_to_pfn(page); - if (!PageHighMem(page)) { - address = (unsigned long)__va(pfn << PAGE_SHIFT); - ptep = lookup_address(address, &level); - if (WARN(ptep == NULL || level != PG_LEVEL_4K, - "m2p_add_override: pfn %lx not mapped", pfn)) - return -EINVAL; - } - WARN_ON(PagePrivate(page)); - SetPagePrivate(page); - set_page_private(page, mfn); - page->index = pfn_to_mfn(pfn); - - if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) - return -ENOMEM; - - if (kmap_op != NULL) { - if (!PageHighMem(page)) { - struct multicall_space mcs = - xen_mc_entry(sizeof(*kmap_op)); - - MULTI_grant_table_op(mcs.mc, - GNTTABOP_map_grant_ref, kmap_op, 1); - - xen_mc_issue(PARAVIRT_LAZY_MMU); - } - /* let's use dev_bus_addr to record the old mfn instead */ - kmap_op->dev_bus_addr = page->index; - page->index = (unsigned long) kmap_op; - } - spin_lock_irqsave(&m2p_override_lock, flags); - list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); - spin_unlock_irqrestore(&m2p_override_lock, flags); - - return 0; -} -EXPORT_SYMBOL_GPL(m2p_add_override); -int m2p_remove_override(struct page *page, bool clear_pte) -{ - unsigned long flags; - unsigned long mfn; - unsigned long pfn; - unsigned long uninitialized_var(address); - unsigned level; - pte_t *ptep = NULL; - - pfn = page_to_pfn(page); - mfn = get_phys_to_machine(pfn); - if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) - return -EINVAL; - - if (!PageHighMem(page)) { - address = (unsigned long)__va(pfn << PAGE_SHIFT); - ptep = lookup_address(address, &level); - - if (WARN(ptep == NULL || level != PG_LEVEL_4K, - "m2p_remove_override: pfn %lx not mapped", pfn)) - return -EINVAL; - } - - spin_lock_irqsave(&m2p_override_lock, flags); - list_del(&page->lru); - spin_unlock_irqrestore(&m2p_override_lock, flags); - WARN_ON(!PagePrivate(page)); - ClearPagePrivate(page); - - if (clear_pte) { - struct gnttab_map_grant_ref *map_op = - (struct gnttab_map_grant_ref *) page->index; - set_phys_to_machine(pfn, map_op->dev_bus_addr); - if (!PageHighMem(page)) { - struct multicall_space mcs; - struct gnttab_unmap_grant_ref *unmap_op; - - /* - * It might be that we queued all the m2p grant table - * hypercalls in a multicall, then m2p_remove_override - * get called before the multicall has actually been - * issued. In this case handle is going to -1 because - * it hasn't been modified yet. - */ - if (map_op->handle == -1) - xen_mc_flush(); - /* - * Now if map_op->handle is negative it means that the - * hypercall actually returned an error. - */ - if (map_op->handle == GNTST_general_error) { - printk(KERN_WARNING "m2p_remove_override: " - "pfn %lx mfn %lx, failed to modify kernel mappings", - pfn, mfn); - return -1; - } - - mcs = xen_mc_entry( - sizeof(struct gnttab_unmap_grant_ref)); - unmap_op = mcs.args; - unmap_op->host_addr = map_op->host_addr; - unmap_op->handle = map_op->handle; - unmap_op->dev_bus_addr = 0; - - MULTI_grant_table_op(mcs.mc, - GNTTABOP_unmap_grant_ref, unmap_op, 1); - - xen_mc_issue(PARAVIRT_LAZY_MMU); - - set_pte_at(&init_mm, address, ptep, - pfn_pte(pfn, PAGE_KERNEL)); - __flush_tlb_single(address); - map_op->host_addr = 0; - } - } else - set_phys_to_machine(pfn, page->index); - - return 0; -} -EXPORT_SYMBOL_GPL(m2p_remove_override); - -struct page *m2p_find_override(unsigned long mfn) -{ - unsigned long flags; - struct list_head *bucket = &m2p_overrides[mfn_hash(mfn)]; - struct page *p, *ret; - - ret = NULL; - - spin_lock_irqsave(&m2p_override_lock, flags); - - list_for_each_entry(p, bucket, lru) { - if (page_private(p) == mfn) { - ret = p; - break; - } - } - - spin_unlock_irqrestore(&m2p_override_lock, flags); - - return ret; -} - -unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn) -{ - struct page *p = m2p_find_override(mfn); - unsigned long ret = pfn; - - if (p) - ret = page_to_pfn(p); - - return ret; -} -EXPORT_SYMBOL_GPL(m2p_find_override_pfn); - -#ifdef CONFIG_XEN_DEBUG_FS -#include <linux/debugfs.h> -#include "debugfs.h" -static int p2m_dump_show(struct seq_file *m, void *v) -{ - static const char * const level_name[] = { "top", "middle", - "entry", "abnormal", "error"}; -#define TYPE_IDENTITY 0 -#define TYPE_MISSING 1 -#define TYPE_PFN 2 -#define TYPE_UNKNOWN 3 - static const char * const type_name[] = { - [TYPE_IDENTITY] = "identity", - [TYPE_MISSING] = "missing", - [TYPE_PFN] = "pfn", - [TYPE_UNKNOWN] = "abnormal"}; - unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0; - unsigned int uninitialized_var(prev_level); - unsigned int uninitialized_var(prev_type); - - if (!p2m_top) - return 0; - - for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn++) { - unsigned topidx = p2m_top_index(pfn); - unsigned mididx = p2m_mid_index(pfn); - unsigned idx = p2m_index(pfn); - unsigned lvl, type; - - lvl = 4; - type = TYPE_UNKNOWN; - if (p2m_top[topidx] == p2m_mid_missing) { - lvl = 0; type = TYPE_MISSING; - } else if (p2m_top[topidx] == NULL) { - lvl = 0; type = TYPE_UNKNOWN; - } else if (p2m_top[topidx][mididx] == NULL) { - lvl = 1; type = TYPE_UNKNOWN; - } else if (p2m_top[topidx][mididx] == p2m_identity) { - lvl = 1; type = TYPE_IDENTITY; - } else if (p2m_top[topidx][mididx] == p2m_missing) { - lvl = 1; type = TYPE_MISSING; - } else if (p2m_top[topidx][mididx][idx] == 0) { - lvl = 2; type = TYPE_UNKNOWN; - } else if (p2m_top[topidx][mididx][idx] == IDENTITY_FRAME(pfn)) { - lvl = 2; type = TYPE_IDENTITY; - } else if (p2m_top[topidx][mididx][idx] == INVALID_P2M_ENTRY) { - lvl = 2; type = TYPE_MISSING; - } else if (p2m_top[topidx][mididx][idx] == pfn) { - lvl = 2; type = TYPE_PFN; - } else if (p2m_top[topidx][mididx][idx] != pfn) { - lvl = 2; type = TYPE_PFN; - } - if (pfn == 0) { - prev_level = lvl; - prev_type = type; - } - if (pfn == MAX_DOMAIN_PAGES-1) { - lvl = 3; - type = TYPE_UNKNOWN; - } - if (prev_type != type) { - seq_printf(m, " [0x%lx->0x%lx] %s\n", - prev_pfn_type, pfn, type_name[prev_type]); - prev_pfn_type = pfn; - prev_type = type; - } - if (prev_level != lvl) { - seq_printf(m, " [0x%lx->0x%lx] level %s\n", - prev_pfn_level, pfn, level_name[prev_level]); - prev_pfn_level = pfn; - prev_level = lvl; - } - } - return 0; -#undef TYPE_IDENTITY -#undef TYPE_MISSING -#undef TYPE_PFN -#undef TYPE_UNKNOWN -} - -static int p2m_dump_open(struct inode *inode, struct file *filp) -{ - return single_open(filp, p2m_dump_show, NULL); -} - -static const struct file_operations p2m_dump_fops = { - .open = p2m_dump_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static struct dentry *d_mmu_debug; - -static int __init xen_p2m_debugfs(void) -{ - struct dentry *d_xen = xen_init_debugfs(); - - if (d_xen == NULL) - return -ENOMEM; - - d_mmu_debug = debugfs_create_dir("mmu", d_xen); - - debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops); - return 0; -} -fs_initcall(xen_p2m_debugfs); -#endif /* CONFIG_XEN_DEBUG_FS */ diff --git a/ANDROID_3.4.5/arch/x86/xen/pci-swiotlb-xen.c b/ANDROID_3.4.5/arch/x86/xen/pci-swiotlb-xen.c deleted file mode 100644 index 967633ad..00000000 --- a/ANDROID_3.4.5/arch/x86/xen/pci-swiotlb-xen.c +++ /dev/null @@ -1,67 +0,0 @@ -/* Glue code to lib/swiotlb-xen.c */ - -#include <linux/dma-mapping.h> -#include <linux/pci.h> -#include <xen/swiotlb-xen.h> - -#include <asm/xen/hypervisor.h> -#include <xen/xen.h> -#include <asm/iommu_table.h> - -int xen_swiotlb __read_mostly; - -static struct dma_map_ops xen_swiotlb_dma_ops = { - .mapping_error = xen_swiotlb_dma_mapping_error, - .alloc = xen_swiotlb_alloc_coherent, - .free = xen_swiotlb_free_coherent, - .sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu, - .sync_single_for_device = xen_swiotlb_sync_single_for_device, - .sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu, - .sync_sg_for_device = xen_swiotlb_sync_sg_for_device, - .map_sg = xen_swiotlb_map_sg_attrs, - .unmap_sg = xen_swiotlb_unmap_sg_attrs, - .map_page = xen_swiotlb_map_page, - .unmap_page = xen_swiotlb_unmap_page, - .dma_supported = xen_swiotlb_dma_supported, -}; - -/* - * pci_xen_swiotlb_detect - set xen_swiotlb to 1 if necessary - * - * This returns non-zero if we are forced to use xen_swiotlb (by the boot - * option). - */ -int __init pci_xen_swiotlb_detect(void) -{ - - /* If running as PV guest, either iommu=soft, or swiotlb=force will - * activate this IOMMU. If running as PV privileged, activate it - * irregardless. - */ - if ((xen_initial_domain() || swiotlb || swiotlb_force) && - (xen_pv_domain())) - xen_swiotlb = 1; - - /* If we are running under Xen, we MUST disable the native SWIOTLB. - * Don't worry about swiotlb_force flag activating the native, as - * the 'swiotlb' flag is the only one turning it on. */ - if (xen_pv_domain()) - swiotlb = 0; - - return xen_swiotlb; -} - -void __init pci_xen_swiotlb_init(void) -{ - if (xen_swiotlb) { - xen_swiotlb_init(1); - dma_ops = &xen_swiotlb_dma_ops; - - /* Make sure ACS will be enabled */ - pci_request_acs(); - } -} -IOMMU_INIT_FINISH(pci_xen_swiotlb_detect, - 0, - pci_xen_swiotlb_init, - 0); diff --git a/ANDROID_3.4.5/arch/x86/xen/platform-pci-unplug.c b/ANDROID_3.4.5/arch/x86/xen/platform-pci-unplug.c deleted file mode 100644 index ffcf2615..00000000 --- a/ANDROID_3.4.5/arch/x86/xen/platform-pci-unplug.c +++ /dev/null @@ -1,143 +0,0 @@ -/****************************************************************************** - * platform-pci-unplug.c - * - * Xen platform PCI device driver - * Copyright (c) 2010, Citrix - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - */ - -#include <linux/init.h> -#include <linux/io.h> -#include <linux/module.h> - -#include <xen/platform_pci.h> - -#define XEN_PLATFORM_ERR_MAGIC -1 -#define XEN_PLATFORM_ERR_PROTOCOL -2 -#define XEN_PLATFORM_ERR_BLACKLIST -3 - -/* store the value of xen_emul_unplug after the unplug is done */ -int xen_platform_pci_unplug; -EXPORT_SYMBOL_GPL(xen_platform_pci_unplug); -#ifdef CONFIG_XEN_PVHVM -static int xen_emul_unplug; - -static int check_platform_magic(void) -{ - short magic; - char protocol; - - magic = inw(XEN_IOPORT_MAGIC); - if (magic != XEN_IOPORT_MAGIC_VAL) { - printk(KERN_ERR "Xen Platform PCI: unrecognised magic value\n"); - return XEN_PLATFORM_ERR_MAGIC; - } - - protocol = inb(XEN_IOPORT_PROTOVER); - - printk(KERN_DEBUG "Xen Platform PCI: I/O protocol version %d\n", - protocol); - - switch (protocol) { - case 1: - outw(XEN_IOPORT_LINUX_PRODNUM, XEN_IOPORT_PRODNUM); - outl(XEN_IOPORT_LINUX_DRVVER, XEN_IOPORT_DRVVER); - if (inw(XEN_IOPORT_MAGIC) != XEN_IOPORT_MAGIC_VAL) { - printk(KERN_ERR "Xen Platform: blacklisted by host\n"); - return XEN_PLATFORM_ERR_BLACKLIST; - } - break; - default: - printk(KERN_WARNING "Xen Platform PCI: unknown I/O protocol version"); - return XEN_PLATFORM_ERR_PROTOCOL; - } - - return 0; -} - -void xen_unplug_emulated_devices(void) -{ - int r; - - /* user explicitly requested no unplug */ - if (xen_emul_unplug & XEN_UNPLUG_NEVER) - return; - /* check the version of the xen platform PCI device */ - r = check_platform_magic(); - /* If the version matches enable the Xen platform PCI driver. - * Also enable the Xen platform PCI driver if the host does - * not support the unplug protocol (XEN_PLATFORM_ERR_MAGIC) - * but the user told us that unplugging is unnecessary. */ - if (r && !(r == XEN_PLATFORM_ERR_MAGIC && - (xen_emul_unplug & XEN_UNPLUG_UNNECESSARY))) - return; - /* Set the default value of xen_emul_unplug depending on whether or - * not the Xen PV frontends and the Xen platform PCI driver have - * been compiled for this kernel (modules or built-in are both OK). */ - if (!xen_emul_unplug) { - if (xen_must_unplug_nics()) { - printk(KERN_INFO "Netfront and the Xen platform PCI driver have " - "been compiled for this kernel: unplug emulated NICs.\n"); - xen_emul_unplug |= XEN_UNPLUG_ALL_NICS; - } - if (xen_must_unplug_disks()) { - printk(KERN_INFO "Blkfront and the Xen platform PCI driver have " - "been compiled for this kernel: unplug emulated disks.\n" - "You might have to change the root device\n" - "from /dev/hd[a-d] to /dev/xvd[a-d]\n" - "in your root= kernel command line option\n"); - xen_emul_unplug |= XEN_UNPLUG_ALL_IDE_DISKS; - } - } - /* Now unplug the emulated devices */ - if (!(xen_emul_unplug & XEN_UNPLUG_UNNECESSARY)) - outw(xen_emul_unplug, XEN_IOPORT_UNPLUG); - xen_platform_pci_unplug = xen_emul_unplug; -} - -static int __init parse_xen_emul_unplug(char *arg) -{ - char *p, *q; - int l; - - for (p = arg; p; p = q) { - q = strchr(p, ','); - if (q) { - l = q - p; - q++; - } else { - l = strlen(p); - } - if (!strncmp(p, "all", l)) - xen_emul_unplug |= XEN_UNPLUG_ALL; - else if (!strncmp(p, "ide-disks", l)) - xen_emul_unplug |= XEN_UNPLUG_ALL_IDE_DISKS; - else if (!strncmp(p, "aux-ide-disks", l)) - xen_emul_unplug |= XEN_UNPLUG_AUX_IDE_DISKS; - else if (!strncmp(p, "nics", l)) - xen_emul_unplug |= XEN_UNPLUG_ALL_NICS; - else if (!strncmp(p, "unnecessary", l)) - xen_emul_unplug |= XEN_UNPLUG_UNNECESSARY; - else if (!strncmp(p, "never", l)) - xen_emul_unplug |= XEN_UNPLUG_NEVER; - else - printk(KERN_WARNING "unrecognised option '%s' " - "in parameter 'xen_emul_unplug'\n", p); - } - return 0; -} -early_param("xen_emul_unplug", parse_xen_emul_unplug); -#endif diff --git a/ANDROID_3.4.5/arch/x86/xen/setup.c b/ANDROID_3.4.5/arch/x86/xen/setup.c deleted file mode 100644 index 1ba8dff2..00000000 --- a/ANDROID_3.4.5/arch/x86/xen/setup.c +++ /dev/null @@ -1,427 +0,0 @@ -/* - * Machine specific setup for xen - * - * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 - */ - -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/pm.h> -#include <linux/memblock.h> -#include <linux/cpuidle.h> -#include <linux/cpufreq.h> - -#include <asm/elf.h> -#include <asm/vdso.h> -#include <asm/e820.h> -#include <asm/setup.h> -#include <asm/acpi.h> -#include <asm/xen/hypervisor.h> -#include <asm/xen/hypercall.h> - -#include <xen/xen.h> -#include <xen/page.h> -#include <xen/interface/callback.h> -#include <xen/interface/memory.h> -#include <xen/interface/physdev.h> -#include <xen/features.h> - -#include "xen-ops.h" -#include "vdso.h" - -/* These are code, but not functions. Defined in entry.S */ -extern const char xen_hypervisor_callback[]; -extern const char xen_failsafe_callback[]; -extern void xen_sysenter_target(void); -extern void xen_syscall_target(void); -extern void xen_syscall32_target(void); - -/* Amount of extra memory space we add to the e820 ranges */ -struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata; - -/* Number of pages released from the initial allocation. */ -unsigned long xen_released_pages; - -/* - * The maximum amount of extra memory compared to the base size. The - * main scaling factor is the size of struct page. At extreme ratios - * of base:extra, all the base memory can be filled with page - * structures for the extra memory, leaving no space for anything - * else. - * - * 10x seems like a reasonable balance between scaling flexibility and - * leaving a practically usable system. - */ -#define EXTRA_MEM_RATIO (10) - -static void __init xen_add_extra_mem(u64 start, u64 size) -{ - unsigned long pfn; - int i; - - for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { - /* Add new region. */ - if (xen_extra_mem[i].size == 0) { - xen_extra_mem[i].start = start; - xen_extra_mem[i].size = size; - break; - } - /* Append to existing region. */ - if (xen_extra_mem[i].start + xen_extra_mem[i].size == start) { - xen_extra_mem[i].size += size; - break; - } - } - if (i == XEN_EXTRA_MEM_MAX_REGIONS) - printk(KERN_WARNING "Warning: not enough extra memory regions\n"); - - memblock_reserve(start, size); - - xen_max_p2m_pfn = PFN_DOWN(start + size); - - for (pfn = PFN_DOWN(start); pfn <= xen_max_p2m_pfn; pfn++) - __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); -} - -static unsigned long __init xen_release_chunk(unsigned long start, - unsigned long end) -{ - struct xen_memory_reservation reservation = { - .address_bits = 0, - .extent_order = 0, - .domid = DOMID_SELF - }; - unsigned long len = 0; - unsigned long pfn; - int ret; - - for(pfn = start; pfn < end; pfn++) { - unsigned long mfn = pfn_to_mfn(pfn); - - /* Make sure pfn exists to start with */ - if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn) - continue; - - set_xen_guest_handle(reservation.extent_start, &mfn); - reservation.nr_extents = 1; - - ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, - &reservation); - WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret); - if (ret == 1) { - __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); - len++; - } - } - printk(KERN_INFO "Freeing %lx-%lx pfn range: %lu pages freed\n", - start, end, len); - - return len; -} - -static unsigned long __init xen_set_identity_and_release( - const struct e820entry *list, size_t map_size, unsigned long nr_pages) -{ - phys_addr_t start = 0; - unsigned long released = 0; - unsigned long identity = 0; - const struct e820entry *entry; - int i; - - /* - * Combine non-RAM regions and gaps until a RAM region (or the - * end of the map) is reached, then set the 1:1 map and - * release the pages (if available) in those non-RAM regions. - * - * The combined non-RAM regions are rounded to a whole number - * of pages so any partial pages are accessible via the 1:1 - * mapping. This is needed for some BIOSes that put (for - * example) the DMI tables in a reserved region that begins on - * a non-page boundary. - */ - for (i = 0, entry = list; i < map_size; i++, entry++) { - phys_addr_t end = entry->addr + entry->size; - - if (entry->type == E820_RAM || i == map_size - 1) { - unsigned long start_pfn = PFN_DOWN(start); - unsigned long end_pfn = PFN_UP(end); - - if (entry->type == E820_RAM) - end_pfn = PFN_UP(entry->addr); - - if (start_pfn < end_pfn) { - if (start_pfn < nr_pages) - released += xen_release_chunk( - start_pfn, min(end_pfn, nr_pages)); - - identity += set_phys_range_identity( - start_pfn, end_pfn); - } - start = end; - } - } - - printk(KERN_INFO "Released %lu pages of unused memory\n", released); - printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity); - - return released; -} - -static unsigned long __init xen_get_max_pages(void) -{ - unsigned long max_pages = MAX_DOMAIN_PAGES; - domid_t domid = DOMID_SELF; - int ret; - - /* - * For the initial domain we use the maximum reservation as - * the maximum page. - * - * For guest domains the current maximum reservation reflects - * the current maximum rather than the static maximum. In this - * case the e820 map provided to us will cover the static - * maximum region. - */ - if (xen_initial_domain()) { - ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid); - if (ret > 0) - max_pages = ret; - } - - return min(max_pages, MAX_DOMAIN_PAGES); -} - -static void xen_align_and_add_e820_region(u64 start, u64 size, int type) -{ - u64 end = start + size; - - /* Align RAM regions to page boundaries. */ - if (type == E820_RAM) { - start = PAGE_ALIGN(start); - end &= ~((u64)PAGE_SIZE - 1); - } - - e820_add_region(start, end - start, type); -} - -/** - * machine_specific_memory_setup - Hook for machine specific memory setup. - **/ -char * __init xen_memory_setup(void) -{ - static struct e820entry map[E820MAX] __initdata; - - unsigned long max_pfn = xen_start_info->nr_pages; - unsigned long long mem_end; - int rc; - struct xen_memory_map memmap; - unsigned long max_pages; - unsigned long extra_pages = 0; - int i; - int op; - - max_pfn = min(MAX_DOMAIN_PAGES, max_pfn); - mem_end = PFN_PHYS(max_pfn); - - memmap.nr_entries = E820MAX; - set_xen_guest_handle(memmap.buffer, map); - - op = xen_initial_domain() ? - XENMEM_machine_memory_map : - XENMEM_memory_map; - rc = HYPERVISOR_memory_op(op, &memmap); - if (rc == -ENOSYS) { - BUG_ON(xen_initial_domain()); - memmap.nr_entries = 1; - map[0].addr = 0ULL; - map[0].size = mem_end; - /* 8MB slack (to balance backend allocations). */ - map[0].size += 8ULL << 20; - map[0].type = E820_RAM; - rc = 0; - } - BUG_ON(rc); - - /* Make sure the Xen-supplied memory map is well-ordered. */ - sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries); - - max_pages = xen_get_max_pages(); - if (max_pages > max_pfn) - extra_pages += max_pages - max_pfn; - - /* - * Set P2M for all non-RAM pages and E820 gaps to be identity - * type PFNs. Any RAM pages that would be made inaccesible by - * this are first released. - */ - xen_released_pages = xen_set_identity_and_release( - map, memmap.nr_entries, max_pfn); - extra_pages += xen_released_pages; - - /* - * Clamp the amount of extra memory to a EXTRA_MEM_RATIO - * factor the base size. On non-highmem systems, the base - * size is the full initial memory allocation; on highmem it - * is limited to the max size of lowmem, so that it doesn't - * get completely filled. - * - * In principle there could be a problem in lowmem systems if - * the initial memory is also very large with respect to - * lowmem, but we won't try to deal with that here. - */ - extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)), - extra_pages); - - i = 0; - while (i < memmap.nr_entries) { - u64 addr = map[i].addr; - u64 size = map[i].size; - u32 type = map[i].type; - - if (type == E820_RAM) { - if (addr < mem_end) { - size = min(size, mem_end - addr); - } else if (extra_pages) { - size = min(size, (u64)extra_pages * PAGE_SIZE); - extra_pages -= size / PAGE_SIZE; - xen_add_extra_mem(addr, size); - } else - type = E820_UNUSABLE; - } - - xen_align_and_add_e820_region(addr, size, type); - - map[i].addr += size; - map[i].size -= size; - if (map[i].size == 0) - i++; - } - - /* - * In domU, the ISA region is normal, usable memory, but we - * reserve ISA memory anyway because too many things poke - * about in there. - */ - e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS, - E820_RESERVED); - - /* - * Reserve Xen bits: - * - mfn_list - * - xen_start_info - * See comment above "struct start_info" in <xen/interface/xen.h> - */ - memblock_reserve(__pa(xen_start_info->mfn_list), - xen_start_info->pt_base - xen_start_info->mfn_list); - - sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); - - return "Xen"; -} - -/* - * Set the bit indicating "nosegneg" library variants should be used. - * We only need to bother in pure 32-bit mode; compat 32-bit processes - * can have un-truncated segments, so wrapping around is allowed. - */ -static void __init fiddle_vdso(void) -{ -#ifdef CONFIG_X86_32 - u32 *mask; - mask = VDSO32_SYMBOL(&vdso32_int80_start, NOTE_MASK); - *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT; - mask = VDSO32_SYMBOL(&vdso32_sysenter_start, NOTE_MASK); - *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT; -#endif -} - -static int __cpuinit register_callback(unsigned type, const void *func) -{ - struct callback_register callback = { - .type = type, - .address = XEN_CALLBACK(__KERNEL_CS, func), - .flags = CALLBACKF_mask_events, - }; - - return HYPERVISOR_callback_op(CALLBACKOP_register, &callback); -} - -void __cpuinit xen_enable_sysenter(void) -{ - int ret; - unsigned sysenter_feature; - -#ifdef CONFIG_X86_32 - sysenter_feature = X86_FEATURE_SEP; -#else - sysenter_feature = X86_FEATURE_SYSENTER32; -#endif - - if (!boot_cpu_has(sysenter_feature)) - return; - - ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target); - if(ret != 0) - setup_clear_cpu_cap(sysenter_feature); -} - -void __cpuinit xen_enable_syscall(void) -{ -#ifdef CONFIG_X86_64 - int ret; - - ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target); - if (ret != 0) { - printk(KERN_ERR "Failed to set syscall callback: %d\n", ret); - /* Pretty fatal; 64-bit userspace has no other - mechanism for syscalls. */ - } - - if (boot_cpu_has(X86_FEATURE_SYSCALL32)) { - ret = register_callback(CALLBACKTYPE_syscall32, - xen_syscall32_target); - if (ret != 0) - setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); - } -#endif /* CONFIG_X86_64 */ -} - -void __init xen_arch_setup(void) -{ - xen_panic_handler_init(); - - HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); - HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); - - if (!xen_feature(XENFEAT_auto_translated_physmap)) - HYPERVISOR_vm_assist(VMASST_CMD_enable, - VMASST_TYPE_pae_extended_cr3); - - if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) || - register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback)) - BUG(); - - xen_enable_sysenter(); - xen_enable_syscall(); - -#ifdef CONFIG_ACPI - if (!(xen_start_info->flags & SIF_INITDOMAIN)) { - printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); - disable_acpi(); - } -#endif - - memcpy(boot_command_line, xen_start_info->cmd_line, - MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ? - COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE); - - /* Set up idle, making sure it calls safe_halt() pvop */ -#ifdef CONFIG_X86_32 - boot_cpu_data.hlt_works_ok = 1; -#endif - disable_cpuidle(); - disable_cpufreq(); - WARN_ON(set_pm_idle_to_default()); - fiddle_vdso(); -} diff --git a/ANDROID_3.4.5/arch/x86/xen/smp.c b/ANDROID_3.4.5/arch/x86/xen/smp.c deleted file mode 100644 index 0503c0c4..00000000 --- a/ANDROID_3.4.5/arch/x86/xen/smp.c +++ /dev/null @@ -1,592 +0,0 @@ -/* - * Xen SMP support - * - * This file implements the Xen versions of smp_ops. SMP under Xen is - * very straightforward. Bringing a CPU up is simply a matter of - * loading its initial context and setting it running. - * - * IPIs are handled through the Xen event mechanism. - * - * Because virtual CPUs can be scheduled onto any real CPU, there's no - * useful topology information for the kernel to make use of. As a - * result, all CPUs are treated as if they're single-core and - * single-threaded. - */ -#include <linux/sched.h> -#include <linux/err.h> -#include <linux/slab.h> -#include <linux/smp.h> - -#include <asm/paravirt.h> -#include <asm/desc.h> -#include <asm/pgtable.h> -#include <asm/cpu.h> - -#include <xen/interface/xen.h> -#include <xen/interface/vcpu.h> - -#include <asm/xen/interface.h> -#include <asm/xen/hypercall.h> - -#include <xen/xen.h> -#include <xen/page.h> -#include <xen/events.h> - -#include <xen/hvc-console.h> -#include "xen-ops.h" -#include "mmu.h" - -cpumask_var_t xen_cpu_initialized_map; - -static DEFINE_PER_CPU(int, xen_resched_irq); -static DEFINE_PER_CPU(int, xen_callfunc_irq); -static DEFINE_PER_CPU(int, xen_callfuncsingle_irq); -static DEFINE_PER_CPU(int, xen_debug_irq) = -1; - -static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id); -static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); - -/* - * Reschedule call back. - */ -static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) -{ - inc_irq_stat(irq_resched_count); - scheduler_ipi(); - - return IRQ_HANDLED; -} - -static void __cpuinit cpu_bringup(void) -{ - int cpu; - - cpu_init(); - touch_softlockup_watchdog(); - preempt_disable(); - - xen_enable_sysenter(); - xen_enable_syscall(); - - cpu = smp_processor_id(); - smp_store_cpu_info(cpu); - cpu_data(cpu).x86_max_cores = 1; - set_cpu_sibling_map(cpu); - - xen_setup_cpu_clockevents(); - - notify_cpu_starting(cpu); - - ipi_call_lock(); - set_cpu_online(cpu, true); - ipi_call_unlock(); - - this_cpu_write(cpu_state, CPU_ONLINE); - - wmb(); - - /* We can take interrupts now: we're officially "up". */ - local_irq_enable(); - - wmb(); /* make sure everything is out */ -} - -static void __cpuinit cpu_bringup_and_idle(void) -{ - cpu_bringup(); - cpu_idle(); -} - -static int xen_smp_intr_init(unsigned int cpu) -{ - int rc; - const char *resched_name, *callfunc_name, *debug_name; - - resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu); - rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR, - cpu, - xen_reschedule_interrupt, - IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, - resched_name, - NULL); - if (rc < 0) - goto fail; - per_cpu(xen_resched_irq, cpu) = rc; - - callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu); - rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR, - cpu, - xen_call_function_interrupt, - IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, - callfunc_name, - NULL); - if (rc < 0) - goto fail; - per_cpu(xen_callfunc_irq, cpu) = rc; - - debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu); - rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt, - IRQF_DISABLED | IRQF_PERCPU | IRQF_NOBALANCING, - debug_name, NULL); - if (rc < 0) - goto fail; - per_cpu(xen_debug_irq, cpu) = rc; - - callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu); - rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR, - cpu, - xen_call_function_single_interrupt, - IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, - callfunc_name, - NULL); - if (rc < 0) - goto fail; - per_cpu(xen_callfuncsingle_irq, cpu) = rc; - - return 0; - - fail: - if (per_cpu(xen_resched_irq, cpu) >= 0) - unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL); - if (per_cpu(xen_callfunc_irq, cpu) >= 0) - unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL); - if (per_cpu(xen_debug_irq, cpu) >= 0) - unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL); - if (per_cpu(xen_callfuncsingle_irq, cpu) >= 0) - unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), - NULL); - - return rc; -} - -static void __init xen_fill_possible_map(void) -{ - int i, rc; - - if (xen_initial_domain()) - return; - - for (i = 0; i < nr_cpu_ids; i++) { - rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); - if (rc >= 0) { - num_processors++; - set_cpu_possible(i, true); - } - } -} - -static void __init xen_filter_cpu_maps(void) -{ - int i, rc; - unsigned int subtract = 0; - - if (!xen_initial_domain()) - return; - - num_processors = 0; - disabled_cpus = 0; - for (i = 0; i < nr_cpu_ids; i++) { - rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); - if (rc >= 0) { - num_processors++; - set_cpu_possible(i, true); - } else { - set_cpu_possible(i, false); - set_cpu_present(i, false); - subtract++; - } - } -#ifdef CONFIG_HOTPLUG_CPU - /* This is akin to using 'nr_cpus' on the Linux command line. - * Which is OK as when we use 'dom0_max_vcpus=X' we can only - * have up to X, while nr_cpu_ids is greater than X. This - * normally is not a problem, except when CPU hotplugging - * is involved and then there might be more than X CPUs - * in the guest - which will not work as there is no - * hypercall to expand the max number of VCPUs an already - * running guest has. So cap it up to X. */ - if (subtract) - nr_cpu_ids = nr_cpu_ids - subtract; -#endif - -} - -static void __init xen_smp_prepare_boot_cpu(void) -{ - BUG_ON(smp_processor_id() != 0); - native_smp_prepare_boot_cpu(); - - /* We've switched to the "real" per-cpu gdt, so make sure the - old memory can be recycled */ - make_lowmem_page_readwrite(xen_initial_gdt); - - xen_filter_cpu_maps(); - xen_setup_vcpu_info_placement(); -} - -static void __init xen_smp_prepare_cpus(unsigned int max_cpus) -{ - unsigned cpu; - unsigned int i; - - if (skip_ioapic_setup) { - char *m = (max_cpus == 0) ? - "The nosmp parameter is incompatible with Xen; " \ - "use Xen dom0_max_vcpus=1 parameter" : - "The noapic parameter is incompatible with Xen"; - - xen_raw_printk(m); - panic(m); - } - xen_init_lock_cpu(0); - - smp_store_cpu_info(0); - cpu_data(0).x86_max_cores = 1; - - for_each_possible_cpu(i) { - zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); - zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); - zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL); - } - set_cpu_sibling_map(0); - - if (xen_smp_intr_init(0)) - BUG(); - - if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL)) - panic("could not allocate xen_cpu_initialized_map\n"); - - cpumask_copy(xen_cpu_initialized_map, cpumask_of(0)); - - /* Restrict the possible_map according to max_cpus. */ - while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) { - for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--) - continue; - set_cpu_possible(cpu, false); - } - - for_each_possible_cpu (cpu) { - struct task_struct *idle; - - if (cpu == 0) - continue; - - idle = fork_idle(cpu); - if (IS_ERR(idle)) - panic("failed fork for CPU %d", cpu); - - set_cpu_present(cpu, true); - } -} - -static int __cpuinit -cpu_initialize_context(unsigned int cpu, struct task_struct *idle) -{ - struct vcpu_guest_context *ctxt; - struct desc_struct *gdt; - unsigned long gdt_mfn; - - if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map)) - return 0; - - ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); - if (ctxt == NULL) - return -ENOMEM; - - gdt = get_cpu_gdt_table(cpu); - - ctxt->flags = VGCF_IN_KERNEL; - ctxt->user_regs.ds = __USER_DS; - ctxt->user_regs.es = __USER_DS; - ctxt->user_regs.ss = __KERNEL_DS; -#ifdef CONFIG_X86_32 - ctxt->user_regs.fs = __KERNEL_PERCPU; - ctxt->user_regs.gs = __KERNEL_STACK_CANARY; -#else - ctxt->gs_base_kernel = per_cpu_offset(cpu); -#endif - ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; - ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ - - memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); - - xen_copy_trap_info(ctxt->trap_ctxt); - - ctxt->ldt_ents = 0; - - BUG_ON((unsigned long)gdt & ~PAGE_MASK); - - gdt_mfn = arbitrary_virt_to_mfn(gdt); - make_lowmem_page_readonly(gdt); - make_lowmem_page_readonly(mfn_to_virt(gdt_mfn)); - - ctxt->gdt_frames[0] = gdt_mfn; - ctxt->gdt_ents = GDT_ENTRIES; - - ctxt->user_regs.cs = __KERNEL_CS; - ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); - - ctxt->kernel_ss = __KERNEL_DS; - ctxt->kernel_sp = idle->thread.sp0; - -#ifdef CONFIG_X86_32 - ctxt->event_callback_cs = __KERNEL_CS; - ctxt->failsafe_callback_cs = __KERNEL_CS; -#endif - ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback; - ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback; - - per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); - ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); - - if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt)) - BUG(); - - kfree(ctxt); - return 0; -} - -static int __cpuinit xen_cpu_up(unsigned int cpu) -{ - struct task_struct *idle = idle_task(cpu); - int rc; - - per_cpu(current_task, cpu) = idle; -#ifdef CONFIG_X86_32 - irq_ctx_init(cpu); -#else - clear_tsk_thread_flag(idle, TIF_FORK); - per_cpu(kernel_stack, cpu) = - (unsigned long)task_stack_page(idle) - - KERNEL_STACK_OFFSET + THREAD_SIZE; -#endif - xen_setup_runstate_info(cpu); - xen_setup_timer(cpu); - xen_init_lock_cpu(cpu); - - per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; - - /* make sure interrupts start blocked */ - per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1; - - rc = cpu_initialize_context(cpu, idle); - if (rc) - return rc; - - if (num_online_cpus() == 1) - alternatives_smp_switch(1); - - rc = xen_smp_intr_init(cpu); - if (rc) - return rc; - - rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); - BUG_ON(rc); - - while(per_cpu(cpu_state, cpu) != CPU_ONLINE) { - HYPERVISOR_sched_op(SCHEDOP_yield, NULL); - barrier(); - } - - return 0; -} - -static void xen_smp_cpus_done(unsigned int max_cpus) -{ -} - -#ifdef CONFIG_HOTPLUG_CPU -static int xen_cpu_disable(void) -{ - unsigned int cpu = smp_processor_id(); - if (cpu == 0) - return -EBUSY; - - cpu_disable_common(); - - load_cr3(swapper_pg_dir); - return 0; -} - -static void xen_cpu_die(unsigned int cpu) -{ - while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) { - current->state = TASK_UNINTERRUPTIBLE; - schedule_timeout(HZ/10); - } - unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL); - unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL); - unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL); - unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL); - xen_uninit_lock_cpu(cpu); - xen_teardown_timer(cpu); - - if (num_online_cpus() == 1) - alternatives_smp_switch(0); -} - -static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */ -{ - play_dead_common(); - HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); - cpu_bringup(); - /* - * Balance out the preempt calls - as we are running in cpu_idle - * loop which has been called at bootup from cpu_bringup_and_idle. - * The cpucpu_bringup_and_idle called cpu_bringup which made a - * preempt_disable() So this preempt_enable will balance it out. - */ - preempt_enable(); -} - -#else /* !CONFIG_HOTPLUG_CPU */ -static int xen_cpu_disable(void) -{ - return -ENOSYS; -} - -static void xen_cpu_die(unsigned int cpu) -{ - BUG(); -} - -static void xen_play_dead(void) -{ - BUG(); -} - -#endif -static void stop_self(void *v) -{ - int cpu = smp_processor_id(); - - /* make sure we're not pinning something down */ - load_cr3(swapper_pg_dir); - /* should set up a minimal gdt */ - - set_cpu_online(cpu, false); - - HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL); - BUG(); -} - -static void xen_stop_other_cpus(int wait) -{ - smp_call_function(stop_self, NULL, wait); -} - -static void xen_smp_send_reschedule(int cpu) -{ - xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); -} - -static void xen_send_IPI_mask(const struct cpumask *mask, - enum ipi_vector vector) -{ - unsigned cpu; - - for_each_cpu_and(cpu, mask, cpu_online_mask) - xen_send_IPI_one(cpu, vector); -} - -static void xen_smp_send_call_function_ipi(const struct cpumask *mask) -{ - int cpu; - - xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); - - /* Make sure other vcpus get a chance to run if they need to. */ - for_each_cpu(cpu, mask) { - if (xen_vcpu_stolen(cpu)) { - HYPERVISOR_sched_op(SCHEDOP_yield, NULL); - break; - } - } -} - -static void xen_smp_send_call_function_single_ipi(int cpu) -{ - xen_send_IPI_mask(cpumask_of(cpu), - XEN_CALL_FUNCTION_SINGLE_VECTOR); -} - -static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) -{ - irq_enter(); - generic_smp_call_function_interrupt(); - inc_irq_stat(irq_call_count); - irq_exit(); - - return IRQ_HANDLED; -} - -static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) -{ - irq_enter(); - generic_smp_call_function_single_interrupt(); - inc_irq_stat(irq_call_count); - irq_exit(); - - return IRQ_HANDLED; -} - -static const struct smp_ops xen_smp_ops __initconst = { - .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, - .smp_prepare_cpus = xen_smp_prepare_cpus, - .smp_cpus_done = xen_smp_cpus_done, - - .cpu_up = xen_cpu_up, - .cpu_die = xen_cpu_die, - .cpu_disable = xen_cpu_disable, - .play_dead = xen_play_dead, - - .stop_other_cpus = xen_stop_other_cpus, - .smp_send_reschedule = xen_smp_send_reschedule, - - .send_call_func_ipi = xen_smp_send_call_function_ipi, - .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi, -}; - -void __init xen_smp_init(void) -{ - smp_ops = xen_smp_ops; - xen_fill_possible_map(); - xen_init_spinlocks(); -} - -static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) -{ - native_smp_prepare_cpus(max_cpus); - WARN_ON(xen_smp_intr_init(0)); - - xen_init_lock_cpu(0); -} - -static int __cpuinit xen_hvm_cpu_up(unsigned int cpu) -{ - int rc; - rc = native_cpu_up(cpu); - WARN_ON (xen_smp_intr_init(cpu)); - return rc; -} - -static void xen_hvm_cpu_die(unsigned int cpu) -{ - unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL); - unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL); - unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL); - unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL); - native_cpu_die(cpu); -} - -void __init xen_hvm_smp_init(void) -{ - if (!xen_have_vector_callback) - return; - smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; - smp_ops.smp_send_reschedule = xen_smp_send_reschedule; - smp_ops.cpu_up = xen_hvm_cpu_up; - smp_ops.cpu_die = xen_hvm_cpu_die; - smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi; - smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi; -} diff --git a/ANDROID_3.4.5/arch/x86/xen/spinlock.c b/ANDROID_3.4.5/arch/x86/xen/spinlock.c deleted file mode 100644 index d69cc6c3..00000000 --- a/ANDROID_3.4.5/arch/x86/xen/spinlock.c +++ /dev/null @@ -1,454 +0,0 @@ -/* - * Split spinlock implementation out into its own file, so it can be - * compiled in a FTRACE-compatible way. - */ -#include <linux/kernel_stat.h> -#include <linux/spinlock.h> -#include <linux/debugfs.h> -#include <linux/log2.h> -#include <linux/gfp.h> - -#include <asm/paravirt.h> - -#include <xen/interface/xen.h> -#include <xen/events.h> - -#include "xen-ops.h" -#include "debugfs.h" - -#ifdef CONFIG_XEN_DEBUG_FS -static struct xen_spinlock_stats -{ - u64 taken; - u32 taken_slow; - u32 taken_slow_nested; - u32 taken_slow_pickup; - u32 taken_slow_spurious; - u32 taken_slow_irqenable; - - u64 released; - u32 released_slow; - u32 released_slow_kicked; - -#define HISTO_BUCKETS 30 - u32 histo_spin_total[HISTO_BUCKETS+1]; - u32 histo_spin_spinning[HISTO_BUCKETS+1]; - u32 histo_spin_blocked[HISTO_BUCKETS+1]; - - u64 time_total; - u64 time_spinning; - u64 time_blocked; -} spinlock_stats; - -static u8 zero_stats; - -static unsigned lock_timeout = 1 << 10; -#define TIMEOUT lock_timeout - -static inline void check_zero(void) -{ - if (unlikely(zero_stats)) { - memset(&spinlock_stats, 0, sizeof(spinlock_stats)); - zero_stats = 0; - } -} - -#define ADD_STATS(elem, val) \ - do { check_zero(); spinlock_stats.elem += (val); } while(0) - -static inline u64 spin_time_start(void) -{ - return xen_clocksource_read(); -} - -static void __spin_time_accum(u64 delta, u32 *array) -{ - unsigned index = ilog2(delta); - - check_zero(); - - if (index < HISTO_BUCKETS) - array[index]++; - else - array[HISTO_BUCKETS]++; -} - -static inline void spin_time_accum_spinning(u64 start) -{ - u32 delta = xen_clocksource_read() - start; - - __spin_time_accum(delta, spinlock_stats.histo_spin_spinning); - spinlock_stats.time_spinning += delta; -} - -static inline void spin_time_accum_total(u64 start) -{ - u32 delta = xen_clocksource_read() - start; - - __spin_time_accum(delta, spinlock_stats.histo_spin_total); - spinlock_stats.time_total += delta; -} - -static inline void spin_time_accum_blocked(u64 start) -{ - u32 delta = xen_clocksource_read() - start; - - __spin_time_accum(delta, spinlock_stats.histo_spin_blocked); - spinlock_stats.time_blocked += delta; -} -#else /* !CONFIG_XEN_DEBUG_FS */ -#define TIMEOUT (1 << 10) -#define ADD_STATS(elem, val) do { (void)(val); } while(0) - -static inline u64 spin_time_start(void) -{ - return 0; -} - -static inline void spin_time_accum_total(u64 start) -{ -} -static inline void spin_time_accum_spinning(u64 start) -{ -} -static inline void spin_time_accum_blocked(u64 start) -{ -} -#endif /* CONFIG_XEN_DEBUG_FS */ - -/* - * Size struct xen_spinlock so it's the same as arch_spinlock_t. - */ -#if NR_CPUS < 256 -typedef u8 xen_spinners_t; -# define inc_spinners(xl) \ - asm(LOCK_PREFIX " incb %0" : "+m" ((xl)->spinners) : : "memory"); -# define dec_spinners(xl) \ - asm(LOCK_PREFIX " decb %0" : "+m" ((xl)->spinners) : : "memory"); -#else -typedef u16 xen_spinners_t; -# define inc_spinners(xl) \ - asm(LOCK_PREFIX " incw %0" : "+m" ((xl)->spinners) : : "memory"); -# define dec_spinners(xl) \ - asm(LOCK_PREFIX " decw %0" : "+m" ((xl)->spinners) : : "memory"); -#endif - -struct xen_spinlock { - unsigned char lock; /* 0 -> free; 1 -> locked */ - xen_spinners_t spinners; /* count of waiting cpus */ -}; - -static int xen_spin_is_locked(struct arch_spinlock *lock) -{ - struct xen_spinlock *xl = (struct xen_spinlock *)lock; - - return xl->lock != 0; -} - -static int xen_spin_is_contended(struct arch_spinlock *lock) -{ - struct xen_spinlock *xl = (struct xen_spinlock *)lock; - - /* Not strictly true; this is only the count of contended - lock-takers entering the slow path. */ - return xl->spinners != 0; -} - -static int xen_spin_trylock(struct arch_spinlock *lock) -{ - struct xen_spinlock *xl = (struct xen_spinlock *)lock; - u8 old = 1; - - asm("xchgb %b0,%1" - : "+q" (old), "+m" (xl->lock) : : "memory"); - - return old == 0; -} - -static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; -static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners); - -/* - * Mark a cpu as interested in a lock. Returns the CPU's previous - * lock of interest, in case we got preempted by an interrupt. - */ -static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl) -{ - struct xen_spinlock *prev; - - prev = __this_cpu_read(lock_spinners); - __this_cpu_write(lock_spinners, xl); - - wmb(); /* set lock of interest before count */ - - inc_spinners(xl); - - return prev; -} - -/* - * Mark a cpu as no longer interested in a lock. Restores previous - * lock of interest (NULL for none). - */ -static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev) -{ - dec_spinners(xl); - wmb(); /* decrement count before restoring lock */ - __this_cpu_write(lock_spinners, prev); -} - -static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable) -{ - struct xen_spinlock *xl = (struct xen_spinlock *)lock; - struct xen_spinlock *prev; - int irq = __this_cpu_read(lock_kicker_irq); - int ret; - u64 start; - - /* If kicker interrupts not initialized yet, just spin */ - if (irq == -1) - return 0; - - start = spin_time_start(); - - /* announce we're spinning */ - prev = spinning_lock(xl); - - ADD_STATS(taken_slow, 1); - ADD_STATS(taken_slow_nested, prev != NULL); - - do { - unsigned long flags; - - /* clear pending */ - xen_clear_irq_pending(irq); - - /* check again make sure it didn't become free while - we weren't looking */ - ret = xen_spin_trylock(lock); - if (ret) { - ADD_STATS(taken_slow_pickup, 1); - - /* - * If we interrupted another spinlock while it - * was blocking, make sure it doesn't block - * without rechecking the lock. - */ - if (prev != NULL) - xen_set_irq_pending(irq); - goto out; - } - - flags = arch_local_save_flags(); - if (irq_enable) { - ADD_STATS(taken_slow_irqenable, 1); - raw_local_irq_enable(); - } - - /* - * Block until irq becomes pending. If we're - * interrupted at this point (after the trylock but - * before entering the block), then the nested lock - * handler guarantees that the irq will be left - * pending if there's any chance the lock became free; - * xen_poll_irq() returns immediately if the irq is - * pending. - */ - xen_poll_irq(irq); - - raw_local_irq_restore(flags); - - ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq)); - } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */ - - kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); - -out: - unspinning_lock(xl, prev); - spin_time_accum_blocked(start); - - return ret; -} - -static inline void __xen_spin_lock(struct arch_spinlock *lock, bool irq_enable) -{ - struct xen_spinlock *xl = (struct xen_spinlock *)lock; - unsigned timeout; - u8 oldval; - u64 start_spin; - - ADD_STATS(taken, 1); - - start_spin = spin_time_start(); - - do { - u64 start_spin_fast = spin_time_start(); - - timeout = TIMEOUT; - - asm("1: xchgb %1,%0\n" - " testb %1,%1\n" - " jz 3f\n" - "2: rep;nop\n" - " cmpb $0,%0\n" - " je 1b\n" - " dec %2\n" - " jnz 2b\n" - "3:\n" - : "+m" (xl->lock), "=q" (oldval), "+r" (timeout) - : "1" (1) - : "memory"); - - spin_time_accum_spinning(start_spin_fast); - - } while (unlikely(oldval != 0 && - (TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable)))); - - spin_time_accum_total(start_spin); -} - -static void xen_spin_lock(struct arch_spinlock *lock) -{ - __xen_spin_lock(lock, false); -} - -static void xen_spin_lock_flags(struct arch_spinlock *lock, unsigned long flags) -{ - __xen_spin_lock(lock, !raw_irqs_disabled_flags(flags)); -} - -static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl) -{ - int cpu; - - ADD_STATS(released_slow, 1); - - for_each_online_cpu(cpu) { - /* XXX should mix up next cpu selection */ - if (per_cpu(lock_spinners, cpu) == xl) { - ADD_STATS(released_slow_kicked, 1); - xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); - break; - } - } -} - -static void xen_spin_unlock(struct arch_spinlock *lock) -{ - struct xen_spinlock *xl = (struct xen_spinlock *)lock; - - ADD_STATS(released, 1); - - smp_wmb(); /* make sure no writes get moved after unlock */ - xl->lock = 0; /* release lock */ - - /* - * Make sure unlock happens before checking for waiting - * spinners. We need a strong barrier to enforce the - * write-read ordering to different memory locations, as the - * CPU makes no implied guarantees about their ordering. - */ - mb(); - - if (unlikely(xl->spinners)) - xen_spin_unlock_slow(xl); -} - -static irqreturn_t dummy_handler(int irq, void *dev_id) -{ - BUG(); - return IRQ_HANDLED; -} - -void __cpuinit xen_init_lock_cpu(int cpu) -{ - int irq; - const char *name; - - name = kasprintf(GFP_KERNEL, "spinlock%d", cpu); - irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR, - cpu, - dummy_handler, - IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, - name, - NULL); - - if (irq >= 0) { - disable_irq(irq); /* make sure it's never delivered */ - per_cpu(lock_kicker_irq, cpu) = irq; - } - - printk("cpu %d spinlock event irq %d\n", cpu, irq); -} - -void xen_uninit_lock_cpu(int cpu) -{ - unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL); -} - -void __init xen_init_spinlocks(void) -{ - BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t)); - - pv_lock_ops.spin_is_locked = xen_spin_is_locked; - pv_lock_ops.spin_is_contended = xen_spin_is_contended; - pv_lock_ops.spin_lock = xen_spin_lock; - pv_lock_ops.spin_lock_flags = xen_spin_lock_flags; - pv_lock_ops.spin_trylock = xen_spin_trylock; - pv_lock_ops.spin_unlock = xen_spin_unlock; -} - -#ifdef CONFIG_XEN_DEBUG_FS - -static struct dentry *d_spin_debug; - -static int __init xen_spinlock_debugfs(void) -{ - struct dentry *d_xen = xen_init_debugfs(); - - if (d_xen == NULL) - return -ENOMEM; - - d_spin_debug = debugfs_create_dir("spinlocks", d_xen); - - debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats); - - debugfs_create_u32("timeout", 0644, d_spin_debug, &lock_timeout); - - debugfs_create_u64("taken", 0444, d_spin_debug, &spinlock_stats.taken); - debugfs_create_u32("taken_slow", 0444, d_spin_debug, - &spinlock_stats.taken_slow); - debugfs_create_u32("taken_slow_nested", 0444, d_spin_debug, - &spinlock_stats.taken_slow_nested); - debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug, - &spinlock_stats.taken_slow_pickup); - debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug, - &spinlock_stats.taken_slow_spurious); - debugfs_create_u32("taken_slow_irqenable", 0444, d_spin_debug, - &spinlock_stats.taken_slow_irqenable); - - debugfs_create_u64("released", 0444, d_spin_debug, &spinlock_stats.released); - debugfs_create_u32("released_slow", 0444, d_spin_debug, - &spinlock_stats.released_slow); - debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug, - &spinlock_stats.released_slow_kicked); - - debugfs_create_u64("time_spinning", 0444, d_spin_debug, - &spinlock_stats.time_spinning); - debugfs_create_u64("time_blocked", 0444, d_spin_debug, - &spinlock_stats.time_blocked); - debugfs_create_u64("time_total", 0444, d_spin_debug, - &spinlock_stats.time_total); - - xen_debugfs_create_u32_array("histo_total", 0444, d_spin_debug, - spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1); - xen_debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug, - spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1); - xen_debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug, - spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1); - - return 0; -} -fs_initcall(xen_spinlock_debugfs); - -#endif /* CONFIG_XEN_DEBUG_FS */ diff --git a/ANDROID_3.4.5/arch/x86/xen/suspend.c b/ANDROID_3.4.5/arch/x86/xen/suspend.c deleted file mode 100644 index 45329c8c..00000000 --- a/ANDROID_3.4.5/arch/x86/xen/suspend.c +++ /dev/null @@ -1,80 +0,0 @@ -#include <linux/types.h> -#include <linux/clockchips.h> - -#include <xen/interface/xen.h> -#include <xen/grant_table.h> -#include <xen/events.h> - -#include <asm/xen/hypercall.h> -#include <asm/xen/page.h> -#include <asm/fixmap.h> - -#include "xen-ops.h" -#include "mmu.h" - -void xen_arch_pre_suspend(void) -{ - xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); - xen_start_info->console.domU.mfn = - mfn_to_pfn(xen_start_info->console.domU.mfn); - - BUG_ON(!irqs_disabled()); - - HYPERVISOR_shared_info = &xen_dummy_shared_info; - if (HYPERVISOR_update_va_mapping(fix_to_virt(FIX_PARAVIRT_BOOTMAP), - __pte_ma(0), 0)) - BUG(); -} - -void xen_arch_hvm_post_suspend(int suspend_cancelled) -{ -#ifdef CONFIG_XEN_PVHVM - int cpu; - xen_hvm_init_shared_info(); - xen_callback_vector(); - xen_unplug_emulated_devices(); - if (xen_feature(XENFEAT_hvm_safe_pvclock)) { - for_each_online_cpu(cpu) { - xen_setup_runstate_info(cpu); - } - } -#endif -} - -void xen_arch_post_suspend(int suspend_cancelled) -{ - xen_build_mfn_list_list(); - - xen_setup_shared_info(); - - if (suspend_cancelled) { - xen_start_info->store_mfn = - pfn_to_mfn(xen_start_info->store_mfn); - xen_start_info->console.domU.mfn = - pfn_to_mfn(xen_start_info->console.domU.mfn); - } else { -#ifdef CONFIG_SMP - BUG_ON(xen_cpu_initialized_map == NULL); - cpumask_copy(xen_cpu_initialized_map, cpu_online_mask); -#endif - xen_vcpu_restore(); - } - -} - -static void xen_vcpu_notify_restore(void *data) -{ - unsigned long reason = (unsigned long)data; - - /* Boot processor notified via generic timekeeping_resume() */ - if ( smp_processor_id() == 0) - return; - - clockevents_notify(reason, NULL); -} - -void xen_arch_resume(void) -{ - on_each_cpu(xen_vcpu_notify_restore, - (void *)CLOCK_EVT_NOTIFY_RESUME, 1); -} diff --git a/ANDROID_3.4.5/arch/x86/xen/time.c b/ANDROID_3.4.5/arch/x86/xen/time.c deleted file mode 100644 index 0296a952..00000000 --- a/ANDROID_3.4.5/arch/x86/xen/time.c +++ /dev/null @@ -1,525 +0,0 @@ -/* - * Xen time implementation. - * - * This is implemented in terms of a clocksource driver which uses - * the hypervisor clock as a nanosecond timebase, and a clockevent - * driver which uses the hypervisor's timer mechanism. - * - * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 - */ -#include <linux/kernel.h> -#include <linux/interrupt.h> -#include <linux/clocksource.h> -#include <linux/clockchips.h> -#include <linux/kernel_stat.h> -#include <linux/math64.h> -#include <linux/gfp.h> - -#include <asm/pvclock.h> -#include <asm/xen/hypervisor.h> -#include <asm/xen/hypercall.h> - -#include <xen/events.h> -#include <xen/features.h> -#include <xen/interface/xen.h> -#include <xen/interface/vcpu.h> - -#include "xen-ops.h" - -/* Xen may fire a timer up to this many ns early */ -#define TIMER_SLOP 100000 -#define NS_PER_TICK (1000000000LL / HZ) - -/* runstate info updated by Xen */ -static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate); - -/* snapshots of runstate info */ -static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot); - -/* unused ns of stolen and blocked time */ -static DEFINE_PER_CPU(u64, xen_residual_stolen); -static DEFINE_PER_CPU(u64, xen_residual_blocked); - -/* return an consistent snapshot of 64-bit time/counter value */ -static u64 get64(const u64 *p) -{ - u64 ret; - - if (BITS_PER_LONG < 64) { - u32 *p32 = (u32 *)p; - u32 h, l; - - /* - * Read high then low, and then make sure high is - * still the same; this will only loop if low wraps - * and carries into high. - * XXX some clean way to make this endian-proof? - */ - do { - h = p32[1]; - barrier(); - l = p32[0]; - barrier(); - } while (p32[1] != h); - - ret = (((u64)h) << 32) | l; - } else - ret = *p; - - return ret; -} - -/* - * Runstate accounting - */ -static void get_runstate_snapshot(struct vcpu_runstate_info *res) -{ - u64 state_time; - struct vcpu_runstate_info *state; - - BUG_ON(preemptible()); - - state = &__get_cpu_var(xen_runstate); - - /* - * The runstate info is always updated by the hypervisor on - * the current CPU, so there's no need to use anything - * stronger than a compiler barrier when fetching it. - */ - do { - state_time = get64(&state->state_entry_time); - barrier(); - *res = *state; - barrier(); - } while (get64(&state->state_entry_time) != state_time); -} - -/* return true when a vcpu could run but has no real cpu to run on */ -bool xen_vcpu_stolen(int vcpu) -{ - return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable; -} - -void xen_setup_runstate_info(int cpu) -{ - struct vcpu_register_runstate_memory_area area; - - area.addr.v = &per_cpu(xen_runstate, cpu); - - if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, - cpu, &area)) - BUG(); -} - -static void do_stolen_accounting(void) -{ - struct vcpu_runstate_info state; - struct vcpu_runstate_info *snap; - s64 blocked, runnable, offline, stolen; - cputime_t ticks; - - get_runstate_snapshot(&state); - - WARN_ON(state.state != RUNSTATE_running); - - snap = &__get_cpu_var(xen_runstate_snapshot); - - /* work out how much time the VCPU has not been runn*ing* */ - blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked]; - runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable]; - offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline]; - - *snap = state; - - /* Add the appropriate number of ticks of stolen time, - including any left-overs from last time. */ - stolen = runnable + offline + __this_cpu_read(xen_residual_stolen); - - if (stolen < 0) - stolen = 0; - - ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); - __this_cpu_write(xen_residual_stolen, stolen); - account_steal_ticks(ticks); - - /* Add the appropriate number of ticks of blocked time, - including any left-overs from last time. */ - blocked += __this_cpu_read(xen_residual_blocked); - - if (blocked < 0) - blocked = 0; - - ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked); - __this_cpu_write(xen_residual_blocked, blocked); - account_idle_ticks(ticks); -} - -/* Get the TSC speed from Xen */ -static unsigned long xen_tsc_khz(void) -{ - struct pvclock_vcpu_time_info *info = - &HYPERVISOR_shared_info->vcpu_info[0].time; - - return pvclock_tsc_khz(info); -} - -cycle_t xen_clocksource_read(void) -{ - struct pvclock_vcpu_time_info *src; - cycle_t ret; - - preempt_disable_notrace(); - src = &__get_cpu_var(xen_vcpu)->time; - ret = pvclock_clocksource_read(src); - preempt_enable_notrace(); - return ret; -} - -static cycle_t xen_clocksource_get_cycles(struct clocksource *cs) -{ - return xen_clocksource_read(); -} - -static void xen_read_wallclock(struct timespec *ts) -{ - struct shared_info *s = HYPERVISOR_shared_info; - struct pvclock_wall_clock *wall_clock = &(s->wc); - struct pvclock_vcpu_time_info *vcpu_time; - - vcpu_time = &get_cpu_var(xen_vcpu)->time; - pvclock_read_wallclock(wall_clock, vcpu_time, ts); - put_cpu_var(xen_vcpu); -} - -static unsigned long xen_get_wallclock(void) -{ - struct timespec ts; - - xen_read_wallclock(&ts); - return ts.tv_sec; -} - -static int xen_set_wallclock(unsigned long now) -{ - struct xen_platform_op op; - int rc; - - /* do nothing for domU */ - if (!xen_initial_domain()) - return -1; - - op.cmd = XENPF_settime; - op.u.settime.secs = now; - op.u.settime.nsecs = 0; - op.u.settime.system_time = xen_clocksource_read(); - - rc = HYPERVISOR_dom0_op(&op); - WARN(rc != 0, "XENPF_settime failed: now=%ld\n", now); - - return rc; -} - -static struct clocksource xen_clocksource __read_mostly = { - .name = "xen", - .rating = 400, - .read = xen_clocksource_get_cycles, - .mask = ~0, - .flags = CLOCK_SOURCE_IS_CONTINUOUS, -}; - -/* - Xen clockevent implementation - - Xen has two clockevent implementations: - - The old timer_op one works with all released versions of Xen prior - to version 3.0.4. This version of the hypervisor provides a - single-shot timer with nanosecond resolution. However, sharing the - same event channel is a 100Hz tick which is delivered while the - vcpu is running. We don't care about or use this tick, but it will - cause the core time code to think the timer fired too soon, and - will end up resetting it each time. It could be filtered, but - doing so has complications when the ktime clocksource is not yet - the xen clocksource (ie, at boot time). - - The new vcpu_op-based timer interface allows the tick timer period - to be changed or turned off. The tick timer is not useful as a - periodic timer because events are only delivered to running vcpus. - The one-shot timer can report when a timeout is in the past, so - set_next_event is capable of returning -ETIME when appropriate. - This interface is used when available. -*/ - - -/* - Get a hypervisor absolute time. In theory we could maintain an - offset between the kernel's time and the hypervisor's time, and - apply that to a kernel's absolute timeout. Unfortunately the - hypervisor and kernel times can drift even if the kernel is using - the Xen clocksource, because ntp can warp the kernel's clocksource. -*/ -static s64 get_abs_timeout(unsigned long delta) -{ - return xen_clocksource_read() + delta; -} - -static void xen_timerop_set_mode(enum clock_event_mode mode, - struct clock_event_device *evt) -{ - switch (mode) { - case CLOCK_EVT_MODE_PERIODIC: - /* unsupported */ - WARN_ON(1); - break; - - case CLOCK_EVT_MODE_ONESHOT: - case CLOCK_EVT_MODE_RESUME: - break; - - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - HYPERVISOR_set_timer_op(0); /* cancel timeout */ - break; - } -} - -static int xen_timerop_set_next_event(unsigned long delta, - struct clock_event_device *evt) -{ - WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT); - - if (HYPERVISOR_set_timer_op(get_abs_timeout(delta)) < 0) - BUG(); - - /* We may have missed the deadline, but there's no real way of - knowing for sure. If the event was in the past, then we'll - get an immediate interrupt. */ - - return 0; -} - -static const struct clock_event_device xen_timerop_clockevent = { - .name = "xen", - .features = CLOCK_EVT_FEAT_ONESHOT, - - .max_delta_ns = 0xffffffff, - .min_delta_ns = TIMER_SLOP, - - .mult = 1, - .shift = 0, - .rating = 500, - - .set_mode = xen_timerop_set_mode, - .set_next_event = xen_timerop_set_next_event, -}; - - - -static void xen_vcpuop_set_mode(enum clock_event_mode mode, - struct clock_event_device *evt) -{ - int cpu = smp_processor_id(); - - switch (mode) { - case CLOCK_EVT_MODE_PERIODIC: - WARN_ON(1); /* unsupported */ - break; - - case CLOCK_EVT_MODE_ONESHOT: - if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL)) - BUG(); - break; - - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, cpu, NULL) || - HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL)) - BUG(); - break; - case CLOCK_EVT_MODE_RESUME: - break; - } -} - -static int xen_vcpuop_set_next_event(unsigned long delta, - struct clock_event_device *evt) -{ - int cpu = smp_processor_id(); - struct vcpu_set_singleshot_timer single; - int ret; - - WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT); - - single.timeout_abs_ns = get_abs_timeout(delta); - single.flags = VCPU_SSHOTTMR_future; - - ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, cpu, &single); - - BUG_ON(ret != 0 && ret != -ETIME); - - return ret; -} - -static const struct clock_event_device xen_vcpuop_clockevent = { - .name = "xen", - .features = CLOCK_EVT_FEAT_ONESHOT, - - .max_delta_ns = 0xffffffff, - .min_delta_ns = TIMER_SLOP, - - .mult = 1, - .shift = 0, - .rating = 500, - - .set_mode = xen_vcpuop_set_mode, - .set_next_event = xen_vcpuop_set_next_event, -}; - -static const struct clock_event_device *xen_clockevent = - &xen_timerop_clockevent; -static DEFINE_PER_CPU(struct clock_event_device, xen_clock_events); - -static irqreturn_t xen_timer_interrupt(int irq, void *dev_id) -{ - struct clock_event_device *evt = &__get_cpu_var(xen_clock_events); - irqreturn_t ret; - - ret = IRQ_NONE; - if (evt->event_handler) { - evt->event_handler(evt); - ret = IRQ_HANDLED; - } - - do_stolen_accounting(); - - return ret; -} - -void xen_setup_timer(int cpu) -{ - const char *name; - struct clock_event_device *evt; - int irq; - - printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu); - - name = kasprintf(GFP_KERNEL, "timer%d", cpu); - if (!name) - name = "<timer kasprintf failed>"; - - irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt, - IRQF_DISABLED|IRQF_PERCPU| - IRQF_NOBALANCING|IRQF_TIMER| - IRQF_FORCE_RESUME, - name, NULL); - - evt = &per_cpu(xen_clock_events, cpu); - memcpy(evt, xen_clockevent, sizeof(*evt)); - - evt->cpumask = cpumask_of(cpu); - evt->irq = irq; -} - -void xen_teardown_timer(int cpu) -{ - struct clock_event_device *evt; - BUG_ON(cpu == 0); - evt = &per_cpu(xen_clock_events, cpu); - unbind_from_irqhandler(evt->irq, NULL); -} - -void xen_setup_cpu_clockevents(void) -{ - BUG_ON(preemptible()); - - clockevents_register_device(&__get_cpu_var(xen_clock_events)); -} - -void xen_timer_resume(void) -{ - int cpu; - - pvclock_resume(); - - if (xen_clockevent != &xen_vcpuop_clockevent) - return; - - for_each_online_cpu(cpu) { - if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL)) - BUG(); - } -} - -static const struct pv_time_ops xen_time_ops __initconst = { - .sched_clock = xen_clocksource_read, -}; - -static void __init xen_time_init(void) -{ - int cpu = smp_processor_id(); - struct timespec tp; - - clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC); - - if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) { - /* Successfully turned off 100Hz tick, so we have the - vcpuop-based timer interface */ - printk(KERN_DEBUG "Xen: using vcpuop timer interface\n"); - xen_clockevent = &xen_vcpuop_clockevent; - } - - /* Set initial system time with full resolution */ - xen_read_wallclock(&tp); - do_settimeofday(&tp); - - setup_force_cpu_cap(X86_FEATURE_TSC); - - xen_setup_runstate_info(cpu); - xen_setup_timer(cpu); - xen_setup_cpu_clockevents(); -} - -void __init xen_init_time_ops(void) -{ - pv_time_ops = xen_time_ops; - - x86_init.timers.timer_init = xen_time_init; - x86_init.timers.setup_percpu_clockev = x86_init_noop; - x86_cpuinit.setup_percpu_clockev = x86_init_noop; - - x86_platform.calibrate_tsc = xen_tsc_khz; - x86_platform.get_wallclock = xen_get_wallclock; - x86_platform.set_wallclock = xen_set_wallclock; -} - -#ifdef CONFIG_XEN_PVHVM -static void xen_hvm_setup_cpu_clockevents(void) -{ - int cpu = smp_processor_id(); - xen_setup_runstate_info(cpu); - xen_setup_timer(cpu); - xen_setup_cpu_clockevents(); -} - -void __init xen_hvm_init_time_ops(void) -{ - /* vector callback is needed otherwise we cannot receive interrupts - * on cpu > 0 and at this point we don't know how many cpus are - * available */ - if (!xen_have_vector_callback) - return; - if (!xen_feature(XENFEAT_hvm_safe_pvclock)) { - printk(KERN_INFO "Xen doesn't support pvclock on HVM," - "disable pv timer\n"); - return; - } - - pv_time_ops = xen_time_ops; - x86_init.timers.setup_percpu_clockev = xen_time_init; - x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents; - - x86_platform.calibrate_tsc = xen_tsc_khz; - x86_platform.get_wallclock = xen_get_wallclock; - x86_platform.set_wallclock = xen_set_wallclock; -} -#endif diff --git a/ANDROID_3.4.5/arch/x86/xen/trace.c b/ANDROID_3.4.5/arch/x86/xen/trace.c deleted file mode 100644 index 520022d1..00000000 --- a/ANDROID_3.4.5/arch/x86/xen/trace.c +++ /dev/null @@ -1,62 +0,0 @@ -#include <linux/ftrace.h> -#include <xen/interface/xen.h> - -#define N(x) [__HYPERVISOR_##x] = "("#x")" -static const char *xen_hypercall_names[] = { - N(set_trap_table), - N(mmu_update), - N(set_gdt), - N(stack_switch), - N(set_callbacks), - N(fpu_taskswitch), - N(sched_op_compat), - N(dom0_op), - N(set_debugreg), - N(get_debugreg), - N(update_descriptor), - N(memory_op), - N(multicall), - N(update_va_mapping), - N(set_timer_op), - N(event_channel_op_compat), - N(xen_version), - N(console_io), - N(physdev_op_compat), - N(grant_table_op), - N(vm_assist), - N(update_va_mapping_otherdomain), - N(iret), - N(vcpu_op), - N(set_segment_base), - N(mmuext_op), - N(acm_op), - N(nmi_op), - N(sched_op), - N(callback_op), - N(xenoprof_op), - N(event_channel_op), - N(physdev_op), - N(hvm_op), - -/* Architecture-specific hypercall definitions. */ - N(arch_0), - N(arch_1), - N(arch_2), - N(arch_3), - N(arch_4), - N(arch_5), - N(arch_6), - N(arch_7), -}; -#undef N - -static const char *xen_hypercall_name(unsigned op) -{ - if (op < ARRAY_SIZE(xen_hypercall_names) && xen_hypercall_names[op] != NULL) - return xen_hypercall_names[op]; - - return ""; -} - -#define CREATE_TRACE_POINTS -#include <trace/events/xen.h> diff --git a/ANDROID_3.4.5/arch/x86/xen/vdso.h b/ANDROID_3.4.5/arch/x86/xen/vdso.h deleted file mode 100644 index 861fedfe..00000000 --- a/ANDROID_3.4.5/arch/x86/xen/vdso.h +++ /dev/null @@ -1,4 +0,0 @@ -/* Bit used for the pseudo-hwcap for non-negative segments. We use - bit 1 to avoid bugs in some versions of glibc when bit 0 is - used; the choice is otherwise arbitrary. */ -#define VDSO_NOTE_NONEGSEG_BIT 1 diff --git a/ANDROID_3.4.5/arch/x86/xen/vga.c b/ANDROID_3.4.5/arch/x86/xen/vga.c deleted file mode 100644 index 1cd7f4d1..00000000 --- a/ANDROID_3.4.5/arch/x86/xen/vga.c +++ /dev/null @@ -1,67 +0,0 @@ -#include <linux/screen_info.h> -#include <linux/init.h> - -#include <asm/bootparam.h> -#include <asm/setup.h> - -#include <xen/interface/xen.h> - -#include "xen-ops.h" - -void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size) -{ - struct screen_info *screen_info = &boot_params.screen_info; - - /* This is drawn from a dump from vgacon:startup in - * standard Linux. */ - screen_info->orig_video_mode = 3; - screen_info->orig_video_isVGA = 1; - screen_info->orig_video_lines = 25; - screen_info->orig_video_cols = 80; - screen_info->orig_video_ega_bx = 3; - screen_info->orig_video_points = 16; - screen_info->orig_y = screen_info->orig_video_lines - 1; - - switch (info->video_type) { - case XEN_VGATYPE_TEXT_MODE_3: - if (size < offsetof(struct dom0_vga_console_info, u.text_mode_3) - + sizeof(info->u.text_mode_3)) - break; - screen_info->orig_video_lines = info->u.text_mode_3.rows; - screen_info->orig_video_cols = info->u.text_mode_3.columns; - screen_info->orig_x = info->u.text_mode_3.cursor_x; - screen_info->orig_y = info->u.text_mode_3.cursor_y; - screen_info->orig_video_points = - info->u.text_mode_3.font_height; - break; - - case XEN_VGATYPE_VESA_LFB: - if (size < offsetof(struct dom0_vga_console_info, - u.vesa_lfb.gbl_caps)) - break; - screen_info->orig_video_isVGA = VIDEO_TYPE_VLFB; - screen_info->lfb_width = info->u.vesa_lfb.width; - screen_info->lfb_height = info->u.vesa_lfb.height; - screen_info->lfb_depth = info->u.vesa_lfb.bits_per_pixel; - screen_info->lfb_base = info->u.vesa_lfb.lfb_base; - screen_info->lfb_size = info->u.vesa_lfb.lfb_size; - screen_info->lfb_linelength = info->u.vesa_lfb.bytes_per_line; - screen_info->red_size = info->u.vesa_lfb.red_size; - screen_info->red_pos = info->u.vesa_lfb.red_pos; - screen_info->green_size = info->u.vesa_lfb.green_size; - screen_info->green_pos = info->u.vesa_lfb.green_pos; - screen_info->blue_size = info->u.vesa_lfb.blue_size; - screen_info->blue_pos = info->u.vesa_lfb.blue_pos; - screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size; - screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos; - if (size >= offsetof(struct dom0_vga_console_info, - u.vesa_lfb.gbl_caps) - + sizeof(info->u.vesa_lfb.gbl_caps)) - screen_info->capabilities = info->u.vesa_lfb.gbl_caps; - if (size >= offsetof(struct dom0_vga_console_info, - u.vesa_lfb.mode_attrs) - + sizeof(info->u.vesa_lfb.mode_attrs)) - screen_info->vesa_attributes = info->u.vesa_lfb.mode_attrs; - break; - } -} diff --git a/ANDROID_3.4.5/arch/x86/xen/xen-asm.S b/ANDROID_3.4.5/arch/x86/xen/xen-asm.S deleted file mode 100644 index 3e45aa00..00000000 --- a/ANDROID_3.4.5/arch/x86/xen/xen-asm.S +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Asm versions of Xen pv-ops, suitable for either direct use or - * inlining. The inline versions are the same as the direct-use - * versions, with the pre- and post-amble chopped off. - * - * This code is encoded for size rather than absolute efficiency, with - * a view to being able to inline as much as possible. - * - * We only bother with direct forms (ie, vcpu in percpu data) of the - * operations here; the indirect forms are better handled in C, since - * they're generally too large to inline anyway. - */ - -#include <asm/asm-offsets.h> -#include <asm/percpu.h> -#include <asm/processor-flags.h> - -#include "xen-asm.h" - -/* - * Enable events. This clears the event mask and tests the pending - * event status with one and operation. If there are pending events, - * then enter the hypervisor to get them handled. - */ -ENTRY(xen_irq_enable_direct) - /* Unmask events */ - movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask - - /* - * Preempt here doesn't matter because that will deal with any - * pending interrupts. The pending check may end up being run - * on the wrong CPU, but that doesn't hurt. - */ - - /* Test for pending */ - testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending - jz 1f - -2: call check_events -1: -ENDPATCH(xen_irq_enable_direct) - ret - ENDPROC(xen_irq_enable_direct) - RELOC(xen_irq_enable_direct, 2b+1) - - -/* - * Disabling events is simply a matter of making the event mask - * non-zero. - */ -ENTRY(xen_irq_disable_direct) - movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask -ENDPATCH(xen_irq_disable_direct) - ret - ENDPROC(xen_irq_disable_direct) - RELOC(xen_irq_disable_direct, 0) - -/* - * (xen_)save_fl is used to get the current interrupt enable status. - * Callers expect the status to be in X86_EFLAGS_IF, and other bits - * may be set in the return value. We take advantage of this by - * making sure that X86_EFLAGS_IF has the right value (and other bits - * in that byte are 0), but other bits in the return value are - * undefined. We need to toggle the state of the bit, because Xen and - * x86 use opposite senses (mask vs enable). - */ -ENTRY(xen_save_fl_direct) - testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask - setz %ah - addb %ah, %ah -ENDPATCH(xen_save_fl_direct) - ret - ENDPROC(xen_save_fl_direct) - RELOC(xen_save_fl_direct, 0) - - -/* - * In principle the caller should be passing us a value return from - * xen_save_fl_direct, but for robustness sake we test only the - * X86_EFLAGS_IF flag rather than the whole byte. After setting the - * interrupt mask state, it checks for unmasked pending events and - * enters the hypervisor to get them delivered if so. - */ -ENTRY(xen_restore_fl_direct) -#ifdef CONFIG_X86_64 - testw $X86_EFLAGS_IF, %di -#else - testb $X86_EFLAGS_IF>>8, %ah -#endif - setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask - /* - * Preempt here doesn't matter because that will deal with any - * pending interrupts. The pending check may end up being run - * on the wrong CPU, but that doesn't hurt. - */ - - /* check for unmasked and pending */ - cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending - jnz 1f -2: call check_events -1: -ENDPATCH(xen_restore_fl_direct) - ret - ENDPROC(xen_restore_fl_direct) - RELOC(xen_restore_fl_direct, 2b+1) - - -/* - * Force an event check by making a hypercall, but preserve regs - * before making the call. - */ -check_events: -#ifdef CONFIG_X86_32 - push %eax - push %ecx - push %edx - call xen_force_evtchn_callback - pop %edx - pop %ecx - pop %eax -#else - push %rax - push %rcx - push %rdx - push %rsi - push %rdi - push %r8 - push %r9 - push %r10 - push %r11 - call xen_force_evtchn_callback - pop %r11 - pop %r10 - pop %r9 - pop %r8 - pop %rdi - pop %rsi - pop %rdx - pop %rcx - pop %rax -#endif - ret diff --git a/ANDROID_3.4.5/arch/x86/xen/xen-asm.h b/ANDROID_3.4.5/arch/x86/xen/xen-asm.h deleted file mode 100644 index 46527646..00000000 --- a/ANDROID_3.4.5/arch/x86/xen/xen-asm.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _XEN_XEN_ASM_H -#define _XEN_XEN_ASM_H - -#include <linux/linkage.h> - -#define RELOC(x, v) .globl x##_reloc; x##_reloc=v -#define ENDPATCH(x) .globl x##_end; x##_end=. - -/* Pseudo-flag used for virtual NMI, which we don't implement yet */ -#define XEN_EFLAGS_NMI 0x80000000 - -#endif diff --git a/ANDROID_3.4.5/arch/x86/xen/xen-asm_32.S b/ANDROID_3.4.5/arch/x86/xen/xen-asm_32.S deleted file mode 100644 index b040b0e5..00000000 --- a/ANDROID_3.4.5/arch/x86/xen/xen-asm_32.S +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Asm versions of Xen pv-ops, suitable for either direct use or - * inlining. The inline versions are the same as the direct-use - * versions, with the pre- and post-amble chopped off. - * - * This code is encoded for size rather than absolute efficiency, with - * a view to being able to inline as much as possible. - * - * We only bother with direct forms (ie, vcpu in pda) of the - * operations here; the indirect forms are better handled in C, since - * they're generally too large to inline anyway. - */ - -#include <asm/thread_info.h> -#include <asm/processor-flags.h> -#include <asm/segment.h> - -#include <xen/interface/xen.h> - -#include "xen-asm.h" - -/* - * Force an event check by making a hypercall, but preserve regs - * before making the call. - */ -check_events: - push %eax - push %ecx - push %edx - call xen_force_evtchn_callback - pop %edx - pop %ecx - pop %eax - ret - -/* - * We can't use sysexit directly, because we're not running in ring0. - * But we can easily fake it up using iret. Assuming xen_sysexit is - * jumped to with a standard stack frame, we can just strip it back to - * a standard iret frame and use iret. - */ -ENTRY(xen_sysexit) - movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */ - orl $X86_EFLAGS_IF, PT_EFLAGS(%esp) - lea PT_EIP(%esp), %esp - - jmp xen_iret -ENDPROC(xen_sysexit) - -/* - * This is run where a normal iret would be run, with the same stack setup: - * 8: eflags - * 4: cs - * esp-> 0: eip - * - * This attempts to make sure that any pending events are dealt with - * on return to usermode, but there is a small window in which an - * event can happen just before entering usermode. If the nested - * interrupt ends up setting one of the TIF_WORK_MASK pending work - * flags, they will not be tested again before returning to - * usermode. This means that a process can end up with pending work, - * which will be unprocessed until the process enters and leaves the - * kernel again, which could be an unbounded amount of time. This - * means that a pending signal or reschedule event could be - * indefinitely delayed. - * - * The fix is to notice a nested interrupt in the critical window, and - * if one occurs, then fold the nested interrupt into the current - * interrupt stack frame, and re-process it iteratively rather than - * recursively. This means that it will exit via the normal path, and - * all pending work will be dealt with appropriately. - * - * Because the nested interrupt handler needs to deal with the current - * stack state in whatever form its in, we keep things simple by only - * using a single register which is pushed/popped on the stack. - */ -ENTRY(xen_iret) - /* test eflags for special cases */ - testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp) - jnz hyper_iret - - push %eax - ESP_OFFSET=4 # bytes pushed onto stack - - /* - * Store vcpu_info pointer for easy access. Do it this way to - * avoid having to reload %fs - */ -#ifdef CONFIG_SMP - GET_THREAD_INFO(%eax) - movl TI_cpu(%eax), %eax - movl __per_cpu_offset(,%eax,4), %eax - mov xen_vcpu(%eax), %eax -#else - movl xen_vcpu, %eax -#endif - - /* check IF state we're restoring */ - testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp) - - /* - * Maybe enable events. Once this happens we could get a - * recursive event, so the critical region starts immediately - * afterwards. However, if that happens we don't end up - * resuming the code, so we don't have to be worried about - * being preempted to another CPU. - */ - setz XEN_vcpu_info_mask(%eax) -xen_iret_start_crit: - - /* check for unmasked and pending */ - cmpw $0x0001, XEN_vcpu_info_pending(%eax) - - /* - * If there's something pending, mask events again so we can - * jump back into xen_hypervisor_callback. Otherwise do not - * touch XEN_vcpu_info_mask. - */ - jne 1f - movb $1, XEN_vcpu_info_mask(%eax) - -1: popl %eax - - /* - * From this point on the registers are restored and the stack - * updated, so we don't need to worry about it if we're - * preempted - */ -iret_restore_end: - - /* - * Jump to hypervisor_callback after fixing up the stack. - * Events are masked, so jumping out of the critical region is - * OK. - */ - je xen_hypervisor_callback - -1: iret -xen_iret_end_crit: -.section __ex_table, "a" - .align 4 - .long 1b, iret_exc -.previous - -hyper_iret: - /* put this out of line since its very rarely used */ - jmp hypercall_page + __HYPERVISOR_iret * 32 - - .globl xen_iret_start_crit, xen_iret_end_crit - -/* - * This is called by xen_hypervisor_callback in entry.S when it sees - * that the EIP at the time of interrupt was between - * xen_iret_start_crit and xen_iret_end_crit. We're passed the EIP in - * %eax so we can do a more refined determination of what to do. - * - * The stack format at this point is: - * ---------------- - * ss : (ss/esp may be present if we came from usermode) - * esp : - * eflags } outer exception info - * cs } - * eip } - * ---------------- <- edi (copy dest) - * eax : outer eax if it hasn't been restored - * ---------------- - * eflags } nested exception info - * cs } (no ss/esp because we're nested - * eip } from the same ring) - * orig_eax }<- esi (copy src) - * - - - - - - - - - * fs } - * es } - * ds } SAVE_ALL state - * eax } - * : : - * ebx }<- esp - * ---------------- - * - * In order to deliver the nested exception properly, we need to shift - * everything from the return addr up to the error code so it sits - * just under the outer exception info. This means that when we - * handle the exception, we do it in the context of the outer - * exception rather than starting a new one. - * - * The only caveat is that if the outer eax hasn't been restored yet - * (ie, it's still on stack), we need to insert its value into the - * SAVE_ALL state before going on, since it's usermode state which we - * eventually need to restore. - */ -ENTRY(xen_iret_crit_fixup) - /* - * Paranoia: Make sure we're really coming from kernel space. - * One could imagine a case where userspace jumps into the - * critical range address, but just before the CPU delivers a - * GP, it decides to deliver an interrupt instead. Unlikely? - * Definitely. Easy to avoid? Yes. The Intel documents - * explicitly say that the reported EIP for a bad jump is the - * jump instruction itself, not the destination, but some - * virtual environments get this wrong. - */ - movl PT_CS(%esp), %ecx - andl $SEGMENT_RPL_MASK, %ecx - cmpl $USER_RPL, %ecx - je 2f - - lea PT_ORIG_EAX(%esp), %esi - lea PT_EFLAGS(%esp), %edi - - /* - * If eip is before iret_restore_end then stack - * hasn't been restored yet. - */ - cmp $iret_restore_end, %eax - jae 1f - - movl 0+4(%edi), %eax /* copy EAX (just above top of frame) */ - movl %eax, PT_EAX(%esp) - - lea ESP_OFFSET(%edi), %edi /* move dest up over saved regs */ - - /* set up the copy */ -1: std - mov $PT_EIP / 4, %ecx /* saved regs up to orig_eax */ - rep movsl - cld - - lea 4(%edi), %esp /* point esp to new frame */ -2: jmp xen_do_upcall - diff --git a/ANDROID_3.4.5/arch/x86/xen/xen-asm_64.S b/ANDROID_3.4.5/arch/x86/xen/xen-asm_64.S deleted file mode 100644 index 53adefda..00000000 --- a/ANDROID_3.4.5/arch/x86/xen/xen-asm_64.S +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Asm versions of Xen pv-ops, suitable for either direct use or - * inlining. The inline versions are the same as the direct-use - * versions, with the pre- and post-amble chopped off. - * - * This code is encoded for size rather than absolute efficiency, with - * a view to being able to inline as much as possible. - * - * We only bother with direct forms (ie, vcpu in pda) of the - * operations here; the indirect forms are better handled in C, since - * they're generally too large to inline anyway. - */ - -#include <asm/errno.h> -#include <asm/percpu.h> -#include <asm/processor-flags.h> -#include <asm/segment.h> - -#include <xen/interface/xen.h> - -#include "xen-asm.h" - -ENTRY(xen_adjust_exception_frame) - mov 8+0(%rsp), %rcx - mov 8+8(%rsp), %r11 - ret $16 - -hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 -/* - * Xen64 iret frame: - * - * ss - * rsp - * rflags - * cs - * rip <-- standard iret frame - * - * flags - * - * rcx } - * r11 }<-- pushed by hypercall page - * rsp->rax } - */ -ENTRY(xen_iret) - pushq $0 -1: jmp hypercall_iret -ENDPATCH(xen_iret) -RELOC(xen_iret, 1b+1) - -/* - * sysexit is not used for 64-bit processes, so it's only ever used to - * return to 32-bit compat userspace. - */ -ENTRY(xen_sysexit) - pushq $__USER32_DS - pushq %rcx - pushq $X86_EFLAGS_IF - pushq $__USER32_CS - pushq %rdx - - pushq $0 -1: jmp hypercall_iret -ENDPATCH(xen_sysexit) -RELOC(xen_sysexit, 1b+1) - -ENTRY(xen_sysret64) - /* - * We're already on the usermode stack at this point, but - * still with the kernel gs, so we can easily switch back - */ - movq %rsp, PER_CPU_VAR(old_rsp) - movq PER_CPU_VAR(kernel_stack), %rsp - - pushq $__USER_DS - pushq PER_CPU_VAR(old_rsp) - pushq %r11 - pushq $__USER_CS - pushq %rcx - - pushq $VGCF_in_syscall -1: jmp hypercall_iret -ENDPATCH(xen_sysret64) -RELOC(xen_sysret64, 1b+1) - -ENTRY(xen_sysret32) - /* - * We're already on the usermode stack at this point, but - * still with the kernel gs, so we can easily switch back - */ - movq %rsp, PER_CPU_VAR(old_rsp) - movq PER_CPU_VAR(kernel_stack), %rsp - - pushq $__USER32_DS - pushq PER_CPU_VAR(old_rsp) - pushq %r11 - pushq $__USER32_CS - pushq %rcx - - pushq $0 -1: jmp hypercall_iret -ENDPATCH(xen_sysret32) -RELOC(xen_sysret32, 1b+1) - -/* - * Xen handles syscall callbacks much like ordinary exceptions, which - * means we have: - * - kernel gs - * - kernel rsp - * - an iret-like stack frame on the stack (including rcx and r11): - * ss - * rsp - * rflags - * cs - * rip - * r11 - * rsp->rcx - * - * In all the entrypoints, we undo all that to make it look like a - * CPU-generated syscall/sysenter and jump to the normal entrypoint. - */ - -.macro undo_xen_syscall - mov 0*8(%rsp), %rcx - mov 1*8(%rsp), %r11 - mov 5*8(%rsp), %rsp -.endm - -/* Normal 64-bit system call target */ -ENTRY(xen_syscall_target) - undo_xen_syscall - jmp system_call_after_swapgs -ENDPROC(xen_syscall_target) - -#ifdef CONFIG_IA32_EMULATION - -/* 32-bit compat syscall target */ -ENTRY(xen_syscall32_target) - undo_xen_syscall - jmp ia32_cstar_target -ENDPROC(xen_syscall32_target) - -/* 32-bit compat sysenter target */ -ENTRY(xen_sysenter_target) - undo_xen_syscall - jmp ia32_sysenter_target -ENDPROC(xen_sysenter_target) - -#else /* !CONFIG_IA32_EMULATION */ - -ENTRY(xen_syscall32_target) -ENTRY(xen_sysenter_target) - lea 16(%rsp), %rsp /* strip %rcx, %r11 */ - mov $-ENOSYS, %rax - pushq $0 - jmp hypercall_iret -ENDPROC(xen_syscall32_target) -ENDPROC(xen_sysenter_target) - -#endif /* CONFIG_IA32_EMULATION */ diff --git a/ANDROID_3.4.5/arch/x86/xen/xen-head.S b/ANDROID_3.4.5/arch/x86/xen/xen-head.S deleted file mode 100644 index aaa7291c..00000000 --- a/ANDROID_3.4.5/arch/x86/xen/xen-head.S +++ /dev/null @@ -1,55 +0,0 @@ -/* Xen-specific pieces of head.S, intended to be included in the right - place in head.S */ - -#ifdef CONFIG_XEN - -#include <linux/elfnote.h> -#include <linux/init.h> - -#include <asm/boot.h> -#include <asm/asm.h> -#include <asm/page_types.h> - -#include <xen/interface/elfnote.h> -#include <asm/xen/interface.h> - - __INIT -ENTRY(startup_xen) - cld -#ifdef CONFIG_X86_32 - mov %esi,xen_start_info - mov $init_thread_union+THREAD_SIZE,%esp -#else - mov %rsi,xen_start_info - mov $init_thread_union+THREAD_SIZE,%rsp -#endif - jmp xen_start_kernel - - __FINIT - -.pushsection .text - .align PAGE_SIZE -ENTRY(hypercall_page) - .skip PAGE_SIZE -.popsection - - ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") - ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz "2.6") - ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz "xen-3.0") -#ifdef CONFIG_X86_32 - ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, _ASM_PTR __PAGE_OFFSET) -#else - ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, _ASM_PTR __START_KERNEL_map) -#endif - ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen) - ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page) - ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") - ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") - ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") - ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, - .quad _PAGE_PRESENT; .quad _PAGE_PRESENT) - ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1) - ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, _ASM_PTR __HYPERVISOR_VIRT_START) - ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, _ASM_PTR 0) - -#endif /*CONFIG_XEN */ diff --git a/ANDROID_3.4.5/arch/x86/xen/xen-ops.h b/ANDROID_3.4.5/arch/x86/xen/xen-ops.h deleted file mode 100644 index b095739c..00000000 --- a/ANDROID_3.4.5/arch/x86/xen/xen-ops.h +++ /dev/null @@ -1,123 +0,0 @@ -#ifndef XEN_OPS_H -#define XEN_OPS_H - -#include <linux/init.h> -#include <linux/clocksource.h> -#include <linux/irqreturn.h> -#include <xen/xen-ops.h> - -/* These are code, but not functions. Defined in entry.S */ -extern const char xen_hypervisor_callback[]; -extern const char xen_failsafe_callback[]; - -extern void *xen_initial_gdt; - -struct trap_info; -void xen_copy_trap_info(struct trap_info *traps); - -DECLARE_PER_CPU(struct vcpu_info, xen_vcpu_info); -DECLARE_PER_CPU(unsigned long, xen_cr3); -DECLARE_PER_CPU(unsigned long, xen_current_cr3); - -extern struct start_info *xen_start_info; -extern struct shared_info xen_dummy_shared_info; -extern struct shared_info *HYPERVISOR_shared_info; - -void xen_setup_mfn_list_list(void); -void xen_setup_shared_info(void); -void xen_build_mfn_list_list(void); -void xen_setup_machphys_mapping(void); -pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); -void xen_ident_map_ISA(void); -void xen_reserve_top(void); -extern unsigned long xen_max_p2m_pfn; - -void xen_set_pat(u64); - -char * __init xen_memory_setup(void); -void __init xen_arch_setup(void); -void __init xen_init_IRQ(void); -void xen_enable_sysenter(void); -void xen_enable_syscall(void); -void xen_vcpu_restore(void); - -void xen_callback_vector(void); -void xen_hvm_init_shared_info(void); -void xen_unplug_emulated_devices(void); - -void __init xen_build_dynamic_phys_to_machine(void); - -void xen_init_irq_ops(void); -void xen_setup_timer(int cpu); -void xen_setup_runstate_info(int cpu); -void xen_teardown_timer(int cpu); -cycle_t xen_clocksource_read(void); -void xen_setup_cpu_clockevents(void); -void __init xen_init_time_ops(void); -void __init xen_hvm_init_time_ops(void); - -irqreturn_t xen_debug_interrupt(int irq, void *dev_id); - -bool xen_vcpu_stolen(int vcpu); - -void xen_setup_vcpu_info_placement(void); - -#ifdef CONFIG_SMP -void xen_smp_init(void); -void __init xen_hvm_smp_init(void); - -extern cpumask_var_t xen_cpu_initialized_map; -#else -static inline void xen_smp_init(void) {} -static inline void xen_hvm_smp_init(void) {} -#endif - -#ifdef CONFIG_PARAVIRT_SPINLOCKS -void __init xen_init_spinlocks(void); -void __cpuinit xen_init_lock_cpu(int cpu); -void xen_uninit_lock_cpu(int cpu); -#else -static inline void xen_init_spinlocks(void) -{ -} -static inline void xen_init_lock_cpu(int cpu) -{ -} -static inline void xen_uninit_lock_cpu(int cpu) -{ -} -#endif - -struct dom0_vga_console_info; - -#ifdef CONFIG_XEN_DOM0 -void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size); -#else -static inline void __init xen_init_vga(const struct dom0_vga_console_info *info, - size_t size) -{ -} -#endif - -/* Declare an asm function, along with symbols needed to make it - inlineable */ -#define DECL_ASM(ret, name, ...) \ - ret name(__VA_ARGS__); \ - extern char name##_end[]; \ - extern char name##_reloc[] \ - -DECL_ASM(void, xen_irq_enable_direct, void); -DECL_ASM(void, xen_irq_disable_direct, void); -DECL_ASM(unsigned long, xen_save_fl_direct, void); -DECL_ASM(void, xen_restore_fl_direct, unsigned long); - -/* These are not functions, and cannot be called normally */ -void xen_iret(void); -void xen_sysexit(void); -void xen_sysret32(void); -void xen_sysret64(void); -void xen_adjust_exception_frame(void); - -extern int xen_panic_handler_init(void); - -#endif /* XEN_OPS_H */ |