diff options
Diffstat (limited to 'ANDROID_3.4.5/arch/x86/mm')
49 files changed, 0 insertions, 14947 deletions
diff --git a/ANDROID_3.4.5/arch/x86/mm/Makefile b/ANDROID_3.4.5/arch/x86/mm/Makefile deleted file mode 100644 index 23d8e5fe..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/Makefile +++ /dev/null @@ -1,30 +0,0 @@ -obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ - pat.o pgtable.o physaddr.o gup.o setup_nx.o - -# Make sure __phys_addr has no stackprotector -nostackp := $(call cc-option, -fno-stack-protector) -CFLAGS_physaddr.o := $(nostackp) -CFLAGS_setup_nx.o := $(nostackp) - -obj-$(CONFIG_X86_PAT) += pat_rbtree.o -obj-$(CONFIG_SMP) += tlb.o - -obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o - -obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o - -obj-$(CONFIG_HIGHMEM) += highmem_32.o - -obj-$(CONFIG_KMEMCHECK) += kmemcheck/ - -obj-$(CONFIG_MMIOTRACE) += mmiotrace.o -mmiotrace-y := kmmio.o pf_in.o mmio-mod.o -obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o - -obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o -obj-$(CONFIG_AMD_NUMA) += amdtopology.o -obj-$(CONFIG_ACPI_NUMA) += srat.o -obj-$(CONFIG_NUMA_EMU) += numa_emulation.o - -obj-$(CONFIG_MEMTEST) += memtest.o diff --git a/ANDROID_3.4.5/arch/x86/mm/amdtopology.c b/ANDROID_3.4.5/arch/x86/mm/amdtopology.c deleted file mode 100644 index 5247d013..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/amdtopology.c +++ /dev/null @@ -1,197 +0,0 @@ -/* - * AMD NUMA support. - * Discover the memory map and associated nodes. - * - * This version reads it directly from the AMD northbridge. - * - * Copyright 2002,2003 Andi Kleen, SuSE Labs. 
- */ -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/string.h> -#include <linux/module.h> -#include <linux/nodemask.h> -#include <linux/memblock.h> -#include <linux/bootmem.h> - -#include <asm/io.h> -#include <linux/pci_ids.h> -#include <linux/acpi.h> -#include <asm/types.h> -#include <asm/mmzone.h> -#include <asm/proto.h> -#include <asm/e820.h> -#include <asm/pci-direct.h> -#include <asm/numa.h> -#include <asm/mpspec.h> -#include <asm/apic.h> -#include <asm/amd_nb.h> - -static unsigned char __initdata nodeids[8]; - -static __init int find_northbridge(void) -{ - int num; - - for (num = 0; num < 32; num++) { - u32 header; - - header = read_pci_config(0, num, 0, 0x00); - if (header != (PCI_VENDOR_ID_AMD | (0x1100<<16)) && - header != (PCI_VENDOR_ID_AMD | (0x1200<<16)) && - header != (PCI_VENDOR_ID_AMD | (0x1300<<16))) - continue; - - header = read_pci_config(0, num, 1, 0x00); - if (header != (PCI_VENDOR_ID_AMD | (0x1101<<16)) && - header != (PCI_VENDOR_ID_AMD | (0x1201<<16)) && - header != (PCI_VENDOR_ID_AMD | (0x1301<<16))) - continue; - return num; - } - - return -ENOENT; -} - -static __init void early_get_boot_cpu_id(void) -{ - /* - * need to get the APIC ID of the BSP so can use that to - * create apicid_to_node in amd_scan_nodes() - */ -#ifdef CONFIG_X86_MPPARSE - /* - * get boot-time SMP configuration: - */ - if (smp_found_config) - early_get_smp_config(); -#endif -} - -int __init amd_numa_init(void) -{ - u64 start = PFN_PHYS(0); - u64 end = PFN_PHYS(max_pfn); - unsigned numnodes; - u64 prevbase; - int i, j, nb; - u32 nodeid, reg; - unsigned int bits, cores, apicid_base; - - if (!early_pci_allowed()) - return -EINVAL; - - nb = find_northbridge(); - if (nb < 0) - return nb; - - pr_info("Scanning NUMA topology in Northbridge %d\n", nb); - - reg = read_pci_config(0, nb, 0, 0x60); - numnodes = ((reg >> 4) & 0xF) + 1; - if (numnodes <= 1) - return -ENOENT; - - pr_info("Number of physical nodes %d\n", numnodes); - - prevbase = 0; - for (i = 0; i 
< 8; i++) { - u64 base, limit; - - base = read_pci_config(0, nb, 1, 0x40 + i*8); - limit = read_pci_config(0, nb, 1, 0x44 + i*8); - - nodeids[i] = nodeid = limit & 7; - if ((base & 3) == 0) { - if (i < numnodes) - pr_info("Skipping disabled node %d\n", i); - continue; - } - if (nodeid >= numnodes) { - pr_info("Ignoring excess node %d (%Lx:%Lx)\n", nodeid, - base, limit); - continue; - } - - if (!limit) { - pr_info("Skipping node entry %d (base %Lx)\n", - i, base); - continue; - } - if ((base >> 8) & 3 || (limit >> 8) & 3) { - pr_err("Node %d using interleaving mode %Lx/%Lx\n", - nodeid, (base >> 8) & 3, (limit >> 8) & 3); - return -EINVAL; - } - if (node_isset(nodeid, numa_nodes_parsed)) { - pr_info("Node %d already present, skipping\n", - nodeid); - continue; - } - - limit >>= 16; - limit <<= 24; - limit |= (1<<24)-1; - limit++; - - if (limit > end) - limit = end; - if (limit <= base) - continue; - - base >>= 16; - base <<= 24; - - if (base < start) - base = start; - if (limit > end) - limit = end; - if (limit == base) { - pr_err("Empty node %d\n", nodeid); - continue; - } - if (limit < base) { - pr_err("Node %d bogus settings %Lx-%Lx.\n", - nodeid, base, limit); - continue; - } - - /* Could sort here, but pun for now. Should not happen anyroads. */ - if (prevbase > base) { - pr_err("Node map not sorted %Lx,%Lx\n", - prevbase, base); - return -EINVAL; - } - - pr_info("Node %d MemBase %016Lx Limit %016Lx\n", - nodeid, base, limit); - - prevbase = base; - numa_add_memblk(nodeid, base, limit); - node_set(nodeid, numa_nodes_parsed); - } - - if (!nodes_weight(numa_nodes_parsed)) - return -ENOENT; - - /* - * We seem to have valid NUMA configuration. Map apicids to nodes - * using the coreid bits from early_identify_cpu. 
- */ - bits = boot_cpu_data.x86_coreid_bits; - cores = 1 << bits; - apicid_base = 0; - - /* get the APIC ID of the BSP early for systems with apicid lifting */ - early_get_boot_cpu_id(); - if (boot_cpu_physical_apicid > 0) { - pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid); - apicid_base = boot_cpu_physical_apicid; - } - - for_each_node_mask(i, numa_nodes_parsed) - for (j = apicid_base; j < cores + apicid_base; j++) - set_apicid_to_node((i << bits) + j, i); - - return 0; -} diff --git a/ANDROID_3.4.5/arch/x86/mm/dump_pagetables.c b/ANDROID_3.4.5/arch/x86/mm/dump_pagetables.c deleted file mode 100644 index 0002a3a3..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/dump_pagetables.c +++ /dev/null @@ -1,375 +0,0 @@ -/* - * Debug helper to dump the current kernel pagetables of the system - * so that we can see what the various memory ranges are set to. - * - * (C) Copyright 2008 Intel Corporation - * - * Author: Arjan van de Ven <arjan@linux.intel.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ - -#include <linux/debugfs.h> -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/seq_file.h> - -#include <asm/pgtable.h> - -/* - * The dumper groups pagetable entries of the same type into one, and for - * that it needs to keep some state when walking, and flush this state - * when a "break" in the continuity is found. 
- */ -struct pg_state { - int level; - pgprot_t current_prot; - unsigned long start_address; - unsigned long current_address; - const struct addr_marker *marker; -}; - -struct addr_marker { - unsigned long start_address; - const char *name; -}; - -/* indices for address_markers; keep sync'd w/ address_markers below */ -enum address_markers_idx { - USER_SPACE_NR = 0, -#ifdef CONFIG_X86_64 - KERNEL_SPACE_NR, - LOW_KERNEL_NR, - VMALLOC_START_NR, - VMEMMAP_START_NR, - HIGH_KERNEL_NR, - MODULES_VADDR_NR, - MODULES_END_NR, -#else - KERNEL_SPACE_NR, - VMALLOC_START_NR, - VMALLOC_END_NR, -# ifdef CONFIG_HIGHMEM - PKMAP_BASE_NR, -# endif - FIXADDR_START_NR, -#endif -}; - -/* Address space markers hints */ -static struct addr_marker address_markers[] = { - { 0, "User Space" }, -#ifdef CONFIG_X86_64 - { 0x8000000000000000UL, "Kernel Space" }, - { PAGE_OFFSET, "Low Kernel Mapping" }, - { VMALLOC_START, "vmalloc() Area" }, - { VMEMMAP_START, "Vmemmap" }, - { __START_KERNEL_map, "High Kernel Mapping" }, - { MODULES_VADDR, "Modules" }, - { MODULES_END, "End Modules" }, -#else - { PAGE_OFFSET, "Kernel Mapping" }, - { 0/* VMALLOC_START */, "vmalloc() Area" }, - { 0/*VMALLOC_END*/, "vmalloc() End" }, -# ifdef CONFIG_HIGHMEM - { 0/*PKMAP_BASE*/, "Persisent kmap() Area" }, -# endif - { 0/*FIXADDR_START*/, "Fixmap Area" }, -#endif - { -1, NULL } /* End of list */ -}; - -/* Multipliers for offsets within the PTEs */ -#define PTE_LEVEL_MULT (PAGE_SIZE) -#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT) -#define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT) -#define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT) - -/* - * Print a readable form of a pgprot_t to the seq_file - */ -static void printk_prot(struct seq_file *m, pgprot_t prot, int level) -{ - pgprotval_t pr = pgprot_val(prot); - static const char * const level_name[] = - { "cr3", "pgd", "pud", "pmd", "pte" }; - - if (!pgprot_val(prot)) { - /* Not present */ - seq_printf(m, " "); - } else { - if (pr & _PAGE_USER) - 
seq_printf(m, "USR "); - else - seq_printf(m, " "); - if (pr & _PAGE_RW) - seq_printf(m, "RW "); - else - seq_printf(m, "ro "); - if (pr & _PAGE_PWT) - seq_printf(m, "PWT "); - else - seq_printf(m, " "); - if (pr & _PAGE_PCD) - seq_printf(m, "PCD "); - else - seq_printf(m, " "); - - /* Bit 9 has a different meaning on level 3 vs 4 */ - if (level <= 3) { - if (pr & _PAGE_PSE) - seq_printf(m, "PSE "); - else - seq_printf(m, " "); - } else { - if (pr & _PAGE_PAT) - seq_printf(m, "pat "); - else - seq_printf(m, " "); - } - if (pr & _PAGE_GLOBAL) - seq_printf(m, "GLB "); - else - seq_printf(m, " "); - if (pr & _PAGE_NX) - seq_printf(m, "NX "); - else - seq_printf(m, "x "); - } - seq_printf(m, "%s\n", level_name[level]); -} - -/* - * On 64 bits, sign-extend the 48 bit address to 64 bit - */ -static unsigned long normalize_addr(unsigned long u) -{ -#ifdef CONFIG_X86_64 - return (signed long)(u << 16) >> 16; -#else - return u; -#endif -} - -/* - * This function gets called on a break in a continuous series - * of PTE entries; the next one is different so we need to - * print what we collected so far. - */ -static void note_page(struct seq_file *m, struct pg_state *st, - pgprot_t new_prot, int level) -{ - pgprotval_t prot, cur; - static const char units[] = "KMGTPE"; - - /* - * If we have a "break" in the series, we need to flush the state that - * we have now. "break" is either changing perms, levels or - * address space marker. 
- */ - prot = pgprot_val(new_prot) & PTE_FLAGS_MASK; - cur = pgprot_val(st->current_prot) & PTE_FLAGS_MASK; - - if (!st->level) { - /* First entry */ - st->current_prot = new_prot; - st->level = level; - st->marker = address_markers; - seq_printf(m, "---[ %s ]---\n", st->marker->name); - } else if (prot != cur || level != st->level || - st->current_address >= st->marker[1].start_address) { - const char *unit = units; - unsigned long delta; - int width = sizeof(unsigned long) * 2; - - /* - * Now print the actual finished series - */ - seq_printf(m, "0x%0*lx-0x%0*lx ", - width, st->start_address, - width, st->current_address); - - delta = (st->current_address - st->start_address) >> 10; - while (!(delta & 1023) && unit[1]) { - delta >>= 10; - unit++; - } - seq_printf(m, "%9lu%c ", delta, *unit); - printk_prot(m, st->current_prot, st->level); - - /* - * We print markers for special areas of address space, - * such as the start of vmalloc space etc. - * This helps in the interpretation. - */ - if (st->current_address >= st->marker[1].start_address) { - st->marker++; - seq_printf(m, "---[ %s ]---\n", st->marker->name); - } - - st->start_address = st->current_address; - st->current_prot = new_prot; - st->level = level; - } -} - -static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, - unsigned long P) -{ - int i; - pte_t *start; - - start = (pte_t *) pmd_page_vaddr(addr); - for (i = 0; i < PTRS_PER_PTE; i++) { - pgprot_t prot = pte_pgprot(*start); - - st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT); - note_page(m, st, prot, 4); - start++; - } -} - -#if PTRS_PER_PMD > 1 - -static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, - unsigned long P) -{ - int i; - pmd_t *start; - - start = (pmd_t *) pud_page_vaddr(addr); - for (i = 0; i < PTRS_PER_PMD; i++) { - st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT); - if (!pmd_none(*start)) { - pgprotval_t prot = pmd_val(*start) & PTE_FLAGS_MASK; - - if 
(pmd_large(*start) || !pmd_present(*start)) - note_page(m, st, __pgprot(prot), 3); - else - walk_pte_level(m, st, *start, - P + i * PMD_LEVEL_MULT); - } else - note_page(m, st, __pgprot(0), 3); - start++; - } -} - -#else -#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p) -#define pud_large(a) pmd_large(__pmd(pud_val(a))) -#define pud_none(a) pmd_none(__pmd(pud_val(a))) -#endif - -#if PTRS_PER_PUD > 1 - -static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr, - unsigned long P) -{ - int i; - pud_t *start; - - start = (pud_t *) pgd_page_vaddr(addr); - - for (i = 0; i < PTRS_PER_PUD; i++) { - st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT); - if (!pud_none(*start)) { - pgprotval_t prot = pud_val(*start) & PTE_FLAGS_MASK; - - if (pud_large(*start) || !pud_present(*start)) - note_page(m, st, __pgprot(prot), 2); - else - walk_pmd_level(m, st, *start, - P + i * PUD_LEVEL_MULT); - } else - note_page(m, st, __pgprot(0), 2); - - start++; - } -} - -#else -#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(pgd_val(a)),p) -#define pgd_large(a) pud_large(__pud(pgd_val(a))) -#define pgd_none(a) pud_none(__pud(pgd_val(a))) -#endif - -static void walk_pgd_level(struct seq_file *m) -{ -#ifdef CONFIG_X86_64 - pgd_t *start = (pgd_t *) &init_level4_pgt; -#else - pgd_t *start = swapper_pg_dir; -#endif - int i; - struct pg_state st; - - memset(&st, 0, sizeof(st)); - - for (i = 0; i < PTRS_PER_PGD; i++) { - st.current_address = normalize_addr(i * PGD_LEVEL_MULT); - if (!pgd_none(*start)) { - pgprotval_t prot = pgd_val(*start) & PTE_FLAGS_MASK; - - if (pgd_large(*start) || !pgd_present(*start)) - note_page(m, &st, __pgprot(prot), 1); - else - walk_pud_level(m, &st, *start, - i * PGD_LEVEL_MULT); - } else - note_page(m, &st, __pgprot(0), 1); - - start++; - } - - /* Flush out the last page */ - st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT); - note_page(m, &st, __pgprot(0), 0); -} - -static int 
ptdump_show(struct seq_file *m, void *v) -{ - walk_pgd_level(m); - return 0; -} - -static int ptdump_open(struct inode *inode, struct file *filp) -{ - return single_open(filp, ptdump_show, NULL); -} - -static const struct file_operations ptdump_fops = { - .open = ptdump_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int pt_dump_init(void) -{ - struct dentry *pe; - -#ifdef CONFIG_X86_32 - /* Not a compile-time constant on x86-32 */ - address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; - address_markers[VMALLOC_END_NR].start_address = VMALLOC_END; -# ifdef CONFIG_HIGHMEM - address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE; -# endif - address_markers[FIXADDR_START_NR].start_address = FIXADDR_START; -#endif - - pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL, - &ptdump_fops); - if (!pe) - return -ENOMEM; - - return 0; -} - -__initcall(pt_dump_init); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>"); -MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables"); diff --git a/ANDROID_3.4.5/arch/x86/mm/extable.c b/ANDROID_3.4.5/arch/x86/mm/extable.c deleted file mode 100644 index 1fb85dbe..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/extable.c +++ /dev/null @@ -1,37 +0,0 @@ -#include <linux/module.h> -#include <linux/spinlock.h> -#include <asm/uaccess.h> - - -int fixup_exception(struct pt_regs *regs) -{ - const struct exception_table_entry *fixup; - -#ifdef CONFIG_PNPBIOS - if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) { - extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp; - extern u32 pnp_bios_is_utter_crap; - pnp_bios_is_utter_crap = 1; - printk(KERN_CRIT "PNPBIOS fault.. 
attempting recovery.\n"); - __asm__ volatile( - "movl %0, %%esp\n\t" - "jmp *%1\n\t" - : : "g" (pnp_bios_fault_esp), "g" (pnp_bios_fault_eip)); - panic("do_trap: can't hit this"); - } -#endif - - fixup = search_exception_tables(regs->ip); - if (fixup) { - /* If fixup is less than 16, it means uaccess error */ - if (fixup->fixup < 16) { - current_thread_info()->uaccess_err = 1; - regs->ip += fixup->fixup; - return 1; - } - regs->ip = fixup->fixup; - return 1; - } - - return 0; -} diff --git a/ANDROID_3.4.5/arch/x86/mm/fault.c b/ANDROID_3.4.5/arch/x86/mm/fault.c deleted file mode 100644 index 3ecfd1aa..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/fault.c +++ /dev/null @@ -1,1211 +0,0 @@ -/* - * Copyright (C) 1995 Linus Torvalds - * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs. - * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar - */ -#include <linux/magic.h> /* STACK_END_MAGIC */ -#include <linux/sched.h> /* test_thread_flag(), ... */ -#include <linux/kdebug.h> /* oops_begin/end, ... */ -#include <linux/module.h> /* search_exception_table */ -#include <linux/bootmem.h> /* max_low_pfn */ -#include <linux/kprobes.h> /* __kprobes, ... */ -#include <linux/mmiotrace.h> /* kmmio_handler, ... */ -#include <linux/perf_event.h> /* perf_sw_event */ -#include <linux/hugetlb.h> /* hstate_index_to_shift */ -#include <linux/prefetch.h> /* prefetchw */ - -#include <asm/traps.h> /* dotraplinkage, ... */ -#include <asm/pgalloc.h> /* pgd_*(), ... */ -#include <asm/kmemcheck.h> /* kmemcheck_*(), ... 
*/ -#include <asm/fixmap.h> /* VSYSCALL_START */ - -/* - * Page fault error code bits: - * - * bit 0 == 0: no page found 1: protection fault - * bit 1 == 0: read access 1: write access - * bit 2 == 0: kernel-mode access 1: user-mode access - * bit 3 == 1: use of reserved bit detected - * bit 4 == 1: fault was an instruction fetch - */ -enum x86_pf_error_code { - - PF_PROT = 1 << 0, - PF_WRITE = 1 << 1, - PF_USER = 1 << 2, - PF_RSVD = 1 << 3, - PF_INSTR = 1 << 4, -}; - -/* - * Returns 0 if mmiotrace is disabled, or if the fault is not - * handled by mmiotrace: - */ -static inline int __kprobes -kmmio_fault(struct pt_regs *regs, unsigned long addr) -{ - if (unlikely(is_kmmio_active())) - if (kmmio_handler(regs, addr) == 1) - return -1; - return 0; -} - -static inline int __kprobes notify_page_fault(struct pt_regs *regs) -{ - int ret = 0; - - /* kprobe_running() needs smp_processor_id() */ - if (kprobes_built_in() && !user_mode_vm(regs)) { - preempt_disable(); - if (kprobe_running() && kprobe_fault_handler(regs, 14)) - ret = 1; - preempt_enable(); - } - - return ret; -} - -/* - * Prefetch quirks: - * - * 32-bit mode: - * - * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch. - * Check that here and ignore it. - * - * 64-bit mode: - * - * Sometimes the CPU reports invalid exceptions on prefetch. - * Check that here and ignore it. - * - * Opcode checker based on code by Richard Brunner. - */ -static inline int -check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr, - unsigned char opcode, int *prefetch) -{ - unsigned char instr_hi = opcode & 0xf0; - unsigned char instr_lo = opcode & 0x0f; - - switch (instr_hi) { - case 0x20: - case 0x30: - /* - * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. 
- * In X86_64 long mode, the CPU will signal invalid - * opcode if some of these prefixes are present so - * X86_64 will never get here anyway - */ - return ((instr_lo & 7) == 0x6); -#ifdef CONFIG_X86_64 - case 0x40: - /* - * In AMD64 long mode 0x40..0x4F are valid REX prefixes - * Need to figure out under what instruction mode the - * instruction was issued. Could check the LDT for lm, - * but for now it's good enough to assume that long - * mode only uses well known segments or kernel. - */ - return (!user_mode(regs) || user_64bit_mode(regs)); -#endif - case 0x60: - /* 0x64 thru 0x67 are valid prefixes in all modes. */ - return (instr_lo & 0xC) == 0x4; - case 0xF0: - /* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */ - return !instr_lo || (instr_lo>>1) == 1; - case 0x00: - /* Prefetch instruction is 0x0F0D or 0x0F18 */ - if (probe_kernel_address(instr, opcode)) - return 0; - - *prefetch = (instr_lo == 0xF) && - (opcode == 0x0D || opcode == 0x18); - return 0; - default: - return 0; - } -} - -static int -is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) -{ - unsigned char *max_instr; - unsigned char *instr; - int prefetch = 0; - - /* - * If it was a exec (instruction fetch) fault on NX page, then - * do not ignore the fault: - */ - if (error_code & PF_INSTR) - return 0; - - instr = (void *)convert_ip_to_linear(current, regs); - max_instr = instr + 15; - - if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE) - return 0; - - while (instr < max_instr) { - unsigned char opcode; - - if (probe_kernel_address(instr, opcode)) - break; - - instr++; - - if (!check_prefetch_opcode(regs, instr, opcode, &prefetch)) - break; - } - return prefetch; -} - -static void -force_sig_info_fault(int si_signo, int si_code, unsigned long address, - struct task_struct *tsk, int fault) -{ - unsigned lsb = 0; - siginfo_t info; - - info.si_signo = si_signo; - info.si_errno = 0; - info.si_code = si_code; - info.si_addr = (void __user *)address; - if 
(fault & VM_FAULT_HWPOISON_LARGE) - lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault)); - if (fault & VM_FAULT_HWPOISON) - lsb = PAGE_SHIFT; - info.si_addr_lsb = lsb; - - force_sig_info(si_signo, &info, tsk); -} - -DEFINE_SPINLOCK(pgd_lock); -LIST_HEAD(pgd_list); - -#ifdef CONFIG_X86_32 -static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) -{ - unsigned index = pgd_index(address); - pgd_t *pgd_k; - pud_t *pud, *pud_k; - pmd_t *pmd, *pmd_k; - - pgd += index; - pgd_k = init_mm.pgd + index; - - if (!pgd_present(*pgd_k)) - return NULL; - - /* - * set_pgd(pgd, *pgd_k); here would be useless on PAE - * and redundant with the set_pmd() on non-PAE. As would - * set_pud. - */ - pud = pud_offset(pgd, address); - pud_k = pud_offset(pgd_k, address); - if (!pud_present(*pud_k)) - return NULL; - - pmd = pmd_offset(pud, address); - pmd_k = pmd_offset(pud_k, address); - if (!pmd_present(*pmd_k)) - return NULL; - - if (!pmd_present(*pmd)) - set_pmd(pmd, *pmd_k); - else - BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); - - return pmd_k; -} - -void vmalloc_sync_all(void) -{ - unsigned long address; - - if (SHARED_KERNEL_PMD) - return; - - for (address = VMALLOC_START & PMD_MASK; - address >= TASK_SIZE && address < FIXADDR_TOP; - address += PMD_SIZE) { - struct page *page; - - spin_lock(&pgd_lock); - list_for_each_entry(page, &pgd_list, lru) { - spinlock_t *pgt_lock; - pmd_t *ret; - - /* the pgt_lock only for Xen */ - pgt_lock = &pgd_page_get_mm(page)->page_table_lock; - - spin_lock(pgt_lock); - ret = vmalloc_sync_one(page_address(page), address); - spin_unlock(pgt_lock); - - if (!ret) - break; - } - spin_unlock(&pgd_lock); - } -} - -/* - * 32-bit: - * - * Handle a fault on the vmalloc or module mapping area - */ -static noinline __kprobes int vmalloc_fault(unsigned long address) -{ - unsigned long pgd_paddr; - pmd_t *pmd_k; - pte_t *pte_k; - - /* Make sure we are in vmalloc area: */ - if (!(address >= VMALLOC_START && address < VMALLOC_END)) - return -1; - - 
WARN_ON_ONCE(in_nmi()); - - /* - * Synchronize this task's top level page-table - * with the 'reference' page table. - * - * Do _not_ use "current" here. We might be inside - * an interrupt in the middle of a task switch.. - */ - pgd_paddr = read_cr3(); - pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); - if (!pmd_k) - return -1; - - pte_k = pte_offset_kernel(pmd_k, address); - if (!pte_present(*pte_k)) - return -1; - - return 0; -} - -/* - * Did it hit the DOS screen memory VA from vm86 mode? - */ -static inline void -check_v8086_mode(struct pt_regs *regs, unsigned long address, - struct task_struct *tsk) -{ - unsigned long bit; - - if (!v8086_mode(regs)) - return; - - bit = (address - 0xA0000) >> PAGE_SHIFT; - if (bit < 32) - tsk->thread.screen_bitmap |= 1 << bit; -} - -static bool low_pfn(unsigned long pfn) -{ - return pfn < max_low_pfn; -} - -static void dump_pagetable(unsigned long address) -{ - pgd_t *base = __va(read_cr3()); - pgd_t *pgd = &base[pgd_index(address)]; - pmd_t *pmd; - pte_t *pte; - -#ifdef CONFIG_X86_PAE - printk("*pdpt = %016Lx ", pgd_val(*pgd)); - if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd)) - goto out; -#endif - pmd = pmd_offset(pud_offset(pgd, address), address); - printk(KERN_CONT "*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd)); - - /* - * We must not directly access the pte in the highpte - * case if the page table is located in highmem. - * And let's rather not kmap-atomic the pte, just in case - * it's allocated already: - */ - if (!low_pfn(pmd_pfn(*pmd)) || !pmd_present(*pmd) || pmd_large(*pmd)) - goto out; - - pte = pte_offset_kernel(pmd, address); - printk("*pte = %0*Lx ", sizeof(*pte) * 2, (u64)pte_val(*pte)); -out: - printk("\n"); -} - -#else /* CONFIG_X86_64: */ - -void vmalloc_sync_all(void) -{ - sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END); -} - -/* - * 64-bit: - * - * Handle a fault on the vmalloc area - * - * This assumes no large pages in there. 
- */ -static noinline __kprobes int vmalloc_fault(unsigned long address) -{ - pgd_t *pgd, *pgd_ref; - pud_t *pud, *pud_ref; - pmd_t *pmd, *pmd_ref; - pte_t *pte, *pte_ref; - - /* Make sure we are in vmalloc area: */ - if (!(address >= VMALLOC_START && address < VMALLOC_END)) - return -1; - - WARN_ON_ONCE(in_nmi()); - - /* - * Copy kernel mappings over when needed. This can also - * happen within a race in page table update. In the later - * case just flush: - */ - pgd = pgd_offset(current->active_mm, address); - pgd_ref = pgd_offset_k(address); - if (pgd_none(*pgd_ref)) - return -1; - - if (pgd_none(*pgd)) - set_pgd(pgd, *pgd_ref); - else - BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); - - /* - * Below here mismatches are bugs because these lower tables - * are shared: - */ - - pud = pud_offset(pgd, address); - pud_ref = pud_offset(pgd_ref, address); - if (pud_none(*pud_ref)) - return -1; - - if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref)) - BUG(); - - pmd = pmd_offset(pud, address); - pmd_ref = pmd_offset(pud_ref, address); - if (pmd_none(*pmd_ref)) - return -1; - - if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref)) - BUG(); - - pte_ref = pte_offset_kernel(pmd_ref, address); - if (!pte_present(*pte_ref)) - return -1; - - pte = pte_offset_kernel(pmd, address); - - /* - * Don't use pte_page here, because the mappings can point - * outside mem_map, and the NUMA hash lookup cannot handle - * that: - */ - if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref)) - BUG(); - - return 0; -} - -#ifdef CONFIG_CPU_SUP_AMD -static const char errata93_warning[] = -KERN_ERR -"******* Your BIOS seems to not contain a fix for K8 errata #93\n" -"******* Working around it, but it may cause SEGVs or burn power.\n" -"******* Please consider a BIOS update.\n" -"******* Disabling USB legacy in the BIOS may also help.\n"; -#endif - -/* - * No vm86 mode in 64-bit mode: - */ -static inline void -check_v8086_mode(struct pt_regs *regs, 
unsigned long address, - struct task_struct *tsk) -{ -} - -static int bad_address(void *p) -{ - unsigned long dummy; - - return probe_kernel_address((unsigned long *)p, dummy); -} - -static void dump_pagetable(unsigned long address) -{ - pgd_t *base = __va(read_cr3() & PHYSICAL_PAGE_MASK); - pgd_t *pgd = base + pgd_index(address); - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - if (bad_address(pgd)) - goto bad; - - printk("PGD %lx ", pgd_val(*pgd)); - - if (!pgd_present(*pgd)) - goto out; - - pud = pud_offset(pgd, address); - if (bad_address(pud)) - goto bad; - - printk("PUD %lx ", pud_val(*pud)); - if (!pud_present(*pud) || pud_large(*pud)) - goto out; - - pmd = pmd_offset(pud, address); - if (bad_address(pmd)) - goto bad; - - printk("PMD %lx ", pmd_val(*pmd)); - if (!pmd_present(*pmd) || pmd_large(*pmd)) - goto out; - - pte = pte_offset_kernel(pmd, address); - if (bad_address(pte)) - goto bad; - - printk("PTE %lx", pte_val(*pte)); -out: - printk("\n"); - return; -bad: - printk("BAD\n"); -} - -#endif /* CONFIG_X86_64 */ - -/* - * Workaround for K8 erratum #93 & buggy BIOS. - * - * BIOS SMM functions are required to use a specific workaround - * to avoid corruption of the 64bit RIP register on C stepping K8. - * - * A lot of BIOS that didn't get tested properly miss this. - * - * The OS sees this as a page fault with the upper 32bits of RIP cleared. - * Try to work around it here. - * - * Note we only handle faults in kernel here. - * Does nothing on 32-bit. 
- */ -static int is_errata93(struct pt_regs *regs, unsigned long address) -{ -#if defined(CONFIG_X86_64) && defined(CONFIG_CPU_SUP_AMD) - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD - || boot_cpu_data.x86 != 0xf) - return 0; - - if (address != regs->ip) - return 0; - - if ((address >> 32) != 0) - return 0; - - address |= 0xffffffffUL << 32; - if ((address >= (u64)_stext && address <= (u64)_etext) || - (address >= MODULES_VADDR && address <= MODULES_END)) { - printk_once(errata93_warning); - regs->ip = address; - return 1; - } -#endif - return 0; -} - -/* - * Work around K8 erratum #100 K8 in compat mode occasionally jumps - * to illegal addresses >4GB. - * - * We catch this in the page fault handler because these addresses - * are not reachable. Just detect this case and return. Any code - * segment in LDT is compatibility mode. - */ -static int is_errata100(struct pt_regs *regs, unsigned long address) -{ -#ifdef CONFIG_X86_64 - if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) && (address >> 32)) - return 1; -#endif - return 0; -} - -static int is_f00f_bug(struct pt_regs *regs, unsigned long address) -{ -#ifdef CONFIG_X86_F00F_BUG - unsigned long nr; - - /* - * Pentium F0 0F C7 C8 bug workaround: - */ - if (boot_cpu_data.f00f_bug) { - nr = (address - idt_descr.address) >> 3; - - if (nr == 6) { - do_invalid_op(regs, 0); - return 1; - } - } -#endif - return 0; -} - -static const char nx_warning[] = KERN_CRIT -"kernel tried to execute NX-protected page - exploit attempt? 
(uid: %d)\n"; - -static void -show_fault_oops(struct pt_regs *regs, unsigned long error_code, - unsigned long address) -{ - if (!oops_may_print()) - return; - - if (error_code & PF_INSTR) { - unsigned int level; - - pte_t *pte = lookup_address(address, &level); - - if (pte && pte_present(*pte) && !pte_exec(*pte)) - printk(nx_warning, current_uid()); - } - - printk(KERN_ALERT "BUG: unable to handle kernel "); - if (address < PAGE_SIZE) - printk(KERN_CONT "NULL pointer dereference"); - else - printk(KERN_CONT "paging request"); - - printk(KERN_CONT " at %p\n", (void *) address); - printk(KERN_ALERT "IP:"); - printk_address(regs->ip, 1); - - dump_pagetable(address); -} - -static noinline void -pgtable_bad(struct pt_regs *regs, unsigned long error_code, - unsigned long address) -{ - struct task_struct *tsk; - unsigned long flags; - int sig; - - flags = oops_begin(); - tsk = current; - sig = SIGKILL; - - printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", - tsk->comm, address); - dump_pagetable(address); - - tsk->thread.cr2 = address; - tsk->thread.trap_nr = X86_TRAP_PF; - tsk->thread.error_code = error_code; - - if (__die("Bad pagetable", regs, error_code)) - sig = 0; - - oops_end(flags, regs, sig); -} - -static noinline void -no_context(struct pt_regs *regs, unsigned long error_code, - unsigned long address, int signal, int si_code) -{ - struct task_struct *tsk = current; - unsigned long *stackend; - unsigned long flags; - int sig; - - /* Are we prepared to handle this kernel fault? */ - if (fixup_exception(regs)) { - if (current_thread_info()->sig_on_uaccess_error && signal) { - tsk->thread.trap_nr = X86_TRAP_PF; - tsk->thread.error_code = error_code | PF_USER; - tsk->thread.cr2 = address; - - /* XXX: hwpoison faults will set the wrong code. 
*/ - force_sig_info_fault(signal, si_code, address, tsk, 0); - } - return; - } - - /* - * 32-bit: - * - * Valid to do another page fault here, because if this fault - * had been triggered by is_prefetch fixup_exception would have - * handled it. - * - * 64-bit: - * - * Hall of shame of CPU/BIOS bugs. - */ - if (is_prefetch(regs, error_code, address)) - return; - - if (is_errata93(regs, address)) - return; - - /* - * Oops. The kernel tried to access some bad page. We'll have to - * terminate things with extreme prejudice: - */ - flags = oops_begin(); - - show_fault_oops(regs, error_code, address); - - stackend = end_of_stack(tsk); - if (tsk != &init_task && *stackend != STACK_END_MAGIC) - printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); - - tsk->thread.cr2 = address; - tsk->thread.trap_nr = X86_TRAP_PF; - tsk->thread.error_code = error_code; - - sig = SIGKILL; - if (__die("Oops", regs, error_code)) - sig = 0; - - /* Executive summary in case the body of the oops scrolled away */ - printk(KERN_DEFAULT "CR2: %016lx\n", address); - - oops_end(flags, regs, sig); -} - -/* - * Print out info about fatal segfaults, if the show_unhandled_signals - * sysctl is set: - */ -static inline void -show_signal_msg(struct pt_regs *regs, unsigned long error_code, - unsigned long address, struct task_struct *tsk) -{ - if (!unhandled_signal(tsk, SIGSEGV)) - return; - - if (!printk_ratelimit()) - return; - - printk("%s%s[%d]: segfault at %lx ip %p sp %p error %lx", - task_pid_nr(tsk) > 1 ? 
KERN_INFO : KERN_EMERG, - tsk->comm, task_pid_nr(tsk), address, - (void *)regs->ip, (void *)regs->sp, error_code); - - print_vma_addr(KERN_CONT " in ", regs->ip); - - printk(KERN_CONT "\n"); -} - -static void -__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, - unsigned long address, int si_code) -{ - struct task_struct *tsk = current; - - /* User mode accesses just cause a SIGSEGV */ - if (error_code & PF_USER) { - /* - * It's possible to have interrupts off here: - */ - local_irq_enable(); - - /* - * Valid to do another page fault here because this one came - * from user space: - */ - if (is_prefetch(regs, error_code, address)) - return; - - if (is_errata100(regs, address)) - return; - -#ifdef CONFIG_X86_64 - /* - * Instruction fetch faults in the vsyscall page might need - * emulation. - */ - if (unlikely((error_code & PF_INSTR) && - ((address & ~0xfff) == VSYSCALL_START))) { - if (emulate_vsyscall(regs, address)) - return; - } -#endif - - if (unlikely(show_unhandled_signals)) - show_signal_msg(regs, error_code, address, tsk); - - /* Kernel addresses are always protection faults: */ - tsk->thread.cr2 = address; - tsk->thread.error_code = error_code | (address >= TASK_SIZE); - tsk->thread.trap_nr = X86_TRAP_PF; - - force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0); - - return; - } - - if (is_f00f_bug(regs, address)) - return; - - no_context(regs, error_code, address, SIGSEGV, si_code); -} - -static noinline void -bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, - unsigned long address) -{ - __bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR); -} - -static void -__bad_area(struct pt_regs *regs, unsigned long error_code, - unsigned long address, int si_code) -{ - struct mm_struct *mm = current->mm; - - /* - * Something tried to access memory that isn't in our memory map.. - * Fix it, but check if it's kernel or user first.. 
- */ - up_read(&mm->mmap_sem); - - __bad_area_nosemaphore(regs, error_code, address, si_code); -} - -static noinline void -bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address) -{ - __bad_area(regs, error_code, address, SEGV_MAPERR); -} - -static noinline void -bad_area_access_error(struct pt_regs *regs, unsigned long error_code, - unsigned long address) -{ - __bad_area(regs, error_code, address, SEGV_ACCERR); -} - -/* TODO: fixup for "mm-invoke-oom-killer-from-page-fault.patch" */ -static void -out_of_memory(struct pt_regs *regs, unsigned long error_code, - unsigned long address) -{ - /* - * We ran out of memory, call the OOM killer, and return the userspace - * (which will retry the fault, or kill us if we got oom-killed): - */ - up_read(¤t->mm->mmap_sem); - - pagefault_out_of_memory(); -} - -static void -do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, - unsigned int fault) -{ - struct task_struct *tsk = current; - struct mm_struct *mm = tsk->mm; - int code = BUS_ADRERR; - - up_read(&mm->mmap_sem); - - /* Kernel mode? Handle exceptions or die: */ - if (!(error_code & PF_USER)) { - no_context(regs, error_code, address, SIGBUS, BUS_ADRERR); - return; - } - - /* User-space => ok to do another page fault: */ - if (is_prefetch(regs, error_code, address)) - return; - - tsk->thread.cr2 = address; - tsk->thread.error_code = error_code; - tsk->thread.trap_nr = X86_TRAP_PF; - -#ifdef CONFIG_MEMORY_FAILURE - if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) { - printk(KERN_ERR - "MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n", - tsk->comm, tsk->pid, address); - code = BUS_MCEERR_AR; - } -#endif - force_sig_info_fault(SIGBUS, code, address, tsk, fault); -} - -static noinline int -mm_fault_error(struct pt_regs *regs, unsigned long error_code, - unsigned long address, unsigned int fault) -{ - /* - * Pagefault was interrupted by SIGKILL. We have no reason to - * continue pagefault. 
- */ - if (fatal_signal_pending(current)) { - if (!(fault & VM_FAULT_RETRY)) - up_read(¤t->mm->mmap_sem); - if (!(error_code & PF_USER)) - no_context(regs, error_code, address, 0, 0); - return 1; - } - if (!(fault & VM_FAULT_ERROR)) - return 0; - - if (fault & VM_FAULT_OOM) { - /* Kernel mode? Handle exceptions or die: */ - if (!(error_code & PF_USER)) { - up_read(¤t->mm->mmap_sem); - no_context(regs, error_code, address, - SIGSEGV, SEGV_MAPERR); - return 1; - } - - out_of_memory(regs, error_code, address); - } else { - if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| - VM_FAULT_HWPOISON_LARGE)) - do_sigbus(regs, error_code, address, fault); - else - BUG(); - } - return 1; -} - -static int spurious_fault_check(unsigned long error_code, pte_t *pte) -{ - if ((error_code & PF_WRITE) && !pte_write(*pte)) - return 0; - - if ((error_code & PF_INSTR) && !pte_exec(*pte)) - return 0; - - return 1; -} - -/* - * Handle a spurious fault caused by a stale TLB entry. - * - * This allows us to lazily refresh the TLB when increasing the - * permissions of a kernel page (RO -> RW or NX -> X). Doing it - * eagerly is very expensive since that implies doing a full - * cross-processor TLB flush, even if no stale TLB entries exist - * on other processors. - * - * There are no security implications to leaving a stale TLB when - * increasing the permissions on a page. - */ -static noinline __kprobes int -spurious_fault(unsigned long error_code, unsigned long address) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - int ret; - - /* Reserved-bit violation or user access to kernel space? 
*/ - if (error_code & (PF_USER | PF_RSVD)) - return 0; - - pgd = init_mm.pgd + pgd_index(address); - if (!pgd_present(*pgd)) - return 0; - - pud = pud_offset(pgd, address); - if (!pud_present(*pud)) - return 0; - - if (pud_large(*pud)) - return spurious_fault_check(error_code, (pte_t *) pud); - - pmd = pmd_offset(pud, address); - if (!pmd_present(*pmd)) - return 0; - - if (pmd_large(*pmd)) - return spurious_fault_check(error_code, (pte_t *) pmd); - - /* - * Note: don't use pte_present() here, since it returns true - * if the _PAGE_PROTNONE bit is set. However, this aliases the - * _PAGE_GLOBAL bit, which for kernel pages give false positives - * when CONFIG_DEBUG_PAGEALLOC is used. - */ - pte = pte_offset_kernel(pmd, address); - if (!(pte_flags(*pte) & _PAGE_PRESENT)) - return 0; - - ret = spurious_fault_check(error_code, pte); - if (!ret) - return 0; - - /* - * Make sure we have permissions in PMD. - * If not, then there's a bug in the page tables: - */ - ret = spurious_fault_check(error_code, (pte_t *) pmd); - WARN_ONCE(!ret, "PMD has incorrect permission bits\n"); - - return ret; -} - -int show_unhandled_signals = 1; - -static inline int -access_error(unsigned long error_code, struct vm_area_struct *vma) -{ - if (error_code & PF_WRITE) { - /* write, present and write, not present: */ - if (unlikely(!(vma->vm_flags & VM_WRITE))) - return 1; - return 0; - } - - /* read, present: */ - if (unlikely(error_code & PF_PROT)) - return 1; - - /* read, not present: */ - if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))) - return 1; - - return 0; -} - -static int fault_in_kernel_space(unsigned long address) -{ - return address >= TASK_SIZE_MAX; -} - -/* - * This routine handles page faults. It determines the address, - * and the problem, and then passes it off to one of the appropriate - * routines. 
- */ -dotraplinkage void __kprobes -do_page_fault(struct pt_regs *regs, unsigned long error_code) -{ - struct vm_area_struct *vma; - struct task_struct *tsk; - unsigned long address; - struct mm_struct *mm; - int fault; - int write = error_code & PF_WRITE; - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (write ? FAULT_FLAG_WRITE : 0); - - tsk = current; - mm = tsk->mm; - - /* Get the faulting address: */ - address = read_cr2(); - - /* - * Detect and handle instructions that would cause a page fault for - * both a tracked kernel page and a userspace page. - */ - if (kmemcheck_active(regs)) - kmemcheck_hide(regs); - prefetchw(&mm->mmap_sem); - - if (unlikely(kmmio_fault(regs, address))) - return; - - /* - * We fault-in kernel-space virtual memory on-demand. The - * 'reference' page table is init_mm.pgd. - * - * NOTE! We MUST NOT take any locks for this case. We may - * be in an interrupt or a critical region, and should - * only copy the information from the master page table, - * nothing more. - * - * This verifies that the fault happens in kernel space - * (error_code & 4) == 0, and that the fault was not a - * protection error (error_code & 9) == 0. - */ - if (unlikely(fault_in_kernel_space(address))) { - if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) { - if (vmalloc_fault(address) >= 0) - return; - - if (kmemcheck_fault(regs, address, error_code)) - return; - } - - /* Can handle a stale RO->RW TLB: */ - if (spurious_fault(error_code, address)) - return; - - /* kprobes don't want to hook the spurious faults: */ - if (notify_page_fault(regs)) - return; - /* - * Don't take the mm semaphore here. If we fixup a prefetch - * fault we could otherwise deadlock: - */ - bad_area_nosemaphore(regs, error_code, address); - - return; - } - - /* kprobes don't want to hook the spurious faults: */ - if (unlikely(notify_page_fault(regs))) - return; - /* - * It's safe to allow irq's after cr2 has been saved and the - * vmalloc fault has been handled. 
- * - * User-mode registers count as a user access even for any - * potential system fault or CPU buglet: - */ - if (user_mode_vm(regs)) { - local_irq_enable(); - error_code |= PF_USER; - } else { - if (regs->flags & X86_EFLAGS_IF) - local_irq_enable(); - } - - if (unlikely(error_code & PF_RSVD)) - pgtable_bad(regs, error_code, address); - - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); - - /* - * If we're in an interrupt, have no user context or are running - * in an atomic region then we must not take the fault: - */ - if (unlikely(in_atomic() || !mm)) { - bad_area_nosemaphore(regs, error_code, address); - return; - } - - /* - * When running in the kernel we expect faults to occur only to - * addresses in user space. All other faults represent errors in - * the kernel and should generate an OOPS. Unfortunately, in the - * case of an erroneous fault occurring in a code path which already - * holds mmap_sem we will deadlock attempting to validate the fault - * against the address space. Luckily the kernel only validly - * references user space from well defined areas of code, which are - * listed in the exceptions table. - * - * As the vast majority of faults will be valid we will only perform - * the source reference check when there is a possibility of a - * deadlock. Attempt to lock the address space, if we cannot we then - * validate the source. 
If this is invalid we can skip the address - * space check, thus avoiding the deadlock: - */ - if (unlikely(!down_read_trylock(&mm->mmap_sem))) { - if ((error_code & PF_USER) == 0 && - !search_exception_tables(regs->ip)) { - bad_area_nosemaphore(regs, error_code, address); - return; - } -retry: - down_read(&mm->mmap_sem); - } else { - /* - * The above down_read_trylock() might have succeeded in - * which case we'll have missed the might_sleep() from - * down_read(): - */ - might_sleep(); - } - - vma = find_vma(mm, address); - if (unlikely(!vma)) { - bad_area(regs, error_code, address); - return; - } - if (likely(vma->vm_start <= address)) - goto good_area; - if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) { - bad_area(regs, error_code, address); - return; - } - if (error_code & PF_USER) { - /* - * Accessing the stack below %sp is always a bug. - * The large cushion allows instructions like enter - * and pusha to work. ("enter $65535, $31" pushes - * 32 pointers and then decrements %sp by 65535.) - */ - if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) { - bad_area(regs, error_code, address); - return; - } - } - if (unlikely(expand_stack(vma, address))) { - bad_area(regs, error_code, address); - return; - } - - /* - * Ok, we have a good vm_area for this memory access, so - * we can handle it.. - */ -good_area: - if (unlikely(access_error(error_code, vma))) { - bad_area_access_error(regs, error_code, address); - return; - } - - /* - * If for any reason at all we couldn't handle the fault, - * make sure we exit gracefully rather than endlessly redo - * the fault: - */ - fault = handle_mm_fault(mm, vma, address, flags); - - if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) { - if (mm_fault_error(regs, error_code, address, fault)) - return; - } - - /* - * Major/minor page fault accounting is only done on the - * initial attempt. If we go through a retry, it is extremely - * likely that the page will be found in page cache at that point. 
- */ - if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, - regs, address); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, - regs, address); - } - if (fault & VM_FAULT_RETRY) { - /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk - * of starvation. */ - flags &= ~FAULT_FLAG_ALLOW_RETRY; - goto retry; - } - } - - check_v8086_mode(regs, address, tsk); - - up_read(&mm->mmap_sem); -} diff --git a/ANDROID_3.4.5/arch/x86/mm/gup.c b/ANDROID_3.4.5/arch/x86/mm/gup.c deleted file mode 100644 index dd74e468..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/gup.c +++ /dev/null @@ -1,393 +0,0 @@ -/* - * Lockless get_user_pages_fast for x86 - * - * Copyright (C) 2008 Nick Piggin - * Copyright (C) 2008 Novell Inc. - */ -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/vmstat.h> -#include <linux/highmem.h> -#include <linux/swap.h> - -#include <asm/pgtable.h> - -static inline pte_t gup_get_pte(pte_t *ptep) -{ -#ifndef CONFIG_X86_PAE - return ACCESS_ONCE(*ptep); -#else - /* - * With get_user_pages_fast, we walk down the pagetables without taking - * any locks. For this we would like to load the pointers atomically, - * but that is not possible (without expensive cmpxchg8b) on PAE. What - * we do have is the guarantee that a pte will only either go from not - * present to present, or present to not present or both -- it will not - * switch to a completely different present page without a TLB flush in - * between; something that we are blocking by holding interrupts off. - * - * Setting ptes from not present to present goes: - * ptep->pte_high = h; - * smp_wmb(); - * ptep->pte_low = l; - * - * And present to not present goes: - * ptep->pte_low = 0; - * smp_wmb(); - * ptep->pte_high = 0; - * - * We must ensure here that the load of pte_low sees l iff pte_high - * sees h. 
We load pte_high *after* loading pte_low, which ensures we - * don't see an older value of pte_high. *Then* we recheck pte_low, - * which ensures that we haven't picked up a changed pte high. We might - * have got rubbish values from pte_low and pte_high, but we are - * guaranteed that pte_low will not have the present bit set *unless* - * it is 'l'. And get_user_pages_fast only operates on present ptes, so - * we're safe. - * - * gup_get_pte should not be used or copied outside gup.c without being - * very careful -- it does not atomically load the pte or anything that - * is likely to be useful for you. - */ - pte_t pte; - -retry: - pte.pte_low = ptep->pte_low; - smp_rmb(); - pte.pte_high = ptep->pte_high; - smp_rmb(); - if (unlikely(pte.pte_low != ptep->pte_low)) - goto retry; - - return pte; -#endif -} - -/* - * The performance critical leaf functions are made noinline otherwise gcc - * inlines everything into a single function which results in too much - * register pressure. - */ -static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - unsigned long mask; - pte_t *ptep; - - mask = _PAGE_PRESENT|_PAGE_USER; - if (write) - mask |= _PAGE_RW; - - ptep = pte_offset_map(&pmd, addr); - do { - pte_t pte = gup_get_pte(ptep); - struct page *page; - - if ((pte_flags(pte) & (mask | _PAGE_SPECIAL)) != mask) { - pte_unmap(ptep); - return 0; - } - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - page = pte_page(pte); - get_page(page); - SetPageReferenced(page); - pages[*nr] = page; - (*nr)++; - - } while (ptep++, addr += PAGE_SIZE, addr != end); - pte_unmap(ptep - 1); - - return 1; -} - -static inline void get_head_page_multiple(struct page *page, int nr) -{ - VM_BUG_ON(page != compound_head(page)); - VM_BUG_ON(page_count(page) == 0); - atomic_add(nr, &page->_count); - SetPageReferenced(page); -} - -static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, - unsigned long end, int write, struct page 
**pages, int *nr) -{ - unsigned long mask; - pte_t pte = *(pte_t *)&pmd; - struct page *head, *page; - int refs; - - mask = _PAGE_PRESENT|_PAGE_USER; - if (write) - mask |= _PAGE_RW; - if ((pte_flags(pte) & mask) != mask) - return 0; - /* hugepages are never "special" */ - VM_BUG_ON(pte_flags(pte) & _PAGE_SPECIAL); - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - - refs = 0; - head = pte_page(pte); - page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); - do { - VM_BUG_ON(compound_head(page) != head); - pages[*nr] = page; - if (PageTail(page)) - get_huge_page_tail(page); - (*nr)++; - page++; - refs++; - } while (addr += PAGE_SIZE, addr != end); - get_head_page_multiple(head, refs); - - return 1; -} - -static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pmd_t *pmdp; - - pmdp = pmd_offset(&pud, addr); - do { - pmd_t pmd = *pmdp; - - next = pmd_addr_end(addr, end); - /* - * The pmd_trans_splitting() check below explains why - * pmdp_splitting_flush has to flush the tlb, to stop - * this gup-fast code from running while we set the - * splitting bit in the pmd. Returning zero will take - * the slow path that will call wait_split_huge_page() - * if the pmd is still in splitting state. gup-fast - * can't because it has irq disabled and - * wait_split_huge_page() would never return as the - * tlb flush IPI wouldn't run. 
- */ - if (pmd_none(pmd) || pmd_trans_splitting(pmd)) - return 0; - if (unlikely(pmd_large(pmd))) { - if (!gup_huge_pmd(pmd, addr, next, write, pages, nr)) - return 0; - } else { - if (!gup_pte_range(pmd, addr, next, write, pages, nr)) - return 0; - } - } while (pmdp++, addr = next, addr != end); - - return 1; -} - -static noinline int gup_huge_pud(pud_t pud, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - unsigned long mask; - pte_t pte = *(pte_t *)&pud; - struct page *head, *page; - int refs; - - mask = _PAGE_PRESENT|_PAGE_USER; - if (write) - mask |= _PAGE_RW; - if ((pte_flags(pte) & mask) != mask) - return 0; - /* hugepages are never "special" */ - VM_BUG_ON(pte_flags(pte) & _PAGE_SPECIAL); - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - - refs = 0; - head = pte_page(pte); - page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); - do { - VM_BUG_ON(compound_head(page) != head); - pages[*nr] = page; - if (PageTail(page)) - get_huge_page_tail(page); - (*nr)++; - page++; - refs++; - } while (addr += PAGE_SIZE, addr != end); - get_head_page_multiple(head, refs); - - return 1; -} - -static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pud_t *pudp; - - pudp = pud_offset(&pgd, addr); - do { - pud_t pud = *pudp; - - next = pud_addr_end(addr, end); - if (pud_none(pud)) - return 0; - if (unlikely(pud_large(pud))) { - if (!gup_huge_pud(pud, addr, next, write, pages, nr)) - return 0; - } else { - if (!gup_pmd_range(pud, addr, next, write, pages, nr)) - return 0; - } - } while (pudp++, addr = next, addr != end); - - return 1; -} - -/* - * Like get_user_pages_fast() except its IRQ-safe in that it won't fall - * back to the regular GUP. 
- */ -int __get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) -{ - struct mm_struct *mm = current->mm; - unsigned long addr, len, end; - unsigned long next; - unsigned long flags; - pgd_t *pgdp; - int nr = 0; - - start &= PAGE_MASK; - addr = start; - len = (unsigned long) nr_pages << PAGE_SHIFT; - end = start + len; - if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, - (void __user *)start, len))) - return 0; - - /* - * XXX: batch / limit 'nr', to avoid large irq off latency - * needs some instrumenting to determine the common sizes used by - * important workloads (eg. DB2), and whether limiting the batch size - * will decrease performance. - * - * It seems like we're in the clear for the moment. Direct-IO is - * the main guy that batches up lots of get_user_pages, and even - * they are limited to 64-at-a-time which is not so many. - */ - /* - * This doesn't prevent pagetable teardown, but does prevent - * the pagetables and pages from being freed on x86. - * - * So long as we atomically load page table pointers versus teardown - * (which we do on x86, with the above PAE exception), we can follow the - * address down to the the page and take a ref on it. - */ - local_irq_save(flags); - pgdp = pgd_offset(mm, addr); - do { - pgd_t pgd = *pgdp; - - next = pgd_addr_end(addr, end); - if (pgd_none(pgd)) - break; - if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) - break; - } while (pgdp++, addr = next, addr != end); - local_irq_restore(flags); - - return nr; -} - -/** - * get_user_pages_fast() - pin user pages in memory - * @start: starting user address - * @nr_pages: number of pages from start to pin - * @write: whether pages will be written to - * @pages: array that receives pointers to the pages pinned. - * Should be at least nr_pages long. - * - * Attempt to pin user pages in memory without taking mm->mmap_sem. - * If not successful, it will fall back to taking the lock and - * calling get_user_pages(). 
- * - * Returns number of pages pinned. This may be fewer than the number - * requested. If nr_pages is 0 or negative, returns 0. If no pages - * were pinned, returns -errno. - */ -int get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) -{ - struct mm_struct *mm = current->mm; - unsigned long addr, len, end; - unsigned long next; - pgd_t *pgdp; - int nr = 0; - - start &= PAGE_MASK; - addr = start; - len = (unsigned long) nr_pages << PAGE_SHIFT; - - end = start + len; - if (end < start) - goto slow_irqon; - -#ifdef CONFIG_X86_64 - if (end >> __VIRTUAL_MASK_SHIFT) - goto slow_irqon; -#endif - - /* - * XXX: batch / limit 'nr', to avoid large irq off latency - * needs some instrumenting to determine the common sizes used by - * important workloads (eg. DB2), and whether limiting the batch size - * will decrease performance. - * - * It seems like we're in the clear for the moment. Direct-IO is - * the main guy that batches up lots of get_user_pages, and even - * they are limited to 64-at-a-time which is not so many. - */ - /* - * This doesn't prevent pagetable teardown, but does prevent - * the pagetables and pages from being freed on x86. - * - * So long as we atomically load page table pointers versus teardown - * (which we do on x86, with the above PAE exception), we can follow the - * address down to the the page and take a ref on it. 
- */ - local_irq_disable(); - pgdp = pgd_offset(mm, addr); - do { - pgd_t pgd = *pgdp; - - next = pgd_addr_end(addr, end); - if (pgd_none(pgd)) - goto slow; - if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) - goto slow; - } while (pgdp++, addr = next, addr != end); - local_irq_enable(); - - VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); - return nr; - - { - int ret; - -slow: - local_irq_enable(); -slow_irqon: - /* Try to get the remaining pages with get_user_pages */ - start += nr << PAGE_SHIFT; - pages += nr; - - down_read(&mm->mmap_sem); - ret = get_user_pages(current, mm, start, - (end - start) >> PAGE_SHIFT, write, 0, pages, NULL); - up_read(&mm->mmap_sem); - - /* Have to be a bit careful with return values */ - if (nr > 0) { - if (ret < 0) - ret = nr; - else - ret += nr; - } - - return ret; - } -} diff --git a/ANDROID_3.4.5/arch/x86/mm/highmem_32.c b/ANDROID_3.4.5/arch/x86/mm/highmem_32.c deleted file mode 100644 index 6f31ee56..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/highmem_32.c +++ /dev/null @@ -1,141 +0,0 @@ -#include <linux/highmem.h> -#include <linux/module.h> -#include <linux/swap.h> /* for totalram_pages */ - -void *kmap(struct page *page) -{ - might_sleep(); - if (!PageHighMem(page)) - return page_address(page); - return kmap_high(page); -} -EXPORT_SYMBOL(kmap); - -void kunmap(struct page *page) -{ - if (in_interrupt()) - BUG(); - if (!PageHighMem(page)) - return; - kunmap_high(page); -} -EXPORT_SYMBOL(kunmap); - -/* - * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because - * no global lock is needed and because the kmap code must perform a global TLB - * invalidation when the kmap pool wraps. - * - * However when holding an atomic kmap it is not legal to sleep, so atomic - * kmaps are appropriate for short, tight code paths only. 
- */ -void *kmap_atomic_prot(struct page *page, pgprot_t prot) -{ - unsigned long vaddr; - int idx, type; - - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ - pagefault_disable(); - - if (!PageHighMem(page)) - return page_address(page); - - type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - BUG_ON(!pte_none(*(kmap_pte-idx))); - set_pte(kmap_pte-idx, mk_pte(page, prot)); - arch_flush_lazy_mmu_mode(); - - return (void *)vaddr; -} -EXPORT_SYMBOL(kmap_atomic_prot); - -void *kmap_atomic(struct page *page) -{ - return kmap_atomic_prot(page, kmap_prot); -} -EXPORT_SYMBOL(kmap_atomic); - -/* - * This is the same as kmap_atomic() but can map memory that doesn't - * have a struct page associated with it. - */ -void *kmap_atomic_pfn(unsigned long pfn) -{ - return kmap_atomic_prot_pfn(pfn, kmap_prot); -} -EXPORT_SYMBOL_GPL(kmap_atomic_pfn); - -void __kunmap_atomic(void *kvaddr) -{ - unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - - if (vaddr >= __fix_to_virt(FIX_KMAP_END) && - vaddr <= __fix_to_virt(FIX_KMAP_BEGIN)) { - int idx, type; - - type = kmap_atomic_idx(); - idx = type + KM_TYPE_NR * smp_processor_id(); - -#ifdef CONFIG_DEBUG_HIGHMEM - WARN_ON_ONCE(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); -#endif - /* - * Force other mappings to Oops if they'll try to access this - * pte without first remap it. Keeping stale mappings around - * is a bad idea also, in case the page changes cacheability - * attributes or becomes a protected page in a hypervisor. 
- */ - kpte_clear_flush(kmap_pte-idx, vaddr); - kmap_atomic_idx_pop(); - arch_flush_lazy_mmu_mode(); - } -#ifdef CONFIG_DEBUG_HIGHMEM - else { - BUG_ON(vaddr < PAGE_OFFSET); - BUG_ON(vaddr >= (unsigned long)high_memory); - } -#endif - - pagefault_enable(); -} -EXPORT_SYMBOL(__kunmap_atomic); - -struct page *kmap_atomic_to_page(void *ptr) -{ - unsigned long idx, vaddr = (unsigned long)ptr; - pte_t *pte; - - if (vaddr < FIXADDR_START) - return virt_to_page(ptr); - - idx = virt_to_fix(vaddr); - pte = kmap_pte - (idx - FIX_KMAP_BEGIN); - return pte_page(*pte); -} -EXPORT_SYMBOL(kmap_atomic_to_page); - -void __init set_highmem_pages_init(void) -{ - struct zone *zone; - int nid; - - for_each_zone(zone) { - unsigned long zone_start_pfn, zone_end_pfn; - - if (!is_highmem(zone)) - continue; - - zone_start_pfn = zone->zone_start_pfn; - zone_end_pfn = zone_start_pfn + zone->spanned_pages; - - nid = zone_to_nid(zone); - printk(KERN_INFO "Initializing %s for node %d (%08lx:%08lx)\n", - zone->name, nid, zone_start_pfn, zone_end_pfn); - - add_highpages_with_active_regions(nid, zone_start_pfn, - zone_end_pfn); - } - totalram_pages += totalhigh_pages; -} diff --git a/ANDROID_3.4.5/arch/x86/mm/hugetlbpage.c b/ANDROID_3.4.5/arch/x86/mm/hugetlbpage.c deleted file mode 100644 index f6679a7f..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/hugetlbpage.c +++ /dev/null @@ -1,443 +0,0 @@ -/* - * IA-32 Huge TLB Page Support for Kernel. 
- * - * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com> - */ - -#include <linux/init.h> -#include <linux/fs.h> -#include <linux/mm.h> -#include <linux/hugetlb.h> -#include <linux/pagemap.h> -#include <linux/err.h> -#include <linux/sysctl.h> -#include <asm/mman.h> -#include <asm/tlb.h> -#include <asm/tlbflush.h> -#include <asm/pgalloc.h> - -static unsigned long page_table_shareable(struct vm_area_struct *svma, - struct vm_area_struct *vma, - unsigned long addr, pgoff_t idx) -{ - unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) + - svma->vm_start; - unsigned long sbase = saddr & PUD_MASK; - unsigned long s_end = sbase + PUD_SIZE; - - /* Allow segments to share if only one is marked locked */ - unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED; - unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED; - - /* - * match the virtual addresses, permission and the alignment of the - * page table page. - */ - if (pmd_index(addr) != pmd_index(saddr) || - vm_flags != svm_flags || - sbase < svma->vm_start || svma->vm_end < s_end) - return 0; - - return saddr; -} - -static int vma_shareable(struct vm_area_struct *vma, unsigned long addr) -{ - unsigned long base = addr & PUD_MASK; - unsigned long end = base + PUD_SIZE; - - /* - * check on proper vm_flags and page table alignment - */ - if (vma->vm_flags & VM_MAYSHARE && - vma->vm_start <= base && end <= vma->vm_end) - return 1; - return 0; -} - -/* - * search for a shareable pmd page for hugetlb. 
- */ -static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) -{ - struct vm_area_struct *vma = find_vma(mm, addr); - struct address_space *mapping = vma->vm_file->f_mapping; - pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + - vma->vm_pgoff; - struct prio_tree_iter iter; - struct vm_area_struct *svma; - unsigned long saddr; - pte_t *spte = NULL; - - if (!vma_shareable(vma, addr)) - return; - - mutex_lock(&mapping->i_mmap_mutex); - vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) { - if (svma == vma) - continue; - - saddr = page_table_shareable(svma, vma, addr, idx); - if (saddr) { - spte = huge_pte_offset(svma->vm_mm, saddr); - if (spte) { - get_page(virt_to_page(spte)); - break; - } - } - } - - if (!spte) - goto out; - - spin_lock(&mm->page_table_lock); - if (pud_none(*pud)) - pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK)); - else - put_page(virt_to_page(spte)); - spin_unlock(&mm->page_table_lock); -out: - mutex_unlock(&mapping->i_mmap_mutex); -} - -/* - * unmap huge page backed by shared pte. - * - * Hugetlb pte page is ref counted at the time of mapping. If pte is shared - * indicated by page_count > 1, unmap is achieved by clearing pud and - * decrementing the ref count. If count == 1, the pte page is not shared. - * - * called with vma->vm_mm->page_table_lock held. 
- * - * returns: 1 successfully unmapped a shared pte page - * 0 the underlying pte page is not shared, or it is the last user - */ -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) -{ - pgd_t *pgd = pgd_offset(mm, *addr); - pud_t *pud = pud_offset(pgd, *addr); - - BUG_ON(page_count(virt_to_page(ptep)) == 0); - if (page_count(virt_to_page(ptep)) == 1) - return 0; - - pud_clear(pud); - put_page(virt_to_page(ptep)); - *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE; - return 1; -} - -pte_t *huge_pte_alloc(struct mm_struct *mm, - unsigned long addr, unsigned long sz) -{ - pgd_t *pgd; - pud_t *pud; - pte_t *pte = NULL; - - pgd = pgd_offset(mm, addr); - pud = pud_alloc(mm, pgd, addr); - if (pud) { - if (sz == PUD_SIZE) { - pte = (pte_t *)pud; - } else { - BUG_ON(sz != PMD_SIZE); - if (pud_none(*pud)) - huge_pmd_share(mm, addr, pud); - pte = (pte_t *) pmd_alloc(mm, pud, addr); - } - } - BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); - - return pte; -} - -pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd = NULL; - - pgd = pgd_offset(mm, addr); - if (pgd_present(*pgd)) { - pud = pud_offset(pgd, addr); - if (pud_present(*pud)) { - if (pud_large(*pud)) - return (pte_t *)pud; - pmd = pmd_offset(pud, addr); - } - } - return (pte_t *) pmd; -} - -#if 0 /* This is just for testing */ -struct page * -follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) -{ - unsigned long start = address; - int length = 1; - int nr; - struct page *page; - struct vm_area_struct *vma; - - vma = find_vma(mm, addr); - if (!vma || !is_vm_hugetlb_page(vma)) - return ERR_PTR(-EINVAL); - - pte = huge_pte_offset(mm, address); - - /* hugetlb should be locked, and hence, prefaulted */ - WARN_ON(!pte || pte_none(*pte)); - - page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; - - WARN_ON(!PageHead(page)); - - return page; -} - -int pmd_huge(pmd_t pmd) -{ - return 0; -} - -int 
pud_huge(pud_t pud) -{ - return 0; -} - -struct page * -follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int write) -{ - return NULL; -} - -#else - -struct page * -follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) -{ - return ERR_PTR(-EINVAL); -} - -int pmd_huge(pmd_t pmd) -{ - return !!(pmd_val(pmd) & _PAGE_PSE); -} - -int pud_huge(pud_t pud) -{ - return !!(pud_val(pud) & _PAGE_PSE); -} - -struct page * -follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int write) -{ - struct page *page; - - page = pte_page(*(pte_t *)pmd); - if (page) - page += ((address & ~PMD_MASK) >> PAGE_SHIFT); - return page; -} - -struct page * -follow_huge_pud(struct mm_struct *mm, unsigned long address, - pud_t *pud, int write) -{ - struct page *page; - - page = pte_page(*(pte_t *)pud); - if (page) - page += ((address & ~PUD_MASK) >> PAGE_SHIFT); - return page; -} - -#endif - -/* x86_64 also uses this file */ - -#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA -static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, - unsigned long addr, unsigned long len, - unsigned long pgoff, unsigned long flags) -{ - struct hstate *h = hstate_file(file); - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - unsigned long start_addr; - - if (len > mm->cached_hole_size) { - start_addr = mm->free_area_cache; - } else { - start_addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - } - -full_search: - addr = ALIGN(start_addr, huge_page_size(h)); - - for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { - /* At this point: (!vma || addr < vma->vm_end). */ - if (TASK_SIZE - len < addr) { - /* - * Start a new search - just in case we missed - * some holes. 
- */ - if (start_addr != TASK_UNMAPPED_BASE) { - start_addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - goto full_search; - } - return -ENOMEM; - } - if (!vma || addr + len <= vma->vm_start) { - mm->free_area_cache = addr + len; - return addr; - } - if (addr + mm->cached_hole_size < vma->vm_start) - mm->cached_hole_size = vma->vm_start - addr; - addr = ALIGN(vma->vm_end, huge_page_size(h)); - } -} - -static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, - unsigned long addr0, unsigned long len, - unsigned long pgoff, unsigned long flags) -{ - struct hstate *h = hstate_file(file); - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - unsigned long base = mm->mmap_base; - unsigned long addr = addr0; - unsigned long largest_hole = mm->cached_hole_size; - unsigned long start_addr; - - /* don't allow allocations above current base */ - if (mm->free_area_cache > base) - mm->free_area_cache = base; - - if (len <= largest_hole) { - largest_hole = 0; - mm->free_area_cache = base; - } -try_again: - start_addr = mm->free_area_cache; - - /* make sure it can fit in the remaining address space */ - if (mm->free_area_cache < len) - goto fail; - - /* either no address requested or can't fit in requested address hole */ - addr = (mm->free_area_cache - len) & huge_page_mask(h); - do { - /* - * Lookup failure means no vma is above this address, - * i.e. 
return with success: - */ - vma = find_vma(mm, addr); - if (!vma) - return addr; - - if (addr + len <= vma->vm_start) { - /* remember the address as a hint for next time */ - mm->cached_hole_size = largest_hole; - return (mm->free_area_cache = addr); - } else if (mm->free_area_cache == vma->vm_end) { - /* pull free_area_cache down to the first hole */ - mm->free_area_cache = vma->vm_start; - mm->cached_hole_size = largest_hole; - } - - /* remember the largest hole we saw so far */ - if (addr + largest_hole < vma->vm_start) - largest_hole = vma->vm_start - addr; - - /* try just below the current vma->vm_start */ - addr = (vma->vm_start - len) & huge_page_mask(h); - } while (len <= vma->vm_start); - -fail: - /* - * if hint left us with no space for the requested - * mapping then try again: - */ - if (start_addr != base) { - mm->free_area_cache = base; - largest_hole = 0; - goto try_again; - } - /* - * A failed mmap() very likely causes application failure, - * so fall back to the bottom-up function here. This scenario - * can happen with large stack limits and large mmap() - * allocations. 
- */ - mm->free_area_cache = TASK_UNMAPPED_BASE; - mm->cached_hole_size = ~0UL; - addr = hugetlb_get_unmapped_area_bottomup(file, addr0, - len, pgoff, flags); - - /* - * Restore the topdown base: - */ - mm->free_area_cache = base; - mm->cached_hole_size = ~0UL; - - return addr; -} - -unsigned long -hugetlb_get_unmapped_area(struct file *file, unsigned long addr, - unsigned long len, unsigned long pgoff, unsigned long flags) -{ - struct hstate *h = hstate_file(file); - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - - if (len & ~huge_page_mask(h)) - return -EINVAL; - if (len > TASK_SIZE) - return -ENOMEM; - - if (flags & MAP_FIXED) { - if (prepare_hugepage_range(file, addr, len)) - return -EINVAL; - return addr; - } - - if (addr) { - addr = ALIGN(addr, huge_page_size(h)); - vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) - return addr; - } - if (mm->get_unmapped_area == arch_get_unmapped_area) - return hugetlb_get_unmapped_area_bottomup(file, addr, len, - pgoff, flags); - else - return hugetlb_get_unmapped_area_topdown(file, addr, len, - pgoff, flags); -} - -#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/ - -#ifdef CONFIG_X86_64 -static __init int setup_hugepagesz(char *opt) -{ - unsigned long ps = memparse(opt, &opt); - if (ps == PMD_SIZE) { - hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); - } else if (ps == PUD_SIZE && cpu_has_gbpages) { - hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); - } else { - printk(KERN_ERR "hugepagesz: Unsupported page size %lu M\n", - ps >> 20); - return 0; - } - return 1; -} -__setup("hugepagesz=", setup_hugepagesz); -#endif diff --git a/ANDROID_3.4.5/arch/x86/mm/init.c b/ANDROID_3.4.5/arch/x86/mm/init.c deleted file mode 100644 index 4f0cec7e..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/init.c +++ /dev/null @@ -1,416 +0,0 @@ -#include <linux/gfp.h> -#include <linux/initrd.h> -#include <linux/ioport.h> -#include <linux/swap.h> -#include <linux/memblock.h> -#include 
<linux/bootmem.h> /* for max_low_pfn */ - -#include <asm/cacheflush.h> -#include <asm/e820.h> -#include <asm/init.h> -#include <asm/page.h> -#include <asm/page_types.h> -#include <asm/sections.h> -#include <asm/setup.h> -#include <asm/tlbflush.h> -#include <asm/tlb.h> -#include <asm/proto.h> -#include <asm/dma.h> /* for MAX_DMA_PFN */ - -unsigned long __initdata pgt_buf_start; -unsigned long __meminitdata pgt_buf_end; -unsigned long __meminitdata pgt_buf_top; - -int after_bootmem; - -int direct_gbpages -#ifdef CONFIG_DIRECT_GBPAGES - = 1 -#endif -; - -static void __init find_early_table_space(unsigned long end, int use_pse, - int use_gbpages) -{ - unsigned long puds, pmds, ptes, tables, start = 0, good_end = end; - phys_addr_t base; - - puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; - tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); - - if (use_gbpages) { - unsigned long extra; - - extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); - pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; - } else - pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; - - tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); - - if (use_pse) { - unsigned long extra; - - extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); -#ifdef CONFIG_X86_32 - extra += PMD_SIZE; -#endif - ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; - } else - ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; - - tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); - -#ifdef CONFIG_X86_32 - /* for fixmap */ - tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); -#endif - good_end = max_pfn_mapped << PAGE_SHIFT; - - base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE); - if (!base) - panic("Cannot find space for the kernel page tables"); - - pgt_buf_start = base >> PAGE_SHIFT; - pgt_buf_end = pgt_buf_start; - pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT); - - printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", - end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT); -} - -void __init 
native_pagetable_reserve(u64 start, u64 end) -{ - memblock_reserve(start, end - start); -} - -struct map_range { - unsigned long start; - unsigned long end; - unsigned page_size_mask; -}; - -#ifdef CONFIG_X86_32 -#define NR_RANGE_MR 3 -#else /* CONFIG_X86_64 */ -#define NR_RANGE_MR 5 -#endif - -static int __meminit save_mr(struct map_range *mr, int nr_range, - unsigned long start_pfn, unsigned long end_pfn, - unsigned long page_size_mask) -{ - if (start_pfn < end_pfn) { - if (nr_range >= NR_RANGE_MR) - panic("run out of range for init_memory_mapping\n"); - mr[nr_range].start = start_pfn<<PAGE_SHIFT; - mr[nr_range].end = end_pfn<<PAGE_SHIFT; - mr[nr_range].page_size_mask = page_size_mask; - nr_range++; - } - - return nr_range; -} - -/* - * Setup the direct mapping of the physical memory at PAGE_OFFSET. - * This runs before bootmem is initialized and gets pages directly from - * the physical memory. To access them they are temporarily mapped. - */ -unsigned long __init_refok init_memory_mapping(unsigned long start, - unsigned long end) -{ - unsigned long page_size_mask = 0; - unsigned long start_pfn, end_pfn; - unsigned long ret = 0; - unsigned long pos; - - struct map_range mr[NR_RANGE_MR]; - int nr_range, i; - int use_pse, use_gbpages; - - printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end); - -#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) - /* - * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. - * This will simplify cpa(), which otherwise needs to support splitting - * large pages into small in interrupt context, etc. 
- */ - use_pse = use_gbpages = 0; -#else - use_pse = cpu_has_pse; - use_gbpages = direct_gbpages; -#endif - - /* Enable PSE if available */ - if (cpu_has_pse) - set_in_cr4(X86_CR4_PSE); - - /* Enable PGE if available */ - if (cpu_has_pge) { - set_in_cr4(X86_CR4_PGE); - __supported_pte_mask |= _PAGE_GLOBAL; - } - - if (use_gbpages) - page_size_mask |= 1 << PG_LEVEL_1G; - if (use_pse) - page_size_mask |= 1 << PG_LEVEL_2M; - - memset(mr, 0, sizeof(mr)); - nr_range = 0; - - /* head if not big page alignment ? */ - start_pfn = start >> PAGE_SHIFT; - pos = start_pfn << PAGE_SHIFT; -#ifdef CONFIG_X86_32 - /* - * Don't use a large page for the first 2/4MB of memory - * because there are often fixed size MTRRs in there - * and overlapping MTRRs into large pages can cause - * slowdowns. - */ - if (pos == 0) - end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT); - else - end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) - << (PMD_SHIFT - PAGE_SHIFT); -#else /* CONFIG_X86_64 */ - end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) - << (PMD_SHIFT - PAGE_SHIFT); -#endif - if (end_pfn > (end >> PAGE_SHIFT)) - end_pfn = end >> PAGE_SHIFT; - if (start_pfn < end_pfn) { - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); - pos = end_pfn << PAGE_SHIFT; - } - - /* big page (2M) range */ - start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) - << (PMD_SHIFT - PAGE_SHIFT); -#ifdef CONFIG_X86_32 - end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); -#else /* CONFIG_X86_64 */ - end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) - << (PUD_SHIFT - PAGE_SHIFT); - if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT))) - end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)); -#endif - - if (start_pfn < end_pfn) { - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, - page_size_mask & (1<<PG_LEVEL_2M)); - pos = end_pfn << PAGE_SHIFT; - } - -#ifdef CONFIG_X86_64 - /* big page (1G) range */ - start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) - << (PUD_SHIFT - PAGE_SHIFT); - end_pfn = (end >> PUD_SHIFT) << 
(PUD_SHIFT - PAGE_SHIFT); - if (start_pfn < end_pfn) { - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, - page_size_mask & - ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G))); - pos = end_pfn << PAGE_SHIFT; - } - - /* tail is not big page (1G) alignment */ - start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) - << (PMD_SHIFT - PAGE_SHIFT); - end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); - if (start_pfn < end_pfn) { - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, - page_size_mask & (1<<PG_LEVEL_2M)); - pos = end_pfn << PAGE_SHIFT; - } -#endif - - /* tail is not big page (2M) alignment */ - start_pfn = pos>>PAGE_SHIFT; - end_pfn = end>>PAGE_SHIFT; - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); - - /* try to merge same page size and continuous */ - for (i = 0; nr_range > 1 && i < nr_range - 1; i++) { - unsigned long old_start; - if (mr[i].end != mr[i+1].start || - mr[i].page_size_mask != mr[i+1].page_size_mask) - continue; - /* move it */ - old_start = mr[i].start; - memmove(&mr[i], &mr[i+1], - (nr_range - 1 - i) * sizeof(struct map_range)); - mr[i--].start = old_start; - nr_range--; - } - - for (i = 0; i < nr_range; i++) - printk(KERN_DEBUG " %010lx - %010lx page %s\n", - mr[i].start, mr[i].end, - (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":( - (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); - - /* - * Find space for the kernel direct mapping tables. - * - * Later we should allocate these tables in the local node of the - * memory mapped. Unfortunately this is done currently before the - * nodes are discovered. 
- */ - if (!after_bootmem) - find_early_table_space(end, use_pse, use_gbpages); - - for (i = 0; i < nr_range; i++) - ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, - mr[i].page_size_mask); - -#ifdef CONFIG_X86_32 - early_ioremap_page_table_range_init(); - - load_cr3(swapper_pg_dir); -#endif - - __flush_tlb_all(); - - /* - * Reserve the kernel pagetable pages we used (pgt_buf_start - - * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top) - * so that they can be reused for other purposes. - * - * On native it just means calling memblock_reserve, on Xen it also - * means marking RW the pagetable pages that we allocated before - * but that haven't been used. - * - * In fact on xen we mark RO the whole range pgt_buf_start - - * pgt_buf_top, because we have to make sure that when - * init_memory_mapping reaches the pagetable pages area, it maps - * RO all the pagetable pages, including the ones that are beyond - * pgt_buf_end at that time. - */ - if (!after_bootmem && pgt_buf_end > pgt_buf_start) - x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start), - PFN_PHYS(pgt_buf_end)); - - if (!after_bootmem) - early_memtest(start, end); - - return ret >> PAGE_SHIFT; -} - - -/* - * devmem_is_allowed() checks to see if /dev/mem access to a certain address - * is valid. The argument is a physical page number. - * - * - * On x86, access has to be given to the first megabyte of ram because that area - * contains bios code and data regions used by X and dosemu and similar apps. - * Access has to be given to non-kernel-ram areas as well, these contain the PCI - * mmio resources as well as potential bios/acpi data regions. 
- */ -int devmem_is_allowed(unsigned long pagenr) -{ - if (pagenr <= 256) - return 1; - if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) - return 0; - if (!page_is_ram(pagenr)) - return 1; - return 0; -} - -void free_init_pages(char *what, unsigned long begin, unsigned long end) -{ - unsigned long addr; - unsigned long begin_aligned, end_aligned; - - /* Make sure boundaries are page aligned */ - begin_aligned = PAGE_ALIGN(begin); - end_aligned = end & PAGE_MASK; - - if (WARN_ON(begin_aligned != begin || end_aligned != end)) { - begin = begin_aligned; - end = end_aligned; - } - - if (begin >= end) - return; - - addr = begin; - - /* - * If debugging page accesses then do not free this memory but - * mark them not present - any buggy init-section access will - * create a kernel page fault: - */ -#ifdef CONFIG_DEBUG_PAGEALLOC - printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", - begin, end); - set_memory_np(begin, (end - begin) >> PAGE_SHIFT); -#else - /* - * We just marked the kernel text read only above, now that - * we are going to free part of that, we need to make that - * writeable and non-executable first. 
- */ - set_memory_nx(begin, (end - begin) >> PAGE_SHIFT); - set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); - - printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); - - for (; addr < end; addr += PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); - memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); - free_page(addr); - totalram_pages++; - } -#endif -} - -void free_initmem(void) -{ - free_init_pages("unused kernel memory", - (unsigned long)(&__init_begin), - (unsigned long)(&__init_end)); -} - -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - /* - * end could be not aligned, and We can not align that, - * decompresser could be confused by aligned initrd_end - * We already reserve the end partial page before in - * - i386_start_kernel() - * - x86_64_start_kernel() - * - relocate_initrd() - * So here We can do PAGE_ALIGN() safely to get partial page to be freed - */ - free_init_pages("initrd memory", start, PAGE_ALIGN(end)); -} -#endif - -void __init zone_sizes_init(void) -{ - unsigned long max_zone_pfns[MAX_NR_ZONES]; - - memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); - -#ifdef CONFIG_ZONE_DMA - max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; -#endif -#ifdef CONFIG_ZONE_DMA32 - max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; -#endif - max_zone_pfns[ZONE_NORMAL] = max_low_pfn; -#ifdef CONFIG_HIGHMEM - max_zone_pfns[ZONE_HIGHMEM] = max_pfn; -#endif - - free_area_init_nodes(max_zone_pfns); -} - diff --git a/ANDROID_3.4.5/arch/x86/mm/init_32.c b/ANDROID_3.4.5/arch/x86/mm/init_32.c deleted file mode 100644 index 575d86f8..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/init_32.c +++ /dev/null @@ -1,959 +0,0 @@ -/* - * - * Copyright (C) 1995 Linus Torvalds - * - * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 - */ - -#include <linux/module.h> -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include 
<linux/string.h> -#include <linux/types.h> -#include <linux/ptrace.h> -#include <linux/mman.h> -#include <linux/mm.h> -#include <linux/hugetlb.h> -#include <linux/swap.h> -#include <linux/smp.h> -#include <linux/init.h> -#include <linux/highmem.h> -#include <linux/pagemap.h> -#include <linux/pci.h> -#include <linux/pfn.h> -#include <linux/poison.h> -#include <linux/bootmem.h> -#include <linux/memblock.h> -#include <linux/proc_fs.h> -#include <linux/memory_hotplug.h> -#include <linux/initrd.h> -#include <linux/cpumask.h> -#include <linux/gfp.h> - -#include <asm/asm.h> -#include <asm/bios_ebda.h> -#include <asm/processor.h> -#include <asm/uaccess.h> -#include <asm/pgtable.h> -#include <asm/dma.h> -#include <asm/fixmap.h> -#include <asm/e820.h> -#include <asm/apic.h> -#include <asm/bugs.h> -#include <asm/tlb.h> -#include <asm/tlbflush.h> -#include <asm/olpc_ofw.h> -#include <asm/pgalloc.h> -#include <asm/sections.h> -#include <asm/paravirt.h> -#include <asm/setup.h> -#include <asm/cacheflush.h> -#include <asm/page_types.h> -#include <asm/init.h> - -unsigned long highstart_pfn, highend_pfn; - -static noinline int do_test_wp_bit(void); - -bool __read_mostly __vmalloc_start_set = false; - -static __init void *alloc_low_page(void) -{ - unsigned long pfn = pgt_buf_end++; - void *adr; - - if (pfn >= pgt_buf_top) - panic("alloc_low_page: ran out of memory"); - - adr = __va(pfn * PAGE_SIZE); - clear_page(adr); - return adr; -} - -/* - * Creates a middle page table and puts a pointer to it in the - * given global directory entry. This only returns the gd entry - * in non-PAE compilation mode, since the middle layer is folded. 
- */ -static pmd_t * __init one_md_table_init(pgd_t *pgd) -{ - pud_t *pud; - pmd_t *pmd_table; - -#ifdef CONFIG_X86_PAE - if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { - if (after_bootmem) - pmd_table = (pmd_t *)alloc_bootmem_pages(PAGE_SIZE); - else - pmd_table = (pmd_t *)alloc_low_page(); - paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); - set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); - pud = pud_offset(pgd, 0); - BUG_ON(pmd_table != pmd_offset(pud, 0)); - - return pmd_table; - } -#endif - pud = pud_offset(pgd, 0); - pmd_table = pmd_offset(pud, 0); - - return pmd_table; -} - -/* - * Create a page table and place a pointer to it in a middle page - * directory entry: - */ -static pte_t * __init one_page_table_init(pmd_t *pmd) -{ - if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { - pte_t *page_table = NULL; - - if (after_bootmem) { -#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) - page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); -#endif - if (!page_table) - page_table = - (pte_t *)alloc_bootmem_pages(PAGE_SIZE); - } else - page_table = (pte_t *)alloc_low_page(); - - paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); - set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); - BUG_ON(page_table != pte_offset_kernel(pmd, 0)); - } - - return pte_offset_kernel(pmd, 0); -} - -pmd_t * __init populate_extra_pmd(unsigned long vaddr) -{ - int pgd_idx = pgd_index(vaddr); - int pmd_idx = pmd_index(vaddr); - - return one_md_table_init(swapper_pg_dir + pgd_idx) + pmd_idx; -} - -pte_t * __init populate_extra_pte(unsigned long vaddr) -{ - int pte_idx = pte_index(vaddr); - pmd_t *pmd; - - pmd = populate_extra_pmd(vaddr); - return one_page_table_init(pmd) + pte_idx; -} - -static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, - unsigned long vaddr, pte_t *lastpte) -{ -#ifdef CONFIG_HIGHMEM - /* - * Something (early fixmap) may already have put a pte - * page here, which causes the page table allocation - * to become nonlinear. 
Attempt to fix it, and if it - * is still nonlinear then we have to bug. - */ - int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT; - int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT; - - if (pmd_idx_kmap_begin != pmd_idx_kmap_end - && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin - && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end - && ((__pa(pte) >> PAGE_SHIFT) < pgt_buf_start - || (__pa(pte) >> PAGE_SHIFT) >= pgt_buf_end)) { - pte_t *newpte; - int i; - - BUG_ON(after_bootmem); - newpte = alloc_low_page(); - for (i = 0; i < PTRS_PER_PTE; i++) - set_pte(newpte + i, pte[i]); - - paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT); - set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE)); - BUG_ON(newpte != pte_offset_kernel(pmd, 0)); - __flush_tlb_all(); - - paravirt_release_pte(__pa(pte) >> PAGE_SHIFT); - pte = newpte; - } - BUG_ON(vaddr < fix_to_virt(FIX_KMAP_BEGIN - 1) - && vaddr > fix_to_virt(FIX_KMAP_END) - && lastpte && lastpte + PTRS_PER_PTE != pte); -#endif - return pte; -} - -/* - * This function initializes a certain range of kernel virtual memory - * with new bootmem page tables, everywhere page tables are missing in - * the given range. - * - * NOTE: The pagetables are allocated contiguous on the physical space - * so we can cache the place of the first one and move around without - * checking the pgd every time. 
- */ -static void __init -page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) -{ - int pgd_idx, pmd_idx; - unsigned long vaddr; - pgd_t *pgd; - pmd_t *pmd; - pte_t *pte = NULL; - - vaddr = start; - pgd_idx = pgd_index(vaddr); - pmd_idx = pmd_index(vaddr); - pgd = pgd_base + pgd_idx; - - for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { - pmd = one_md_table_init(pgd); - pmd = pmd + pmd_index(vaddr); - for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); - pmd++, pmd_idx++) { - pte = page_table_kmap_check(one_page_table_init(pmd), - pmd, vaddr, pte); - - vaddr += PMD_SIZE; - } - pmd_idx = 0; - } -} - -static inline int is_kernel_text(unsigned long addr) -{ - if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end) - return 1; - return 0; -} - -/* - * This maps the physical memory to kernel virtual address space, a total - * of max_low_pfn pages, by creating page tables starting from address - * PAGE_OFFSET: - */ -unsigned long __init -kernel_physical_mapping_init(unsigned long start, - unsigned long end, - unsigned long page_size_mask) -{ - int use_pse = page_size_mask == (1<<PG_LEVEL_2M); - unsigned long last_map_addr = end; - unsigned long start_pfn, end_pfn; - pgd_t *pgd_base = swapper_pg_dir; - int pgd_idx, pmd_idx, pte_ofs; - unsigned long pfn; - pgd_t *pgd; - pmd_t *pmd; - pte_t *pte; - unsigned pages_2m, pages_4k; - int mapping_iter; - - start_pfn = start >> PAGE_SHIFT; - end_pfn = end >> PAGE_SHIFT; - - /* - * First iteration will setup identity mapping using large/small pages - * based on use_pse, with other attributes same as set by - * the early code in head_32.S - * - * Second iteration will setup the appropriate attributes (NX, GLOBAL..) - * as desired for the kernel identity mapping. 
- * - * This two pass mechanism conforms to the TLB app note which says: - * - * "Software should not write to a paging-structure entry in a way - * that would change, for any linear address, both the page size - * and either the page frame or attributes." - */ - mapping_iter = 1; - - if (!cpu_has_pse) - use_pse = 0; - -repeat: - pages_2m = pages_4k = 0; - pfn = start_pfn; - pgd_idx = pgd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET); - pgd = pgd_base + pgd_idx; - for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) { - pmd = one_md_table_init(pgd); - - if (pfn >= end_pfn) - continue; -#ifdef CONFIG_X86_PAE - pmd_idx = pmd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET); - pmd += pmd_idx; -#else - pmd_idx = 0; -#endif - for (; pmd_idx < PTRS_PER_PMD && pfn < end_pfn; - pmd++, pmd_idx++) { - unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET; - - /* - * Map with big pages if possible, otherwise - * create normal page tables: - */ - if (use_pse) { - unsigned int addr2; - pgprot_t prot = PAGE_KERNEL_LARGE; - /* - * first pass will use the same initial - * identity mapping attribute + _PAGE_PSE. - */ - pgprot_t init_prot = - __pgprot(PTE_IDENT_ATTR | - _PAGE_PSE); - - addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE + - PAGE_OFFSET + PAGE_SIZE-1; - - if (is_kernel_text(addr) || - is_kernel_text(addr2)) - prot = PAGE_KERNEL_LARGE_EXEC; - - pages_2m++; - if (mapping_iter == 1) - set_pmd(pmd, pfn_pmd(pfn, init_prot)); - else - set_pmd(pmd, pfn_pmd(pfn, prot)); - - pfn += PTRS_PER_PTE; - continue; - } - pte = one_page_table_init(pmd); - - pte_ofs = pte_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET); - pte += pte_ofs; - for (; pte_ofs < PTRS_PER_PTE && pfn < end_pfn; - pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) { - pgprot_t prot = PAGE_KERNEL; - /* - * first pass will use the same initial - * identity mapping attribute. 
- */ - pgprot_t init_prot = __pgprot(PTE_IDENT_ATTR); - - if (is_kernel_text(addr)) - prot = PAGE_KERNEL_EXEC; - - pages_4k++; - if (mapping_iter == 1) { - set_pte(pte, pfn_pte(pfn, init_prot)); - last_map_addr = (pfn << PAGE_SHIFT) + PAGE_SIZE; - } else - set_pte(pte, pfn_pte(pfn, prot)); - } - } - } - if (mapping_iter == 1) { - /* - * update direct mapping page count only in the first - * iteration. - */ - update_page_count(PG_LEVEL_2M, pages_2m); - update_page_count(PG_LEVEL_4K, pages_4k); - - /* - * local global flush tlb, which will flush the previous - * mappings present in both small and large page TLB's. - */ - __flush_tlb_all(); - - /* - * Second iteration will set the actual desired PTE attributes. - */ - mapping_iter = 2; - goto repeat; - } - return last_map_addr; -} - -pte_t *kmap_pte; -pgprot_t kmap_prot; - -static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr) -{ - return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), - vaddr), vaddr), vaddr); -} - -static void __init kmap_init(void) -{ - unsigned long kmap_vstart; - - /* - * Cache the first kmap pte: - */ - kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); - kmap_pte = kmap_get_fixmap_pte(kmap_vstart); - - kmap_prot = PAGE_KERNEL; -} - -#ifdef CONFIG_HIGHMEM -static void __init permanent_kmaps_init(pgd_t *pgd_base) -{ - unsigned long vaddr; - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - vaddr = PKMAP_BASE; - page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); - - pgd = swapper_pg_dir + pgd_index(vaddr); - pud = pud_offset(pgd, vaddr); - pmd = pmd_offset(pud, vaddr); - pte = pte_offset_kernel(pmd, vaddr); - pkmap_page_table = pte; -} - -static void __init add_one_highpage_init(struct page *page) -{ - ClearPageReserved(page); - init_page_count(page); - __free_page(page); - totalhigh_pages++; -} - -void __init add_highpages_with_active_regions(int nid, - unsigned long start_pfn, unsigned long end_pfn) -{ - phys_addr_t start, end; - u64 i; - - 
for_each_free_mem_range(i, nid, &start, &end, NULL) { - unsigned long pfn = clamp_t(unsigned long, PFN_UP(start), - start_pfn, end_pfn); - unsigned long e_pfn = clamp_t(unsigned long, PFN_DOWN(end), - start_pfn, end_pfn); - for ( ; pfn < e_pfn; pfn++) - if (pfn_valid(pfn)) - add_one_highpage_init(pfn_to_page(pfn)); - } -} -#else -static inline void permanent_kmaps_init(pgd_t *pgd_base) -{ -} -#endif /* CONFIG_HIGHMEM */ - -void __init native_pagetable_setup_start(pgd_t *base) -{ - unsigned long pfn, va; - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - /* - * Remove any mappings which extend past the end of physical - * memory from the boot time page table: - */ - for (pfn = max_low_pfn + 1; pfn < 1<<(32-PAGE_SHIFT); pfn++) { - va = PAGE_OFFSET + (pfn<<PAGE_SHIFT); - pgd = base + pgd_index(va); - if (!pgd_present(*pgd)) - break; - - pud = pud_offset(pgd, va); - pmd = pmd_offset(pud, va); - if (!pmd_present(*pmd)) - break; - - pte = pte_offset_kernel(pmd, va); - if (!pte_present(*pte)) - break; - - pte_clear(NULL, va, pte); - } - paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT); -} - -void __init native_pagetable_setup_done(pgd_t *base) -{ -} - -/* - * Build a proper pagetable for the kernel mappings. Up until this - * point, we've been running on some set of pagetables constructed by - * the boot process. - * - * If we're booting on native hardware, this will be a pagetable - * constructed in arch/x86/kernel/head_32.S. The root of the - * pagetable will be swapper_pg_dir. - * - * If we're booting paravirtualized under a hypervisor, then there are - * more options: we may already be running PAE, and the pagetable may - * or may not be based in swapper_pg_dir. In any case, - * paravirt_pagetable_setup_start() will set up swapper_pg_dir - * appropriately for the rest of the initialization to work. 
- * - * In general, pagetable_init() assumes that the pagetable may already - * be partially populated, and so it avoids stomping on any existing - * mappings. - */ -void __init early_ioremap_page_table_range_init(void) -{ - pgd_t *pgd_base = swapper_pg_dir; - unsigned long vaddr, end; - - /* - * Fixed mappings, only the page table structure has to be - * created - mappings will be set by set_fixmap(): - */ - vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; - end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK; - page_table_range_init(vaddr, end, pgd_base); - early_ioremap_reset(); -} - -static void __init pagetable_init(void) -{ - pgd_t *pgd_base = swapper_pg_dir; - - permanent_kmaps_init(pgd_base); -} - -pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP); -EXPORT_SYMBOL_GPL(__supported_pte_mask); - -/* user-defined highmem size */ -static unsigned int highmem_pages = -1; - -/* - * highmem=size forces highmem to be exactly 'size' bytes. - * This works even on boxes that have no highmem otherwise. - * This also works to reduce highmem size on bigger boxes. 
- */ -static int __init parse_highmem(char *arg) -{ - if (!arg) - return -EINVAL; - - highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT; - return 0; -} -early_param("highmem", parse_highmem); - -#define MSG_HIGHMEM_TOO_BIG \ - "highmem size (%luMB) is bigger than pages available (%luMB)!\n" - -#define MSG_LOWMEM_TOO_SMALL \ - "highmem size (%luMB) results in <64MB lowmem, ignoring it!\n" -/* - * All of RAM fits into lowmem - but if user wants highmem - * artificially via the highmem=x boot parameter then create - * it: - */ -void __init lowmem_pfn_init(void) -{ - /* max_low_pfn is 0, we already have early_res support */ - max_low_pfn = max_pfn; - - if (highmem_pages == -1) - highmem_pages = 0; -#ifdef CONFIG_HIGHMEM - if (highmem_pages >= max_pfn) { - printk(KERN_ERR MSG_HIGHMEM_TOO_BIG, - pages_to_mb(highmem_pages), pages_to_mb(max_pfn)); - highmem_pages = 0; - } - if (highmem_pages) { - if (max_low_pfn - highmem_pages < 64*1024*1024/PAGE_SIZE) { - printk(KERN_ERR MSG_LOWMEM_TOO_SMALL, - pages_to_mb(highmem_pages)); - highmem_pages = 0; - } - max_low_pfn -= highmem_pages; - } -#else - if (highmem_pages) - printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n"); -#endif -} - -#define MSG_HIGHMEM_TOO_SMALL \ - "only %luMB highmem pages available, ignoring highmem size of %luMB!\n" - -#define MSG_HIGHMEM_TRIMMED \ - "Warning: only 4GB will be used. 
Use a HIGHMEM64G enabled kernel!\n" -/* - * We have more RAM than fits into lowmem - we try to put it into - * highmem, also taking the highmem=x boot parameter into account: - */ -void __init highmem_pfn_init(void) -{ - max_low_pfn = MAXMEM_PFN; - - if (highmem_pages == -1) - highmem_pages = max_pfn - MAXMEM_PFN; - - if (highmem_pages + MAXMEM_PFN < max_pfn) - max_pfn = MAXMEM_PFN + highmem_pages; - - if (highmem_pages + MAXMEM_PFN > max_pfn) { - printk(KERN_WARNING MSG_HIGHMEM_TOO_SMALL, - pages_to_mb(max_pfn - MAXMEM_PFN), - pages_to_mb(highmem_pages)); - highmem_pages = 0; - } -#ifndef CONFIG_HIGHMEM - /* Maximum memory usable is what is directly addressable */ - printk(KERN_WARNING "Warning only %ldMB will be used.\n", MAXMEM>>20); - if (max_pfn > MAX_NONPAE_PFN) - printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n"); - else - printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); - max_pfn = MAXMEM_PFN; -#else /* !CONFIG_HIGHMEM */ -#ifndef CONFIG_HIGHMEM64G - if (max_pfn > MAX_NONPAE_PFN) { - max_pfn = MAX_NONPAE_PFN; - printk(KERN_WARNING MSG_HIGHMEM_TRIMMED); - } -#endif /* !CONFIG_HIGHMEM64G */ -#endif /* !CONFIG_HIGHMEM */ -} - -/* - * Determine low and high memory ranges: - */ -void __init find_low_pfn_range(void) -{ - /* it could update max_pfn */ - - if (max_pfn <= MAXMEM_PFN) - lowmem_pfn_init(); - else - highmem_pfn_init(); -} - -#ifndef CONFIG_NEED_MULTIPLE_NODES -void __init initmem_init(void) -{ -#ifdef CONFIG_HIGHMEM - highstart_pfn = highend_pfn = max_pfn; - if (max_pfn > max_low_pfn) - highstart_pfn = max_low_pfn; - printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", - pages_to_mb(highend_pfn - highstart_pfn)); - num_physpages = highend_pfn; - high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; -#else - num_physpages = max_low_pfn; - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; -#endif - - memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0); - sparse_memory_present_with_active_regions(0); - -#ifdef CONFIG_FLATMEM - 
max_mapnr = num_physpages; -#endif - __vmalloc_start_set = true; - - printk(KERN_NOTICE "%ldMB LOWMEM available.\n", - pages_to_mb(max_low_pfn)); - - setup_bootmem_allocator(); -} -#endif /* !CONFIG_NEED_MULTIPLE_NODES */ - -void __init setup_bootmem_allocator(void) -{ - printk(KERN_INFO " mapped low ram: 0 - %08lx\n", - max_pfn_mapped<<PAGE_SHIFT); - printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT); - - after_bootmem = 1; -} - -/* - * paging_init() sets up the page tables - note that the first 8MB are - * already mapped by head.S. - * - * This routines also unmaps the page at virtual kernel address 0, so - * that we can trap those pesky NULL-reference errors in the kernel. - */ -void __init paging_init(void) -{ - pagetable_init(); - - __flush_tlb_all(); - - kmap_init(); - - /* - * NOTE: at this point the bootmem allocator is fully available. - */ - olpc_dt_build_devicetree(); - sparse_memory_present_with_active_regions(MAX_NUMNODES); - sparse_init(); - zone_sizes_init(); -} - -/* - * Test if the WP bit works in supervisor mode. It isn't supported on 386's - * and also on some strange 486's. All 586+'s are OK. This used to involve - * black magic jumps to work around some nasty CPU bugs, but fortunately the - * switch to using exceptions got rid of all that. - */ -static void __init test_wp_bit(void) -{ - printk(KERN_INFO - "Checking if this processor honours the WP bit even in supervisor mode..."); - - /* Any page-aligned address will do, the test is non-destructive */ - __set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_READONLY); - boot_cpu_data.wp_works_ok = do_test_wp_bit(); - clear_fixmap(FIX_WP_TEST); - - if (!boot_cpu_data.wp_works_ok) { - printk(KERN_CONT "No.\n"); -#ifdef CONFIG_X86_WP_WORKS_OK - panic( - "This kernel doesn't support CPU's with broken WP. 
Recompile it for a 386!"); -#endif - } else { - printk(KERN_CONT "Ok.\n"); - } -} - -void __init mem_init(void) -{ - int codesize, reservedpages, datasize, initsize; - int tmp; - - pci_iommu_alloc(); - -#ifdef CONFIG_FLATMEM - BUG_ON(!mem_map); -#endif - /* - * With CONFIG_DEBUG_PAGEALLOC initialization of highmem pages has to - * be done before free_all_bootmem(). Memblock use free low memory for - * temporary data (see find_range_array()) and for this purpose can use - * pages that was already passed to the buddy allocator, hence marked as - * not accessible in the page tables when compiled with - * CONFIG_DEBUG_PAGEALLOC. Otherwise order of initialization is not - * important here. - */ - set_highmem_pages_init(); - - /* this will put all low memory onto the freelists */ - totalram_pages += free_all_bootmem(); - - reservedpages = 0; - for (tmp = 0; tmp < max_low_pfn; tmp++) - /* - * Only count reserved RAM pages: - */ - if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) - reservedpages++; - - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - - printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, " - "%dk reserved, %dk data, %dk init, %ldk highmem)\n", - nr_free_pages() << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - initsize >> 10, - totalhigh_pages << (PAGE_SHIFT-10)); - - printk(KERN_INFO "virtual kernel memory layout:\n" - " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" -#ifdef CONFIG_HIGHMEM - " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" -#endif - " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n" - " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n" - " .init : 0x%08lx - 0x%08lx (%4ld kB)\n" - " .data : 0x%08lx - 0x%08lx (%4ld kB)\n" - " .text : 0x%08lx - 0x%08lx (%4ld kB)\n", - FIXADDR_START, FIXADDR_TOP, - (FIXADDR_TOP - FIXADDR_START) >> 
10, - -#ifdef CONFIG_HIGHMEM - PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, - (LAST_PKMAP*PAGE_SIZE) >> 10, -#endif - - VMALLOC_START, VMALLOC_END, - (VMALLOC_END - VMALLOC_START) >> 20, - - (unsigned long)__va(0), (unsigned long)high_memory, - ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20, - - (unsigned long)&__init_begin, (unsigned long)&__init_end, - ((unsigned long)&__init_end - - (unsigned long)&__init_begin) >> 10, - - (unsigned long)&_etext, (unsigned long)&_edata, - ((unsigned long)&_edata - (unsigned long)&_etext) >> 10, - - (unsigned long)&_text, (unsigned long)&_etext, - ((unsigned long)&_etext - (unsigned long)&_text) >> 10); - - /* - * Check boundaries twice: Some fundamental inconsistencies can - * be detected at build time already. - */ -#define __FIXADDR_TOP (-PAGE_SIZE) -#ifdef CONFIG_HIGHMEM - BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START); - BUILD_BUG_ON(VMALLOC_END > PKMAP_BASE); -#endif -#define high_memory (-128UL << 20) - BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END); -#undef high_memory -#undef __FIXADDR_TOP - -#ifdef CONFIG_HIGHMEM - BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START); - BUG_ON(VMALLOC_END > PKMAP_BASE); -#endif - BUG_ON(VMALLOC_START >= VMALLOC_END); - BUG_ON((unsigned long)high_memory > VMALLOC_START); - - if (boot_cpu_data.wp_works_ok < 0) - test_wp_bit(); -} - -#ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size) -{ - struct pglist_data *pgdata = NODE_DATA(nid); - struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM; - unsigned long start_pfn = start >> PAGE_SHIFT; - unsigned long nr_pages = size >> PAGE_SHIFT; - - return __add_pages(nid, zone, start_pfn, nr_pages); -} -#endif - -/* - * This function cannot be __init, since exceptions don't work in that - * section. Put this after the callers, so that it cannot be inlined. 
- */ -static noinline int do_test_wp_bit(void) -{ - char tmp_reg; - int flag; - - __asm__ __volatile__( - " movb %0, %1 \n" - "1: movb %1, %0 \n" - " xorl %2, %2 \n" - "2: \n" - _ASM_EXTABLE(1b,2b) - :"=m" (*(char *)fix_to_virt(FIX_WP_TEST)), - "=q" (tmp_reg), - "=r" (flag) - :"2" (1) - :"memory"); - - return flag; -} - -#ifdef CONFIG_DEBUG_RODATA -const int rodata_test_data = 0xC3; -EXPORT_SYMBOL_GPL(rodata_test_data); - -int kernel_set_to_readonly __read_mostly; - -void set_kernel_text_rw(void) -{ - unsigned long start = PFN_ALIGN(_text); - unsigned long size = PFN_ALIGN(_etext) - start; - - if (!kernel_set_to_readonly) - return; - - pr_debug("Set kernel text: %lx - %lx for read write\n", - start, start+size); - - set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT); -} - -void set_kernel_text_ro(void) -{ - unsigned long start = PFN_ALIGN(_text); - unsigned long size = PFN_ALIGN(_etext) - start; - - if (!kernel_set_to_readonly) - return; - - pr_debug("Set kernel text: %lx - %lx for read only\n", - start, start+size); - - set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); -} - -static void mark_nxdata_nx(void) -{ - /* - * When this called, init has already been executed and released, - * so everything past _etext should be NX. - */ - unsigned long start = PFN_ALIGN(_etext); - /* - * This comes from is_kernel_text upper limit. 
Also HPAGE where used: - */ - unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start; - - if (__supported_pte_mask & _PAGE_NX) - printk(KERN_INFO "NX-protecting the kernel data: %luk\n", size >> 10); - set_pages_nx(virt_to_page(start), size >> PAGE_SHIFT); -} - -void mark_rodata_ro(void) -{ - unsigned long start = PFN_ALIGN(_text); - unsigned long size = PFN_ALIGN(_etext) - start; - - set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); - printk(KERN_INFO "Write protecting the kernel text: %luk\n", - size >> 10); - - kernel_set_to_readonly = 1; - -#ifdef CONFIG_CPA_DEBUG - printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n", - start, start+size); - set_pages_rw(virt_to_page(start), size>>PAGE_SHIFT); - - printk(KERN_INFO "Testing CPA: write protecting again\n"); - set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT); -#endif - - start += size; - size = (unsigned long)__end_rodata - start; - set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); - printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", - size >> 10); - rodata_test(); - -#ifdef CONFIG_CPA_DEBUG - printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, start + size); - set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT); - - printk(KERN_INFO "Testing CPA: write protecting again\n"); - set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); -#endif - mark_nxdata_nx(); -} -#endif - diff --git a/ANDROID_3.4.5/arch/x86/mm/init_64.c b/ANDROID_3.4.5/arch/x86/mm/init_64.c deleted file mode 100644 index fc18be0f..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/init_64.c +++ /dev/null @@ -1,988 +0,0 @@ -/* - * linux/arch/x86_64/mm/init.c - * - * Copyright (C) 1995 Linus Torvalds - * Copyright (C) 2000 Pavel Machek <pavel@ucw.cz> - * Copyright (C) 2002,2003 Andi Kleen <ak@suse.de> - */ - -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/string.h> -#include <linux/types.h> -#include <linux/ptrace.h> 
-#include <linux/mman.h> -#include <linux/mm.h> -#include <linux/swap.h> -#include <linux/smp.h> -#include <linux/init.h> -#include <linux/initrd.h> -#include <linux/pagemap.h> -#include <linux/bootmem.h> -#include <linux/memblock.h> -#include <linux/proc_fs.h> -#include <linux/pci.h> -#include <linux/pfn.h> -#include <linux/poison.h> -#include <linux/dma-mapping.h> -#include <linux/module.h> -#include <linux/memory.h> -#include <linux/memory_hotplug.h> -#include <linux/nmi.h> -#include <linux/gfp.h> - -#include <asm/processor.h> -#include <asm/bios_ebda.h> -#include <asm/uaccess.h> -#include <asm/pgtable.h> -#include <asm/pgalloc.h> -#include <asm/dma.h> -#include <asm/fixmap.h> -#include <asm/e820.h> -#include <asm/apic.h> -#include <asm/tlb.h> -#include <asm/mmu_context.h> -#include <asm/proto.h> -#include <asm/smp.h> -#include <asm/sections.h> -#include <asm/kdebug.h> -#include <asm/numa.h> -#include <asm/cacheflush.h> -#include <asm/init.h> -#include <asm/uv/uv.h> -#include <asm/setup.h> - -static int __init parse_direct_gbpages_off(char *arg) -{ - direct_gbpages = 0; - return 0; -} -early_param("nogbpages", parse_direct_gbpages_off); - -static int __init parse_direct_gbpages_on(char *arg) -{ - direct_gbpages = 1; - return 0; -} -early_param("gbpages", parse_direct_gbpages_on); - -/* - * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the - * physical space so we can cache the place of the first one and move - * around without checking the pgd every time. - */ - -pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; -EXPORT_SYMBOL_GPL(__supported_pte_mask); - -int force_personality32; - -/* - * noexec32=on|off - * Control non executable heap for 32bit processes. 
- * To control the stack too use noexec=off - * - * on PROT_READ does not imply PROT_EXEC for 32-bit processes (default) - * off PROT_READ implies PROT_EXEC - */ -static int __init nonx32_setup(char *str) -{ - if (!strcmp(str, "on")) - force_personality32 &= ~READ_IMPLIES_EXEC; - else if (!strcmp(str, "off")) - force_personality32 |= READ_IMPLIES_EXEC; - return 1; -} -__setup("noexec32=", nonx32_setup); - -/* - * When memory was added/removed make sure all the processes MM have - * suitable PGD entries in the local PGD level page. - */ -void sync_global_pgds(unsigned long start, unsigned long end) -{ - unsigned long address; - - for (address = start; address <= end; address += PGDIR_SIZE) { - const pgd_t *pgd_ref = pgd_offset_k(address); - struct page *page; - - if (pgd_none(*pgd_ref)) - continue; - - spin_lock(&pgd_lock); - list_for_each_entry(page, &pgd_list, lru) { - pgd_t *pgd; - spinlock_t *pgt_lock; - - pgd = (pgd_t *)page_address(page) + pgd_index(address); - /* the pgt_lock only for Xen */ - pgt_lock = &pgd_page_get_mm(page)->page_table_lock; - spin_lock(pgt_lock); - - if (pgd_none(*pgd)) - set_pgd(pgd, *pgd_ref); - else - BUG_ON(pgd_page_vaddr(*pgd) - != pgd_page_vaddr(*pgd_ref)); - - spin_unlock(pgt_lock); - } - spin_unlock(&pgd_lock); - } -} - -/* - * NOTE: This function is marked __ref because it calls __init function - * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0. - */ -static __ref void *spp_getpage(void) -{ - void *ptr; - - if (after_bootmem) - ptr = (void *) get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK); - else - ptr = alloc_bootmem_pages(PAGE_SIZE); - - if (!ptr || ((unsigned long)ptr & ~PAGE_MASK)) { - panic("set_pte_phys: cannot allocate page data %s\n", - after_bootmem ? 
"after bootmem" : ""); - } - - pr_debug("spp_getpage %p\n", ptr); - - return ptr; -} - -static pud_t *fill_pud(pgd_t *pgd, unsigned long vaddr) -{ - if (pgd_none(*pgd)) { - pud_t *pud = (pud_t *)spp_getpage(); - pgd_populate(&init_mm, pgd, pud); - if (pud != pud_offset(pgd, 0)) - printk(KERN_ERR "PAGETABLE BUG #00! %p <-> %p\n", - pud, pud_offset(pgd, 0)); - } - return pud_offset(pgd, vaddr); -} - -static pmd_t *fill_pmd(pud_t *pud, unsigned long vaddr) -{ - if (pud_none(*pud)) { - pmd_t *pmd = (pmd_t *) spp_getpage(); - pud_populate(&init_mm, pud, pmd); - if (pmd != pmd_offset(pud, 0)) - printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n", - pmd, pmd_offset(pud, 0)); - } - return pmd_offset(pud, vaddr); -} - -static pte_t *fill_pte(pmd_t *pmd, unsigned long vaddr) -{ - if (pmd_none(*pmd)) { - pte_t *pte = (pte_t *) spp_getpage(); - pmd_populate_kernel(&init_mm, pmd, pte); - if (pte != pte_offset_kernel(pmd, 0)) - printk(KERN_ERR "PAGETABLE BUG #02!\n"); - } - return pte_offset_kernel(pmd, vaddr); -} - -void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte) -{ - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - pud = pud_page + pud_index(vaddr); - pmd = fill_pmd(pud, vaddr); - pte = fill_pte(pmd, vaddr); - - set_pte(pte, new_pte); - - /* - * It's enough to flush this one mapping. 
- * (PGE mappings get flushed as well) - */ - __flush_tlb_one(vaddr); -} - -void set_pte_vaddr(unsigned long vaddr, pte_t pteval) -{ - pgd_t *pgd; - pud_t *pud_page; - - pr_debug("set_pte_vaddr %lx to %lx\n", vaddr, native_pte_val(pteval)); - - pgd = pgd_offset_k(vaddr); - if (pgd_none(*pgd)) { - printk(KERN_ERR - "PGD FIXMAP MISSING, it should be setup in head.S!\n"); - return; - } - pud_page = (pud_t*)pgd_page_vaddr(*pgd); - set_pte_vaddr_pud(pud_page, vaddr, pteval); -} - -pmd_t * __init populate_extra_pmd(unsigned long vaddr) -{ - pgd_t *pgd; - pud_t *pud; - - pgd = pgd_offset_k(vaddr); - pud = fill_pud(pgd, vaddr); - return fill_pmd(pud, vaddr); -} - -pte_t * __init populate_extra_pte(unsigned long vaddr) -{ - pmd_t *pmd; - - pmd = populate_extra_pmd(vaddr); - return fill_pte(pmd, vaddr); -} - -/* - * Create large page table mappings for a range of physical addresses. - */ -static void __init __init_extra_mapping(unsigned long phys, unsigned long size, - pgprot_t prot) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - - BUG_ON((phys & ~PMD_MASK) || (size & ~PMD_MASK)); - for (; size; phys += PMD_SIZE, size -= PMD_SIZE) { - pgd = pgd_offset_k((unsigned long)__va(phys)); - if (pgd_none(*pgd)) { - pud = (pud_t *) spp_getpage(); - set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE | - _PAGE_USER)); - } - pud = pud_offset(pgd, (unsigned long)__va(phys)); - if (pud_none(*pud)) { - pmd = (pmd_t *) spp_getpage(); - set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | - _PAGE_USER)); - } - pmd = pmd_offset(pud, phys); - BUG_ON(!pmd_none(*pmd)); - set_pmd(pmd, __pmd(phys | pgprot_val(prot))); - } -} - -void __init init_extra_mapping_wb(unsigned long phys, unsigned long size) -{ - __init_extra_mapping(phys, size, PAGE_KERNEL_LARGE); -} - -void __init init_extra_mapping_uc(unsigned long phys, unsigned long size) -{ - __init_extra_mapping(phys, size, PAGE_KERNEL_LARGE_NOCACHE); -} - -/* - * The head.S code sets up the kernel high mapping: - * - * from __START_KERNEL_map to 
__START_KERNEL_map + size (== _end-_text) - * - * phys_addr holds the negative offset to the kernel, which is added - * to the compile time generated pmds. This results in invalid pmds up - * to the point where we hit the physaddr 0 mapping. - * - * We limit the mappings to the region from _text to _brk_end. _brk_end - * is rounded up to the 2MB boundary. This catches the invalid pmds as - * well, as they are located before _text: - */ -void __init cleanup_highmap(void) -{ - unsigned long vaddr = __START_KERNEL_map; - unsigned long vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT); - unsigned long end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1; - pmd_t *pmd = level2_kernel_pgt; - - for (; vaddr + PMD_SIZE - 1 < vaddr_end; pmd++, vaddr += PMD_SIZE) { - if (pmd_none(*pmd)) - continue; - if (vaddr < (unsigned long) _text || vaddr > end) - set_pmd(pmd, __pmd(0)); - } -} - -static __ref void *alloc_low_page(unsigned long *phys) -{ - unsigned long pfn = pgt_buf_end++; - void *adr; - - if (after_bootmem) { - adr = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK); - *phys = __pa(adr); - - return adr; - } - - if (pfn >= pgt_buf_top) - panic("alloc_low_page: ran out of memory"); - - adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); - clear_page(adr); - *phys = pfn * PAGE_SIZE; - return adr; -} - -static __ref void *map_low_page(void *virt) -{ - void *adr; - unsigned long phys, left; - - if (after_bootmem) - return virt; - - phys = __pa(virt); - left = phys & (PAGE_SIZE - 1); - adr = early_memremap(phys & PAGE_MASK, PAGE_SIZE); - adr = (void *)(((unsigned long)adr) | left); - - return adr; -} - -static __ref void unmap_low_page(void *adr) -{ - if (after_bootmem) - return; - - early_iounmap((void *)((unsigned long)adr & PAGE_MASK), PAGE_SIZE); -} - -static unsigned long __meminit -phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, - pgprot_t prot) -{ - unsigned pages = 0; - unsigned long last_map_addr = end; - int i; - - pte_t *pte = 
pte_page + pte_index(addr); - - for(i = pte_index(addr); i < PTRS_PER_PTE; i++, addr += PAGE_SIZE, pte++) { - - if (addr >= end) { - if (!after_bootmem) { - for(; i < PTRS_PER_PTE; i++, pte++) - set_pte(pte, __pte(0)); - } - break; - } - - /* - * We will re-use the existing mapping. - * Xen for example has some special requirements, like mapping - * pagetable pages as RO. So assume someone who pre-setup - * these mappings are more intelligent. - */ - if (pte_val(*pte)) { - pages++; - continue; - } - - if (0) - printk(" pte=%p addr=%lx pte=%016lx\n", - pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte); - pages++; - set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, prot)); - last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE; - } - - update_page_count(PG_LEVEL_4K, pages); - - return last_map_addr; -} - -static unsigned long __meminit -phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, - unsigned long page_size_mask, pgprot_t prot) -{ - unsigned long pages = 0; - unsigned long last_map_addr = end; - - int i = pmd_index(address); - - for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) { - unsigned long pte_phys; - pmd_t *pmd = pmd_page + pmd_index(address); - pte_t *pte; - pgprot_t new_prot = prot; - - if (address >= end) { - if (!after_bootmem) { - for (; i < PTRS_PER_PMD; i++, pmd++) - set_pmd(pmd, __pmd(0)); - } - break; - } - - if (pmd_val(*pmd)) { - if (!pmd_large(*pmd)) { - spin_lock(&init_mm.page_table_lock); - pte = map_low_page((pte_t *)pmd_page_vaddr(*pmd)); - last_map_addr = phys_pte_init(pte, address, - end, prot); - unmap_low_page(pte); - spin_unlock(&init_mm.page_table_lock); - continue; - } - /* - * If we are ok with PG_LEVEL_2M mapping, then we will - * use the existing mapping, - * - * Otherwise, we will split the large page mapping but - * use the same existing protection bits except for - * large page, so that we don't violate Intel's TLB - * Application note (317080) which says, while changing - * the page sizes, new and old 
translations should - * not differ with respect to page frame and - * attributes. - */ - if (page_size_mask & (1 << PG_LEVEL_2M)) { - pages++; - continue; - } - new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd)); - } - - if (page_size_mask & (1<<PG_LEVEL_2M)) { - pages++; - spin_lock(&init_mm.page_table_lock); - set_pte((pte_t *)pmd, - pfn_pte(address >> PAGE_SHIFT, - __pgprot(pgprot_val(prot) | _PAGE_PSE))); - spin_unlock(&init_mm.page_table_lock); - last_map_addr = (address & PMD_MASK) + PMD_SIZE; - continue; - } - - pte = alloc_low_page(&pte_phys); - last_map_addr = phys_pte_init(pte, address, end, new_prot); - unmap_low_page(pte); - - spin_lock(&init_mm.page_table_lock); - pmd_populate_kernel(&init_mm, pmd, __va(pte_phys)); - spin_unlock(&init_mm.page_table_lock); - } - update_page_count(PG_LEVEL_2M, pages); - return last_map_addr; -} - -static unsigned long __meminit -phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, - unsigned long page_size_mask) -{ - unsigned long pages = 0; - unsigned long last_map_addr = end; - int i = pud_index(addr); - - for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) { - unsigned long pmd_phys; - pud_t *pud = pud_page + pud_index(addr); - pmd_t *pmd; - pgprot_t prot = PAGE_KERNEL; - - if (addr >= end) - break; - - if (!after_bootmem && - !e820_any_mapped(addr, addr+PUD_SIZE, 0)) { - set_pud(pud, __pud(0)); - continue; - } - - if (pud_val(*pud)) { - if (!pud_large(*pud)) { - pmd = map_low_page(pmd_offset(pud, 0)); - last_map_addr = phys_pmd_init(pmd, addr, end, - page_size_mask, prot); - unmap_low_page(pmd); - __flush_tlb_all(); - continue; - } - /* - * If we are ok with PG_LEVEL_1G mapping, then we will - * use the existing mapping. 
- * - * Otherwise, we will split the gbpage mapping but use - * the same existing protection bits except for large - * page, so that we don't violate Intel's TLB - * Application note (317080) which says, while changing - * the page sizes, new and old translations should - * not differ with respect to page frame and - * attributes. - */ - if (page_size_mask & (1 << PG_LEVEL_1G)) { - pages++; - continue; - } - prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud)); - } - - if (page_size_mask & (1<<PG_LEVEL_1G)) { - pages++; - spin_lock(&init_mm.page_table_lock); - set_pte((pte_t *)pud, - pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); - spin_unlock(&init_mm.page_table_lock); - last_map_addr = (addr & PUD_MASK) + PUD_SIZE; - continue; - } - - pmd = alloc_low_page(&pmd_phys); - last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask, - prot); - unmap_low_page(pmd); - - spin_lock(&init_mm.page_table_lock); - pud_populate(&init_mm, pud, __va(pmd_phys)); - spin_unlock(&init_mm.page_table_lock); - } - __flush_tlb_all(); - - update_page_count(PG_LEVEL_1G, pages); - - return last_map_addr; -} - -unsigned long __meminit -kernel_physical_mapping_init(unsigned long start, - unsigned long end, - unsigned long page_size_mask) -{ - bool pgd_changed = false; - unsigned long next, last_map_addr = end; - unsigned long addr; - - start = (unsigned long)__va(start); - end = (unsigned long)__va(end); - addr = start; - - for (; start < end; start = next) { - pgd_t *pgd = pgd_offset_k(start); - unsigned long pud_phys; - pud_t *pud; - - next = (start + PGDIR_SIZE) & PGDIR_MASK; - if (next > end) - next = end; - - if (pgd_val(*pgd)) { - pud = map_low_page((pud_t *)pgd_page_vaddr(*pgd)); - last_map_addr = phys_pud_init(pud, __pa(start), - __pa(end), page_size_mask); - unmap_low_page(pud); - continue; - } - - pud = alloc_low_page(&pud_phys); - last_map_addr = phys_pud_init(pud, __pa(start), __pa(next), - page_size_mask); - unmap_low_page(pud); - - spin_lock(&init_mm.page_table_lock); - 
pgd_populate(&init_mm, pgd, __va(pud_phys)); - spin_unlock(&init_mm.page_table_lock); - pgd_changed = true; - } - - if (pgd_changed) - sync_global_pgds(addr, end); - - __flush_tlb_all(); - - return last_map_addr; -} - -#ifndef CONFIG_NUMA -void __init initmem_init(void) -{ - memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0); -} -#endif - -void __init paging_init(void) -{ - sparse_memory_present_with_active_regions(MAX_NUMNODES); - sparse_init(); - - /* - * clear the default setting with node 0 - * note: don't use nodes_clear here, that is really clearing when - * numa support is not compiled in, and later node_set_state - * will not set it back. - */ - node_clear_state(0, N_NORMAL_MEMORY); - - zone_sizes_init(); -} - -/* - * Memory hotplug specific functions - */ -#ifdef CONFIG_MEMORY_HOTPLUG -/* - * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need - * updating. - */ -static void update_end_of_memory_vars(u64 start, u64 size) -{ - unsigned long end_pfn = PFN_UP(start + size); - - if (end_pfn > max_pfn) { - max_pfn = end_pfn; - max_low_pfn = end_pfn; - high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; - } -} - -/* - * Memory is added always to NORMAL zone. This means you will never get - * additional DMA/DMA32 memory. 
- */ -int arch_add_memory(int nid, u64 start, u64 size) -{ - struct pglist_data *pgdat = NODE_DATA(nid); - struct zone *zone = pgdat->node_zones + ZONE_NORMAL; - unsigned long last_mapped_pfn, start_pfn = start >> PAGE_SHIFT; - unsigned long nr_pages = size >> PAGE_SHIFT; - int ret; - - last_mapped_pfn = init_memory_mapping(start, start + size); - if (last_mapped_pfn > max_pfn_mapped) - max_pfn_mapped = last_mapped_pfn; - - ret = __add_pages(nid, zone, start_pfn, nr_pages); - WARN_ON_ONCE(ret); - - /* update max_pfn, max_low_pfn and high_memory */ - update_end_of_memory_vars(start, size); - - return ret; -} -EXPORT_SYMBOL_GPL(arch_add_memory); - -#endif /* CONFIG_MEMORY_HOTPLUG */ - -static struct kcore_list kcore_vsyscall; - -void __init mem_init(void) -{ - long codesize, reservedpages, datasize, initsize; - unsigned long absent_pages; - - pci_iommu_alloc(); - - /* clear_bss() already clear the empty_zero_page */ - - reservedpages = 0; - - /* this will put all low memory onto the freelists */ -#ifdef CONFIG_NUMA - totalram_pages = numa_free_all_bootmem(); -#else - totalram_pages = free_all_bootmem(); -#endif - - absent_pages = absent_pages_in_range(0, max_pfn); - reservedpages = max_pfn - totalram_pages - absent_pages; - after_bootmem = 1; - - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - - /* Register memory areas for /proc/kcore */ - kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START, - VSYSCALL_END - VSYSCALL_START, KCORE_OTHER); - - printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, " - "%ldk absent, %ldk reserved, %ldk data, %ldk init)\n", - nr_free_pages() << (PAGE_SHIFT-10), - max_pfn << (PAGE_SHIFT-10), - codesize >> 10, - absent_pages << (PAGE_SHIFT-10), - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - initsize >> 10); -} - -#ifdef CONFIG_DEBUG_RODATA -const int rodata_test_data = 
0xC3; -EXPORT_SYMBOL_GPL(rodata_test_data); - -int kernel_set_to_readonly; - -void set_kernel_text_rw(void) -{ - unsigned long start = PFN_ALIGN(_text); - unsigned long end = PFN_ALIGN(__stop___ex_table); - - if (!kernel_set_to_readonly) - return; - - pr_debug("Set kernel text: %lx - %lx for read write\n", - start, end); - - /* - * Make the kernel identity mapping for text RW. Kernel text - * mapping will always be RO. Refer to the comment in - * static_protections() in pageattr.c - */ - set_memory_rw(start, (end - start) >> PAGE_SHIFT); -} - -void set_kernel_text_ro(void) -{ - unsigned long start = PFN_ALIGN(_text); - unsigned long end = PFN_ALIGN(__stop___ex_table); - - if (!kernel_set_to_readonly) - return; - - pr_debug("Set kernel text: %lx - %lx for read only\n", - start, end); - - /* - * Set the kernel identity mapping for text RO. - */ - set_memory_ro(start, (end - start) >> PAGE_SHIFT); -} - -void mark_rodata_ro(void) -{ - unsigned long start = PFN_ALIGN(_text); - unsigned long rodata_start = - ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; - unsigned long end = (unsigned long) &__end_rodata_hpage_align; - unsigned long text_end = PAGE_ALIGN((unsigned long) &__stop___ex_table); - unsigned long rodata_end = PAGE_ALIGN((unsigned long) &__end_rodata); - unsigned long data_start = (unsigned long) &_sdata; - - printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", - (end - start) >> 10); - set_memory_ro(start, (end - start) >> PAGE_SHIFT); - - kernel_set_to_readonly = 1; - - /* - * The rodata section (but not the kernel text!) should also be - * not-executable. 
- */ - set_memory_nx(rodata_start, (end - rodata_start) >> PAGE_SHIFT); - - rodata_test(); - -#ifdef CONFIG_CPA_DEBUG - printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, end); - set_memory_rw(start, (end-start) >> PAGE_SHIFT); - - printk(KERN_INFO "Testing CPA: again\n"); - set_memory_ro(start, (end-start) >> PAGE_SHIFT); -#endif - - free_init_pages("unused kernel memory", - (unsigned long) page_address(virt_to_page(text_end)), - (unsigned long) - page_address(virt_to_page(rodata_start))); - free_init_pages("unused kernel memory", - (unsigned long) page_address(virt_to_page(rodata_end)), - (unsigned long) page_address(virt_to_page(data_start))); -} - -#endif - -int kern_addr_valid(unsigned long addr) -{ - unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT; - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - if (above != 0 && above != -1UL) - return 0; - - pgd = pgd_offset_k(addr); - if (pgd_none(*pgd)) - return 0; - - pud = pud_offset(pgd, addr); - if (pud_none(*pud)) - return 0; - - pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) - return 0; - - if (pmd_large(*pmd)) - return pfn_valid(pmd_pfn(*pmd)); - - pte = pte_offset_kernel(pmd, addr); - if (pte_none(*pte)) - return 0; - - return pfn_valid(pte_pfn(*pte)); -} - -/* - * A pseudo VMA to allow ptrace access for the vsyscall page. This only - * covers the 64bit vsyscall page now. 
32bit has a real VMA now and does - * not need special handling anymore: - */ -static struct vm_area_struct gate_vma = { - .vm_start = VSYSCALL_START, - .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES * PAGE_SIZE), - .vm_page_prot = PAGE_READONLY_EXEC, - .vm_flags = VM_READ | VM_EXEC -}; - -struct vm_area_struct *get_gate_vma(struct mm_struct *mm) -{ -#ifdef CONFIG_IA32_EMULATION - if (!mm || mm->context.ia32_compat) - return NULL; -#endif - return &gate_vma; -} - -int in_gate_area(struct mm_struct *mm, unsigned long addr) -{ - struct vm_area_struct *vma = get_gate_vma(mm); - - if (!vma) - return 0; - - return (addr >= vma->vm_start) && (addr < vma->vm_end); -} - -/* - * Use this when you have no reliable mm, typically from interrupt - * context. It is less reliable than using a task's mm and may give - * false positives. - */ -int in_gate_area_no_mm(unsigned long addr) -{ - return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END); -} - -const char *arch_vma_name(struct vm_area_struct *vma) -{ - if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) - return "[vdso]"; - if (vma == &gate_vma) - return "[vsyscall]"; - return NULL; -} - -#ifdef CONFIG_X86_UV -unsigned long memory_block_size_bytes(void) -{ - if (is_uv_system()) { - printk(KERN_INFO "UV: memory block size 2GB\n"); - return 2UL * 1024 * 1024 * 1024; - } - return MIN_MEMORY_BLOCK_SIZE; -} -#endif - -#ifdef CONFIG_SPARSEMEM_VMEMMAP -/* - * Initialise the sparsemem vmemmap using huge-pages at the PMD level. 
- */ -static long __meminitdata addr_start, addr_end; -static void __meminitdata *p_start, *p_end; -static int __meminitdata node_start; - -int __meminit -vmemmap_populate(struct page *start_page, unsigned long size, int node) -{ - unsigned long addr = (unsigned long)start_page; - unsigned long end = (unsigned long)(start_page + size); - unsigned long next; - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - - for (; addr < end; addr = next) { - void *p = NULL; - - pgd = vmemmap_pgd_populate(addr, node); - if (!pgd) - return -ENOMEM; - - pud = vmemmap_pud_populate(pgd, addr, node); - if (!pud) - return -ENOMEM; - - if (!cpu_has_pse) { - next = (addr + PAGE_SIZE) & PAGE_MASK; - pmd = vmemmap_pmd_populate(pud, addr, node); - - if (!pmd) - return -ENOMEM; - - p = vmemmap_pte_populate(pmd, addr, node); - - if (!p) - return -ENOMEM; - - addr_end = addr + PAGE_SIZE; - p_end = p + PAGE_SIZE; - } else { - next = pmd_addr_end(addr, end); - - pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) { - pte_t entry; - - p = vmemmap_alloc_block_buf(PMD_SIZE, node); - if (!p) - return -ENOMEM; - - entry = pfn_pte(__pa(p) >> PAGE_SHIFT, - PAGE_KERNEL_LARGE); - set_pmd(pmd, __pmd(pte_val(entry))); - - /* check to see if we have contiguous blocks */ - if (p_end != p || node_start != node) { - if (p_start) - printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n", - addr_start, addr_end-1, p_start, p_end-1, node_start); - addr_start = addr; - node_start = node; - p_start = p; - } - - addr_end = addr + PMD_SIZE; - p_end = p + PMD_SIZE; - } else - vmemmap_verify((pte_t *)pmd, node, addr, next); - } - - } - sync_global_pgds((unsigned long)start_page, end); - return 0; -} - -void __meminit vmemmap_populate_print_last(void) -{ - if (p_start) { - printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n", - addr_start, addr_end-1, p_start, p_end-1, node_start); - p_start = NULL; - p_end = NULL; - node_start = 0; - } -} -#endif diff --git a/ANDROID_3.4.5/arch/x86/mm/iomap_32.c 
b/ANDROID_3.4.5/arch/x86/mm/iomap_32.c deleted file mode 100644 index 7b179b49..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/iomap_32.c +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright © 2008 Ingo Molnar - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. - */ - -#include <asm/iomap.h> -#include <asm/pat.h> -#include <linux/module.h> -#include <linux/highmem.h> - -static int is_io_mapping_possible(resource_size_t base, unsigned long size) -{ -#if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT) - /* There is no way to map greater than 1 << 32 address without PAE */ - if (base + size > 0x100000000ULL) - return 0; -#endif - return 1; -} - -int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot) -{ - unsigned long flag = _PAGE_CACHE_WC; - int ret; - - if (!is_io_mapping_possible(base, size)) - return -EINVAL; - - ret = io_reserve_memtype(base, base + size, &flag); - if (ret) - return ret; - - *prot = __pgprot(__PAGE_KERNEL | flag); - return 0; -} -EXPORT_SYMBOL_GPL(iomap_create_wc); - -void iomap_free(resource_size_t base, unsigned long size) -{ - io_free_memtype(base, base + size); -} -EXPORT_SYMBOL_GPL(iomap_free); - -void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) -{ - unsigned long vaddr; - int idx, type; - - pagefault_disable(); - - type = 
kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR * smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - set_pte(kmap_pte - idx, pfn_pte(pfn, prot)); - arch_flush_lazy_mmu_mode(); - - return (void *)vaddr; -} - -/* - * Map 'pfn' using protections 'prot' - */ -void __iomem * -iomap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) -{ - /* - * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS. - * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the - * MTRR is UC or WC. UC_MINUS gets the real intention, of the - * user, which is "WC if the MTRR is WC, UC if you can't do that." - */ - if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC)) - prot = PAGE_KERNEL_UC_MINUS; - - return (void __force __iomem *) kmap_atomic_prot_pfn(pfn, prot); -} -EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn); - -void -iounmap_atomic(void __iomem *kvaddr) -{ - unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - - if (vaddr >= __fix_to_virt(FIX_KMAP_END) && - vaddr <= __fix_to_virt(FIX_KMAP_BEGIN)) { - int idx, type; - - type = kmap_atomic_idx(); - idx = type + KM_TYPE_NR * smp_processor_id(); - -#ifdef CONFIG_DEBUG_HIGHMEM - WARN_ON_ONCE(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); -#endif - /* - * Force other mappings to Oops if they'll try to access this - * pte without first remap it. Keeping stale mappings around - * is a bad idea also, in case the page changes cacheability - * attributes or becomes a protected page in a hypervisor. - */ - kpte_clear_flush(kmap_pte-idx, vaddr); - kmap_atomic_idx_pop(); - } - - pagefault_enable(); -} -EXPORT_SYMBOL_GPL(iounmap_atomic); diff --git a/ANDROID_3.4.5/arch/x86/mm/ioremap.c b/ANDROID_3.4.5/arch/x86/mm/ioremap.c deleted file mode 100644 index be1ef574..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/ioremap.c +++ /dev/null @@ -1,628 +0,0 @@ -/* - * Re-map IO memory to kernel address space so that we can access it. 
- * This is needed for high PCI addresses that aren't mapped in the - * 640k-1MB IO memory area on PC's - * - * (C) Copyright 1995 1996 Linus Torvalds - */ - -#include <linux/bootmem.h> -#include <linux/init.h> -#include <linux/io.h> -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> -#include <linux/mmiotrace.h> - -#include <asm/cacheflush.h> -#include <asm/e820.h> -#include <asm/fixmap.h> -#include <asm/pgtable.h> -#include <asm/tlbflush.h> -#include <asm/pgalloc.h> -#include <asm/pat.h> - -#include "physaddr.h" - -/* - * Fix up the linear direct mapping of the kernel to avoid cache attribute - * conflicts. - */ -int ioremap_change_attr(unsigned long vaddr, unsigned long size, - unsigned long prot_val) -{ - unsigned long nrpages = size >> PAGE_SHIFT; - int err; - - switch (prot_val) { - case _PAGE_CACHE_UC: - default: - err = _set_memory_uc(vaddr, nrpages); - break; - case _PAGE_CACHE_WC: - err = _set_memory_wc(vaddr, nrpages); - break; - case _PAGE_CACHE_WB: - err = _set_memory_wb(vaddr, nrpages); - break; - } - - return err; -} - -/* - * Remap an arbitrary physical address space into the kernel virtual - * address space. Needed when the kernel wants to access high addresses - * directly. - * - * NOTE! We need to allow non-page-aligned mappings too: we will obviously - * have to convert them into an offset in a page-aligned mapping, but the - * caller shouldn't need to know that small detail. 
- */ -static void __iomem *__ioremap_caller(resource_size_t phys_addr, - unsigned long size, unsigned long prot_val, void *caller) -{ - unsigned long offset, vaddr; - resource_size_t pfn, last_pfn, last_addr; - const resource_size_t unaligned_phys_addr = phys_addr; - const unsigned long unaligned_size = size; - struct vm_struct *area; - unsigned long new_prot_val; - pgprot_t prot; - int retval; - void __iomem *ret_addr; - - /* Don't allow wraparound or zero size */ - last_addr = phys_addr + size - 1; - if (!size || last_addr < phys_addr) - return NULL; - - if (!phys_addr_valid(phys_addr)) { - printk(KERN_WARNING "ioremap: invalid physical address %llx\n", - (unsigned long long)phys_addr); - WARN_ON_ONCE(1); - return NULL; - } - - /* - * Don't remap the low PCI/ISA area, it's always mapped.. - */ - if (is_ISA_range(phys_addr, last_addr)) - return (__force void __iomem *)phys_to_virt(phys_addr); - - /* - * Don't allow anybody to remap normal RAM that we're using.. - */ - last_pfn = last_addr >> PAGE_SHIFT; - for (pfn = phys_addr >> PAGE_SHIFT; pfn <= last_pfn; pfn++) { - int is_ram = page_is_ram(pfn); - - if (is_ram && pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn))) - return NULL; - WARN_ON_ONCE(is_ram); - } - - /* - * Mappings have to be page-aligned - */ - offset = phys_addr & ~PAGE_MASK; - phys_addr &= PHYSICAL_PAGE_MASK; - size = PAGE_ALIGN(last_addr+1) - phys_addr; - - retval = reserve_memtype(phys_addr, (u64)phys_addr + size, - prot_val, &new_prot_val); - if (retval) { - printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval); - return NULL; - } - - if (prot_val != new_prot_val) { - if (!is_new_memtype_allowed(phys_addr, size, - prot_val, new_prot_val)) { - printk(KERN_ERR - "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n", - (unsigned long long)phys_addr, - (unsigned long long)(phys_addr + size), - prot_val, new_prot_val); - goto err_free_memtype; - } - prot_val = new_prot_val; - } - - switch (prot_val) { - case _PAGE_CACHE_UC: - 
default: - prot = PAGE_KERNEL_IO_NOCACHE; - break; - case _PAGE_CACHE_UC_MINUS: - prot = PAGE_KERNEL_IO_UC_MINUS; - break; - case _PAGE_CACHE_WC: - prot = PAGE_KERNEL_IO_WC; - break; - case _PAGE_CACHE_WB: - prot = PAGE_KERNEL_IO; - break; - } - - /* - * Ok, go for it.. - */ - area = get_vm_area_caller(size, VM_IOREMAP, caller); - if (!area) - goto err_free_memtype; - area->phys_addr = phys_addr; - vaddr = (unsigned long) area->addr; - - if (kernel_map_sync_memtype(phys_addr, size, prot_val)) - goto err_free_area; - - if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) - goto err_free_area; - - ret_addr = (void __iomem *) (vaddr + offset); - mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr); - - /* - * Check if the request spans more than any BAR in the iomem resource - * tree. - */ - WARN_ONCE(iomem_map_sanity_check(unaligned_phys_addr, unaligned_size), - KERN_INFO "Info: mapping multiple BARs. Your kernel is fine."); - - return ret_addr; -err_free_area: - free_vm_area(area); -err_free_memtype: - free_memtype(phys_addr, phys_addr + size); - return NULL; -} - -/** - * ioremap_nocache - map bus memory into CPU space - * @offset: bus address of the memory - * @size: size of the resource to map - * - * ioremap_nocache performs a platform specific sequence of operations to - * make bus memory CPU accessible via the readb/readw/readl/writeb/ - * writew/writel functions and the other mmio helpers. The returned - * address is not guaranteed to be usable directly as a virtual - * address. - * - * This version of ioremap ensures that the memory is marked uncachable - * on the CPU as well as honouring existing caching rules from things like - * the PCI bus. Note that there are other caches and buffers on many - * busses. In particular driver authors should read up on PCI writes - * - * It's useful if some control registers are in such an area and - * write combining or read caching is not desirable: - * - * Must be freed with iounmap. 
- */ -void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size) -{ - /* - * Ideally, this should be: - * pat_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS; - * - * Till we fix all X drivers to use ioremap_wc(), we will use - * UC MINUS. - */ - unsigned long val = _PAGE_CACHE_UC_MINUS; - - return __ioremap_caller(phys_addr, size, val, - __builtin_return_address(0)); -} -EXPORT_SYMBOL(ioremap_nocache); - -/** - * ioremap_wc - map memory into CPU space write combined - * @offset: bus address of the memory - * @size: size of the resource to map - * - * This version of ioremap ensures that the memory is marked write combining. - * Write combining allows faster writes to some hardware devices. - * - * Must be freed with iounmap. - */ -void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size) -{ - if (pat_enabled) - return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC, - __builtin_return_address(0)); - else - return ioremap_nocache(phys_addr, size); -} -EXPORT_SYMBOL(ioremap_wc); - -void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size) -{ - return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WB, - __builtin_return_address(0)); -} -EXPORT_SYMBOL(ioremap_cache); - -void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size, - unsigned long prot_val) -{ - return __ioremap_caller(phys_addr, size, (prot_val & _PAGE_CACHE_MASK), - __builtin_return_address(0)); -} -EXPORT_SYMBOL(ioremap_prot); - -/** - * iounmap - Free a IO remapping - * @addr: virtual address from ioremap_* - * - * Caller must ensure there is only one unmapping for the same pointer. - */ -void iounmap(volatile void __iomem *addr) -{ - struct vm_struct *p, *o; - - if ((void __force *)addr <= high_memory) - return; - - /* - * __ioremap special-cases the PCI/ISA range by not instantiating a - * vm_area and by simply returning an address into the kernel mapping - * of ISA space. So handle that here. 
- */ - if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) && - (void __force *)addr < phys_to_virt(ISA_END_ADDRESS)) - return; - - addr = (volatile void __iomem *) - (PAGE_MASK & (unsigned long __force)addr); - - mmiotrace_iounmap(addr); - - /* Use the vm area unlocked, assuming the caller - ensures there isn't another iounmap for the same address - in parallel. Reuse of the virtual address is prevented by - leaving it in the global lists until we're done with it. - cpa takes care of the direct mappings. */ - read_lock(&vmlist_lock); - for (p = vmlist; p; p = p->next) { - if (p->addr == (void __force *)addr) - break; - } - read_unlock(&vmlist_lock); - - if (!p) { - printk(KERN_ERR "iounmap: bad address %p\n", addr); - dump_stack(); - return; - } - - free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p)); - - /* Finally remove it */ - o = remove_vm_area((void __force *)addr); - BUG_ON(p != o || o == NULL); - kfree(p); -} -EXPORT_SYMBOL(iounmap); - -/* - * Convert a physical pointer to a virtual kernel pointer for /dev/mem - * access - */ -void *xlate_dev_mem_ptr(unsigned long phys) -{ - void *addr; - unsigned long start = phys & PAGE_MASK; - - /* If page is RAM, we can use __va. Otherwise ioremap and unmap. 
*/ - if (page_is_ram(start >> PAGE_SHIFT)) - return __va(phys); - - addr = (void __force *)ioremap_cache(start, PAGE_SIZE); - if (addr) - addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK)); - - return addr; -} - -void unxlate_dev_mem_ptr(unsigned long phys, void *addr) -{ - if (page_is_ram(phys >> PAGE_SHIFT)) - return; - - iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK)); - return; -} - -static int __initdata early_ioremap_debug; - -static int __init early_ioremap_debug_setup(char *str) -{ - early_ioremap_debug = 1; - - return 0; -} -early_param("early_ioremap_debug", early_ioremap_debug_setup); - -static __initdata int after_paging_init; -static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss; - -static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) -{ - /* Don't assume we're using swapper_pg_dir at this point */ - pgd_t *base = __va(read_cr3()); - pgd_t *pgd = &base[pgd_index(addr)]; - pud_t *pud = pud_offset(pgd, addr); - pmd_t *pmd = pmd_offset(pud, addr); - - return pmd; -} - -static inline pte_t * __init early_ioremap_pte(unsigned long addr) -{ - return &bm_pte[pte_index(addr)]; -} - -bool __init is_early_ioremap_ptep(pte_t *ptep) -{ - return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)]; -} - -static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata; - -void __init early_ioremap_init(void) -{ - pmd_t *pmd; - int i; - - if (early_ioremap_debug) - printk(KERN_INFO "early_ioremap_init()\n"); - - for (i = 0; i < FIX_BTMAPS_SLOTS; i++) - slot_virt[i] = __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i); - - pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); - memset(bm_pte, 0, sizeof(bm_pte)); - pmd_populate_kernel(&init_mm, pmd, bm_pte); - - /* - * The boot-ioremap range spans multiple pmds, for which - * we are not prepared: - */ -#define __FIXADDR_TOP (-PAGE_SIZE) - BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) - != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); -#undef __FIXADDR_TOP - 
if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) { - WARN_ON(1); - printk(KERN_WARNING "pmd %p != %p\n", - pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))); - printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n", - fix_to_virt(FIX_BTMAP_BEGIN)); - printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END): %08lx\n", - fix_to_virt(FIX_BTMAP_END)); - - printk(KERN_WARNING "FIX_BTMAP_END: %d\n", FIX_BTMAP_END); - printk(KERN_WARNING "FIX_BTMAP_BEGIN: %d\n", - FIX_BTMAP_BEGIN); - } -} - -void __init early_ioremap_reset(void) -{ - after_paging_init = 1; -} - -static void __init __early_set_fixmap(enum fixed_addresses idx, - phys_addr_t phys, pgprot_t flags) -{ - unsigned long addr = __fix_to_virt(idx); - pte_t *pte; - - if (idx >= __end_of_fixed_addresses) { - BUG(); - return; - } - pte = early_ioremap_pte(addr); - - if (pgprot_val(flags)) - set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); - else - pte_clear(&init_mm, addr, pte); - __flush_tlb_one(addr); -} - -static inline void __init early_set_fixmap(enum fixed_addresses idx, - phys_addr_t phys, pgprot_t prot) -{ - if (after_paging_init) - __set_fixmap(idx, phys, prot); - else - __early_set_fixmap(idx, phys, prot); -} - -static inline void __init early_clear_fixmap(enum fixed_addresses idx) -{ - if (after_paging_init) - clear_fixmap(idx); - else - __early_set_fixmap(idx, 0, __pgprot(0)); -} - -static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; -static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; - -void __init fixup_early_ioremap(void) -{ - int i; - - for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { - if (prev_map[i]) { - WARN_ON(1); - break; - } - } - - early_ioremap_init(); -} - -static int __init check_early_ioremap_leak(void) -{ - int count = 0; - int i; - - for (i = 0; i < FIX_BTMAPS_SLOTS; i++) - if (prev_map[i]) - count++; - - if (!count) - return 0; - WARN(1, KERN_WARNING - "Debug warning: early ioremap leak of %d areas detected.\n", - count); - printk(KERN_WARNING - "please boot with 
early_ioremap_debug and report the dmesg.\n"); - - return 1; -} -late_initcall(check_early_ioremap_leak); - -static void __init __iomem * -__early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot) -{ - unsigned long offset; - resource_size_t last_addr; - unsigned int nrpages; - enum fixed_addresses idx0, idx; - int i, slot; - - WARN_ON(system_state != SYSTEM_BOOTING); - - slot = -1; - for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { - if (!prev_map[i]) { - slot = i; - break; - } - } - - if (slot < 0) { - printk(KERN_INFO "early_iomap(%08llx, %08lx) not found slot\n", - (u64)phys_addr, size); - WARN_ON(1); - return NULL; - } - - if (early_ioremap_debug) { - printk(KERN_INFO "early_ioremap(%08llx, %08lx) [%d] => ", - (u64)phys_addr, size, slot); - dump_stack(); - } - - /* Don't allow wraparound or zero size */ - last_addr = phys_addr + size - 1; - if (!size || last_addr < phys_addr) { - WARN_ON(1); - return NULL; - } - - prev_size[slot] = size; - /* - * Mappings have to be page-aligned - */ - offset = phys_addr & ~PAGE_MASK; - phys_addr &= PAGE_MASK; - size = PAGE_ALIGN(last_addr + 1) - phys_addr; - - /* - * Mappings have to fit in the FIX_BTMAP area. - */ - nrpages = size >> PAGE_SHIFT; - if (nrpages > NR_FIX_BTMAPS) { - WARN_ON(1); - return NULL; - } - - /* - * Ok, go for it.. 
- */ - idx0 = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; - idx = idx0; - while (nrpages > 0) { - early_set_fixmap(idx, phys_addr, prot); - phys_addr += PAGE_SIZE; - --idx; - --nrpages; - } - if (early_ioremap_debug) - printk(KERN_CONT "%08lx + %08lx\n", offset, slot_virt[slot]); - - prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]); - return prev_map[slot]; -} - -/* Remap an IO device */ -void __init __iomem * -early_ioremap(resource_size_t phys_addr, unsigned long size) -{ - return __early_ioremap(phys_addr, size, PAGE_KERNEL_IO); -} - -/* Remap memory */ -void __init __iomem * -early_memremap(resource_size_t phys_addr, unsigned long size) -{ - return __early_ioremap(phys_addr, size, PAGE_KERNEL); -} - -void __init early_iounmap(void __iomem *addr, unsigned long size) -{ - unsigned long virt_addr; - unsigned long offset; - unsigned int nrpages; - enum fixed_addresses idx; - int i, slot; - - slot = -1; - for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { - if (prev_map[i] == addr) { - slot = i; - break; - } - } - - if (slot < 0) { - printk(KERN_INFO "early_iounmap(%p, %08lx) not found slot\n", - addr, size); - WARN_ON(1); - return; - } - - if (prev_size[slot] != size) { - printk(KERN_INFO "early_iounmap(%p, %08lx) [%d] size not consistent %08lx\n", - addr, size, slot, prev_size[slot]); - WARN_ON(1); - return; - } - - if (early_ioremap_debug) { - printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr, - size, slot); - dump_stack(); - } - - virt_addr = (unsigned long)addr; - if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)) { - WARN_ON(1); - return; - } - offset = virt_addr & ~PAGE_MASK; - nrpages = PAGE_ALIGN(offset + size) >> PAGE_SHIFT; - - idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; - while (nrpages > 0) { - early_clear_fixmap(idx); - --idx; - --nrpages; - } - prev_map[slot] = NULL; -} diff --git a/ANDROID_3.4.5/arch/x86/mm/kmemcheck/Makefile b/ANDROID_3.4.5/arch/x86/mm/kmemcheck/Makefile deleted file mode 100644 index 520b3bce..00000000 --- 
a/ANDROID_3.4.5/arch/x86/mm/kmemcheck/Makefile +++ /dev/null @@ -1 +0,0 @@ -obj-y := error.o kmemcheck.o opcode.o pte.o selftest.o shadow.o diff --git a/ANDROID_3.4.5/arch/x86/mm/kmemcheck/error.c b/ANDROID_3.4.5/arch/x86/mm/kmemcheck/error.c deleted file mode 100644 index dab41876..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/kmemcheck/error.c +++ /dev/null @@ -1,227 +0,0 @@ -#include <linux/interrupt.h> -#include <linux/kdebug.h> -#include <linux/kmemcheck.h> -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/ptrace.h> -#include <linux/stacktrace.h> -#include <linux/string.h> - -#include "error.h" -#include "shadow.h" - -enum kmemcheck_error_type { - KMEMCHECK_ERROR_INVALID_ACCESS, - KMEMCHECK_ERROR_BUG, -}; - -#define SHADOW_COPY_SIZE (1 << CONFIG_KMEMCHECK_SHADOW_COPY_SHIFT) - -struct kmemcheck_error { - enum kmemcheck_error_type type; - - union { - /* KMEMCHECK_ERROR_INVALID_ACCESS */ - struct { - /* Kind of access that caused the error */ - enum kmemcheck_shadow state; - /* Address and size of the erroneous read */ - unsigned long address; - unsigned int size; - }; - }; - - struct pt_regs regs; - struct stack_trace trace; - unsigned long trace_entries[32]; - - /* We compress it to a char. */ - unsigned char shadow_copy[SHADOW_COPY_SIZE]; - unsigned char memory_copy[SHADOW_COPY_SIZE]; -}; - -/* - * Create a ring queue of errors to output. We can't call printk() directly - * from the kmemcheck traps, since this may call the console drivers and - * result in a recursive fault. 
- */ -static struct kmemcheck_error error_fifo[CONFIG_KMEMCHECK_QUEUE_SIZE]; -static unsigned int error_count; -static unsigned int error_rd; -static unsigned int error_wr; -static unsigned int error_missed_count; - -static struct kmemcheck_error *error_next_wr(void) -{ - struct kmemcheck_error *e; - - if (error_count == ARRAY_SIZE(error_fifo)) { - ++error_missed_count; - return NULL; - } - - e = &error_fifo[error_wr]; - if (++error_wr == ARRAY_SIZE(error_fifo)) - error_wr = 0; - ++error_count; - return e; -} - -static struct kmemcheck_error *error_next_rd(void) -{ - struct kmemcheck_error *e; - - if (error_count == 0) - return NULL; - - e = &error_fifo[error_rd]; - if (++error_rd == ARRAY_SIZE(error_fifo)) - error_rd = 0; - --error_count; - return e; -} - -void kmemcheck_error_recall(void) -{ - static const char *desc[] = { - [KMEMCHECK_SHADOW_UNALLOCATED] = "unallocated", - [KMEMCHECK_SHADOW_UNINITIALIZED] = "uninitialized", - [KMEMCHECK_SHADOW_INITIALIZED] = "initialized", - [KMEMCHECK_SHADOW_FREED] = "freed", - }; - - static const char short_desc[] = { - [KMEMCHECK_SHADOW_UNALLOCATED] = 'a', - [KMEMCHECK_SHADOW_UNINITIALIZED] = 'u', - [KMEMCHECK_SHADOW_INITIALIZED] = 'i', - [KMEMCHECK_SHADOW_FREED] = 'f', - }; - - struct kmemcheck_error *e; - unsigned int i; - - e = error_next_rd(); - if (!e) - return; - - switch (e->type) { - case KMEMCHECK_ERROR_INVALID_ACCESS: - printk(KERN_WARNING "WARNING: kmemcheck: Caught %d-bit read from %s memory (%p)\n", - 8 * e->size, e->state < ARRAY_SIZE(desc) ? 
- desc[e->state] : "(invalid shadow state)", - (void *) e->address); - - printk(KERN_WARNING); - for (i = 0; i < SHADOW_COPY_SIZE; ++i) - printk(KERN_CONT "%02x", e->memory_copy[i]); - printk(KERN_CONT "\n"); - - printk(KERN_WARNING); - for (i = 0; i < SHADOW_COPY_SIZE; ++i) { - if (e->shadow_copy[i] < ARRAY_SIZE(short_desc)) - printk(KERN_CONT " %c", short_desc[e->shadow_copy[i]]); - else - printk(KERN_CONT " ?"); - } - printk(KERN_CONT "\n"); - printk(KERN_WARNING "%*c\n", 2 + 2 - * (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^'); - break; - case KMEMCHECK_ERROR_BUG: - printk(KERN_EMERG "ERROR: kmemcheck: Fatal error\n"); - break; - } - - __show_regs(&e->regs, 1); - print_stack_trace(&e->trace, 0); -} - -static void do_wakeup(unsigned long data) -{ - while (error_count > 0) - kmemcheck_error_recall(); - - if (error_missed_count > 0) { - printk(KERN_WARNING "kmemcheck: Lost %d error reports because " - "the queue was too small\n", error_missed_count); - error_missed_count = 0; - } -} - -static DECLARE_TASKLET(kmemcheck_tasklet, &do_wakeup, 0); - -/* - * Save the context of an error report. - */ -void kmemcheck_error_save(enum kmemcheck_shadow state, - unsigned long address, unsigned int size, struct pt_regs *regs) -{ - static unsigned long prev_ip; - - struct kmemcheck_error *e; - void *shadow_copy; - void *memory_copy; - - /* Don't report several adjacent errors from the same EIP. 
*/ - if (regs->ip == prev_ip) - return; - prev_ip = regs->ip; - - e = error_next_wr(); - if (!e) - return; - - e->type = KMEMCHECK_ERROR_INVALID_ACCESS; - - e->state = state; - e->address = address; - e->size = size; - - /* Save regs */ - memcpy(&e->regs, regs, sizeof(*regs)); - - /* Save stack trace */ - e->trace.nr_entries = 0; - e->trace.entries = e->trace_entries; - e->trace.max_entries = ARRAY_SIZE(e->trace_entries); - e->trace.skip = 0; - save_stack_trace_regs(regs, &e->trace); - - /* Round address down to nearest 16 bytes */ - shadow_copy = kmemcheck_shadow_lookup(address - & ~(SHADOW_COPY_SIZE - 1)); - BUG_ON(!shadow_copy); - - memcpy(e->shadow_copy, shadow_copy, SHADOW_COPY_SIZE); - - kmemcheck_show_addr(address); - memory_copy = (void *) (address & ~(SHADOW_COPY_SIZE - 1)); - memcpy(e->memory_copy, memory_copy, SHADOW_COPY_SIZE); - kmemcheck_hide_addr(address); - - tasklet_hi_schedule_first(&kmemcheck_tasklet); -} - -/* - * Save the context of a kmemcheck bug. - */ -void kmemcheck_error_save_bug(struct pt_regs *regs) -{ - struct kmemcheck_error *e; - - e = error_next_wr(); - if (!e) - return; - - e->type = KMEMCHECK_ERROR_BUG; - - memcpy(&e->regs, regs, sizeof(*regs)); - - e->trace.nr_entries = 0; - e->trace.entries = e->trace_entries; - e->trace.max_entries = ARRAY_SIZE(e->trace_entries); - e->trace.skip = 1; - save_stack_trace(&e->trace); - - tasklet_hi_schedule_first(&kmemcheck_tasklet); -} diff --git a/ANDROID_3.4.5/arch/x86/mm/kmemcheck/error.h b/ANDROID_3.4.5/arch/x86/mm/kmemcheck/error.h deleted file mode 100644 index 0efc2e8d..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/kmemcheck/error.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef ARCH__X86__MM__KMEMCHECK__ERROR_H -#define ARCH__X86__MM__KMEMCHECK__ERROR_H - -#include <linux/ptrace.h> - -#include "shadow.h" - -void kmemcheck_error_save(enum kmemcheck_shadow state, - unsigned long address, unsigned int size, struct pt_regs *regs); - -void kmemcheck_error_save_bug(struct pt_regs *regs); - -void 
kmemcheck_error_recall(void); - -#endif diff --git a/ANDROID_3.4.5/arch/x86/mm/kmemcheck/kmemcheck.c b/ANDROID_3.4.5/arch/x86/mm/kmemcheck/kmemcheck.c deleted file mode 100644 index d87dd6d0..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/kmemcheck/kmemcheck.c +++ /dev/null @@ -1,653 +0,0 @@ -/** - * kmemcheck - a heavyweight memory checker for the linux kernel - * Copyright (C) 2007, 2008 Vegard Nossum <vegardno@ifi.uio.no> - * (With a lot of help from Ingo Molnar and Pekka Enberg.) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2) as - * published by the Free Software Foundation. - */ - -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/kallsyms.h> -#include <linux/kernel.h> -#include <linux/kmemcheck.h> -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/page-flags.h> -#include <linux/percpu.h> -#include <linux/ptrace.h> -#include <linux/string.h> -#include <linux/types.h> - -#include <asm/cacheflush.h> -#include <asm/kmemcheck.h> -#include <asm/pgtable.h> -#include <asm/tlbflush.h> - -#include "error.h" -#include "opcode.h" -#include "pte.h" -#include "selftest.h" -#include "shadow.h" - - -#ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT -# define KMEMCHECK_ENABLED 0 -#endif - -#ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT -# define KMEMCHECK_ENABLED 1 -#endif - -#ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT -# define KMEMCHECK_ENABLED 2 -#endif - -int kmemcheck_enabled = KMEMCHECK_ENABLED; - -int __init kmemcheck_init(void) -{ -#ifdef CONFIG_SMP - /* - * Limit SMP to use a single CPU. We rely on the fact that this code - * runs before SMP is set up. 
- */ - if (setup_max_cpus > 1) { - printk(KERN_INFO - "kmemcheck: Limiting number of CPUs to 1.\n"); - setup_max_cpus = 1; - } -#endif - - if (!kmemcheck_selftest()) { - printk(KERN_INFO "kmemcheck: self-tests failed; disabling\n"); - kmemcheck_enabled = 0; - return -EINVAL; - } - - printk(KERN_INFO "kmemcheck: Initialized\n"); - return 0; -} - -early_initcall(kmemcheck_init); - -/* - * We need to parse the kmemcheck= option before any memory is allocated. - */ -static int __init param_kmemcheck(char *str) -{ - if (!str) - return -EINVAL; - - sscanf(str, "%d", &kmemcheck_enabled); - return 0; -} - -early_param("kmemcheck", param_kmemcheck); - -int kmemcheck_show_addr(unsigned long address) -{ - pte_t *pte; - - pte = kmemcheck_pte_lookup(address); - if (!pte) - return 0; - - set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); - __flush_tlb_one(address); - return 1; -} - -int kmemcheck_hide_addr(unsigned long address) -{ - pte_t *pte; - - pte = kmemcheck_pte_lookup(address); - if (!pte) - return 0; - - set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); - __flush_tlb_one(address); - return 1; -} - -struct kmemcheck_context { - bool busy; - int balance; - - /* - * There can be at most two memory operands to an instruction, but - * each address can cross a page boundary -- so we may need up to - * four addresses that must be hidden/revealed for each fault. - */ - unsigned long addr[4]; - unsigned long n_addrs; - unsigned long flags; - - /* Data size of the instruction that caused a fault. 
*/ - unsigned int size; -}; - -static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context); - -bool kmemcheck_active(struct pt_regs *regs) -{ - struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); - - return data->balance > 0; -} - -/* Save an address that needs to be shown/hidden */ -static void kmemcheck_save_addr(unsigned long addr) -{ - struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); - - BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr)); - data->addr[data->n_addrs++] = addr; -} - -static unsigned int kmemcheck_show_all(void) -{ - struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); - unsigned int i; - unsigned int n; - - n = 0; - for (i = 0; i < data->n_addrs; ++i) - n += kmemcheck_show_addr(data->addr[i]); - - return n; -} - -static unsigned int kmemcheck_hide_all(void) -{ - struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); - unsigned int i; - unsigned int n; - - n = 0; - for (i = 0; i < data->n_addrs; ++i) - n += kmemcheck_hide_addr(data->addr[i]); - - return n; -} - -/* - * Called from the #PF handler. - */ -void kmemcheck_show(struct pt_regs *regs) -{ - struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); - - BUG_ON(!irqs_disabled()); - - if (unlikely(data->balance != 0)) { - kmemcheck_show_all(); - kmemcheck_error_save_bug(regs); - data->balance = 0; - return; - } - - /* - * None of the addresses actually belonged to kmemcheck. Note that - * this is not an error. - */ - if (kmemcheck_show_all() == 0) - return; - - ++data->balance; - - /* - * The IF needs to be cleared as well, so that the faulting - * instruction can run "uninterrupted". Otherwise, we might take - * an interrupt and start executing that before we've had a chance - * to hide the page again. 
- * - * NOTE: In the rare case of multiple faults, we must not override - * the original flags: - */ - if (!(regs->flags & X86_EFLAGS_TF)) - data->flags = regs->flags; - - regs->flags |= X86_EFLAGS_TF; - regs->flags &= ~X86_EFLAGS_IF; -} - -/* - * Called from the #DB handler. - */ -void kmemcheck_hide(struct pt_regs *regs) -{ - struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); - int n; - - BUG_ON(!irqs_disabled()); - - if (unlikely(data->balance != 1)) { - kmemcheck_show_all(); - kmemcheck_error_save_bug(regs); - data->n_addrs = 0; - data->balance = 0; - - if (!(data->flags & X86_EFLAGS_TF)) - regs->flags &= ~X86_EFLAGS_TF; - if (data->flags & X86_EFLAGS_IF) - regs->flags |= X86_EFLAGS_IF; - return; - } - - if (kmemcheck_enabled) - n = kmemcheck_hide_all(); - else - n = kmemcheck_show_all(); - - if (n == 0) - return; - - --data->balance; - - data->n_addrs = 0; - - if (!(data->flags & X86_EFLAGS_TF)) - regs->flags &= ~X86_EFLAGS_TF; - if (data->flags & X86_EFLAGS_IF) - regs->flags |= X86_EFLAGS_IF; -} - -void kmemcheck_show_pages(struct page *p, unsigned int n) -{ - unsigned int i; - - for (i = 0; i < n; ++i) { - unsigned long address; - pte_t *pte; - unsigned int level; - - address = (unsigned long) page_address(&p[i]); - pte = lookup_address(address, &level); - BUG_ON(!pte); - BUG_ON(level != PG_LEVEL_4K); - - set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); - set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN)); - __flush_tlb_one(address); - } -} - -bool kmemcheck_page_is_tracked(struct page *p) -{ - /* This will also check the "hidden" flag of the PTE. 
*/ - return kmemcheck_pte_lookup((unsigned long) page_address(p)); -} - -void kmemcheck_hide_pages(struct page *p, unsigned int n) -{ - unsigned int i; - - for (i = 0; i < n; ++i) { - unsigned long address; - pte_t *pte; - unsigned int level; - - address = (unsigned long) page_address(&p[i]); - pte = lookup_address(address, &level); - BUG_ON(!pte); - BUG_ON(level != PG_LEVEL_4K); - - set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); - set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN)); - __flush_tlb_one(address); - } -} - -/* Access may NOT cross page boundary */ -static void kmemcheck_read_strict(struct pt_regs *regs, - unsigned long addr, unsigned int size) -{ - void *shadow; - enum kmemcheck_shadow status; - - shadow = kmemcheck_shadow_lookup(addr); - if (!shadow) - return; - - kmemcheck_save_addr(addr); - status = kmemcheck_shadow_test(shadow, size); - if (status == KMEMCHECK_SHADOW_INITIALIZED) - return; - - if (kmemcheck_enabled) - kmemcheck_error_save(status, addr, size, regs); - - if (kmemcheck_enabled == 2) - kmemcheck_enabled = 0; - - /* Don't warn about it again. */ - kmemcheck_shadow_set(shadow, size); -} - -bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size) -{ - enum kmemcheck_shadow status; - void *shadow; - - shadow = kmemcheck_shadow_lookup(addr); - if (!shadow) - return true; - - status = kmemcheck_shadow_test_all(shadow, size); - - return status == KMEMCHECK_SHADOW_INITIALIZED; -} - -/* Access may cross page boundary */ -static void kmemcheck_read(struct pt_regs *regs, - unsigned long addr, unsigned int size) -{ - unsigned long page = addr & PAGE_MASK; - unsigned long next_addr = addr + size - 1; - unsigned long next_page = next_addr & PAGE_MASK; - - if (likely(page == next_page)) { - kmemcheck_read_strict(regs, addr, size); - return; - } - - /* - * What we do is basically to split the access across the - * two pages and handle each part separately. 
Yes, this means - * that we may now see reads that are 3 + 5 bytes, for - * example (and if both are uninitialized, there will be two - * reports), but it makes the code a lot simpler. - */ - kmemcheck_read_strict(regs, addr, next_page - addr); - kmemcheck_read_strict(regs, next_page, next_addr - next_page); -} - -static void kmemcheck_write_strict(struct pt_regs *regs, - unsigned long addr, unsigned int size) -{ - void *shadow; - - shadow = kmemcheck_shadow_lookup(addr); - if (!shadow) - return; - - kmemcheck_save_addr(addr); - kmemcheck_shadow_set(shadow, size); -} - -static void kmemcheck_write(struct pt_regs *regs, - unsigned long addr, unsigned int size) -{ - unsigned long page = addr & PAGE_MASK; - unsigned long next_addr = addr + size - 1; - unsigned long next_page = next_addr & PAGE_MASK; - - if (likely(page == next_page)) { - kmemcheck_write_strict(regs, addr, size); - return; - } - - /* See comment in kmemcheck_read(). */ - kmemcheck_write_strict(regs, addr, next_page - addr); - kmemcheck_write_strict(regs, next_page, next_addr - next_page); -} - -/* - * Copying is hard. We have two addresses, each of which may be split across - * a page (and each page will have different shadow addresses). 
- */ -static void kmemcheck_copy(struct pt_regs *regs, - unsigned long src_addr, unsigned long dst_addr, unsigned int size) -{ - uint8_t shadow[8]; - enum kmemcheck_shadow status; - - unsigned long page; - unsigned long next_addr; - unsigned long next_page; - - uint8_t *x; - unsigned int i; - unsigned int n; - - BUG_ON(size > sizeof(shadow)); - - page = src_addr & PAGE_MASK; - next_addr = src_addr + size - 1; - next_page = next_addr & PAGE_MASK; - - if (likely(page == next_page)) { - /* Same page */ - x = kmemcheck_shadow_lookup(src_addr); - if (x) { - kmemcheck_save_addr(src_addr); - for (i = 0; i < size; ++i) - shadow[i] = x[i]; - } else { - for (i = 0; i < size; ++i) - shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; - } - } else { - n = next_page - src_addr; - BUG_ON(n > sizeof(shadow)); - - /* First page */ - x = kmemcheck_shadow_lookup(src_addr); - if (x) { - kmemcheck_save_addr(src_addr); - for (i = 0; i < n; ++i) - shadow[i] = x[i]; - } else { - /* Not tracked */ - for (i = 0; i < n; ++i) - shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; - } - - /* Second page */ - x = kmemcheck_shadow_lookup(next_page); - if (x) { - kmemcheck_save_addr(next_page); - for (i = n; i < size; ++i) - shadow[i] = x[i - n]; - } else { - /* Not tracked */ - for (i = n; i < size; ++i) - shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; - } - } - - page = dst_addr & PAGE_MASK; - next_addr = dst_addr + size - 1; - next_page = next_addr & PAGE_MASK; - - if (likely(page == next_page)) { - /* Same page */ - x = kmemcheck_shadow_lookup(dst_addr); - if (x) { - kmemcheck_save_addr(dst_addr); - for (i = 0; i < size; ++i) { - x[i] = shadow[i]; - shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; - } - } - } else { - n = next_page - dst_addr; - BUG_ON(n > sizeof(shadow)); - - /* First page */ - x = kmemcheck_shadow_lookup(dst_addr); - if (x) { - kmemcheck_save_addr(dst_addr); - for (i = 0; i < n; ++i) { - x[i] = shadow[i]; - shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; - } - } - - /* Second page */ - x = 
kmemcheck_shadow_lookup(next_page); - if (x) { - kmemcheck_save_addr(next_page); - for (i = n; i < size; ++i) { - x[i - n] = shadow[i]; - shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; - } - } - } - - status = kmemcheck_shadow_test(shadow, size); - if (status == KMEMCHECK_SHADOW_INITIALIZED) - return; - - if (kmemcheck_enabled) - kmemcheck_error_save(status, src_addr, size, regs); - - if (kmemcheck_enabled == 2) - kmemcheck_enabled = 0; -} - -enum kmemcheck_method { - KMEMCHECK_READ, - KMEMCHECK_WRITE, -}; - -static void kmemcheck_access(struct pt_regs *regs, - unsigned long fallback_address, enum kmemcheck_method fallback_method) -{ - const uint8_t *insn; - const uint8_t *insn_primary; - unsigned int size; - - struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); - - /* Recursive fault -- ouch. */ - if (data->busy) { - kmemcheck_show_addr(fallback_address); - kmemcheck_error_save_bug(regs); - return; - } - - data->busy = true; - - insn = (const uint8_t *) regs->ip; - insn_primary = kmemcheck_opcode_get_primary(insn); - - kmemcheck_opcode_decode(insn, &size); - - switch (insn_primary[0]) { -#ifdef CONFIG_KMEMCHECK_BITOPS_OK - /* AND, OR, XOR */ - /* - * Unfortunately, these instructions have to be excluded from - * our regular checking since they access only some (and not - * all) bits. This clears out "bogus" bitfield-access warnings. - */ - case 0x80: - case 0x81: - case 0x82: - case 0x83: - switch ((insn_primary[1] >> 3) & 7) { - /* OR */ - case 1: - /* AND */ - case 4: - /* XOR */ - case 6: - kmemcheck_write(regs, fallback_address, size); - goto out; - - /* ADD */ - case 0: - /* ADC */ - case 2: - /* SBB */ - case 3: - /* SUB */ - case 5: - /* CMP */ - case 7: - break; - } - break; -#endif - - /* MOVS, MOVSB, MOVSW, MOVSD */ - case 0xa4: - case 0xa5: - /* - * These instructions are special because they take two - * addresses, but we only get one page fault. 
- */ - kmemcheck_copy(regs, regs->si, regs->di, size); - goto out; - - /* CMPS, CMPSB, CMPSW, CMPSD */ - case 0xa6: - case 0xa7: - kmemcheck_read(regs, regs->si, size); - kmemcheck_read(regs, regs->di, size); - goto out; - } - - /* - * If the opcode isn't special in any way, we use the data from the - * page fault handler to determine the address and type of memory - * access. - */ - switch (fallback_method) { - case KMEMCHECK_READ: - kmemcheck_read(regs, fallback_address, size); - goto out; - case KMEMCHECK_WRITE: - kmemcheck_write(regs, fallback_address, size); - goto out; - } - -out: - data->busy = false; -} - -bool kmemcheck_fault(struct pt_regs *regs, unsigned long address, - unsigned long error_code) -{ - pte_t *pte; - - /* - * XXX: Is it safe to assume that memory accesses from virtual 86 - * mode or non-kernel code segments will _never_ access kernel - * memory (e.g. tracked pages)? For now, we need this to avoid - * invoking kmemcheck for PnP BIOS calls. - */ - if (regs->flags & X86_VM_MASK) - return false; - if (regs->cs != __KERNEL_CS) - return false; - - pte = kmemcheck_pte_lookup(address); - if (!pte) - return false; - - WARN_ON_ONCE(in_nmi()); - - if (error_code & 2) - kmemcheck_access(regs, address, KMEMCHECK_WRITE); - else - kmemcheck_access(regs, address, KMEMCHECK_READ); - - kmemcheck_show(regs); - return true; -} - -bool kmemcheck_trap(struct pt_regs *regs) -{ - if (!kmemcheck_active(regs)) - return false; - - /* We're done. 
*/ - kmemcheck_hide(regs); - return true; -} diff --git a/ANDROID_3.4.5/arch/x86/mm/kmemcheck/opcode.c b/ANDROID_3.4.5/arch/x86/mm/kmemcheck/opcode.c deleted file mode 100644 index 324aa3f0..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/kmemcheck/opcode.c +++ /dev/null @@ -1,106 +0,0 @@ -#include <linux/types.h> - -#include "opcode.h" - -static bool opcode_is_prefix(uint8_t b) -{ - return - /* Group 1 */ - b == 0xf0 || b == 0xf2 || b == 0xf3 - /* Group 2 */ - || b == 0x2e || b == 0x36 || b == 0x3e || b == 0x26 - || b == 0x64 || b == 0x65 - /* Group 3 */ - || b == 0x66 - /* Group 4 */ - || b == 0x67; -} - -#ifdef CONFIG_X86_64 -static bool opcode_is_rex_prefix(uint8_t b) -{ - return (b & 0xf0) == 0x40; -} -#else -static bool opcode_is_rex_prefix(uint8_t b) -{ - return false; -} -#endif - -#define REX_W (1 << 3) - -/* - * This is a VERY crude opcode decoder. We only need to find the size of the - * load/store that caused our #PF and this should work for all the opcodes - * that we care about. Moreover, the ones who invented this instruction set - * should be shot. - */ -void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size) -{ - /* Default operand size */ - int operand_size_override = 4; - - /* prefixes */ - for (; opcode_is_prefix(*op); ++op) { - if (*op == 0x66) - operand_size_override = 2; - } - - /* REX prefix */ - if (opcode_is_rex_prefix(*op)) { - uint8_t rex = *op; - - ++op; - if (rex & REX_W) { - switch (*op) { - case 0x63: - *size = 4; - return; - case 0x0f: - ++op; - - switch (*op) { - case 0xb6: - case 0xbe: - *size = 1; - return; - case 0xb7: - case 0xbf: - *size = 2; - return; - } - - break; - } - - *size = 8; - return; - } - } - - /* escape opcode */ - if (*op == 0x0f) { - ++op; - - /* - * This is move with zero-extend and sign-extend, respectively; - * we don't have to think about 0xb6/0xbe, because this is - * already handled in the conditional below. 
- */ - if (*op == 0xb7 || *op == 0xbf) - operand_size_override = 2; - } - - *size = (*op & 1) ? operand_size_override : 1; -} - -const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op) -{ - /* skip prefixes */ - while (opcode_is_prefix(*op)) - ++op; - if (opcode_is_rex_prefix(*op)) - ++op; - return op; -} diff --git a/ANDROID_3.4.5/arch/x86/mm/kmemcheck/opcode.h b/ANDROID_3.4.5/arch/x86/mm/kmemcheck/opcode.h deleted file mode 100644 index 6956aad6..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/kmemcheck/opcode.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef ARCH__X86__MM__KMEMCHECK__OPCODE_H -#define ARCH__X86__MM__KMEMCHECK__OPCODE_H - -#include <linux/types.h> - -void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size); -const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op); - -#endif diff --git a/ANDROID_3.4.5/arch/x86/mm/kmemcheck/pte.c b/ANDROID_3.4.5/arch/x86/mm/kmemcheck/pte.c deleted file mode 100644 index 4ead26ee..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/kmemcheck/pte.c +++ /dev/null @@ -1,22 +0,0 @@ -#include <linux/mm.h> - -#include <asm/pgtable.h> - -#include "pte.h" - -pte_t *kmemcheck_pte_lookup(unsigned long address) -{ - pte_t *pte; - unsigned int level; - - pte = lookup_address(address, &level); - if (!pte) - return NULL; - if (level != PG_LEVEL_4K) - return NULL; - if (!pte_hidden(*pte)) - return NULL; - - return pte; -} - diff --git a/ANDROID_3.4.5/arch/x86/mm/kmemcheck/pte.h b/ANDROID_3.4.5/arch/x86/mm/kmemcheck/pte.h deleted file mode 100644 index 9f596645..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/kmemcheck/pte.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef ARCH__X86__MM__KMEMCHECK__PTE_H -#define ARCH__X86__MM__KMEMCHECK__PTE_H - -#include <linux/mm.h> - -#include <asm/pgtable.h> - -pte_t *kmemcheck_pte_lookup(unsigned long address); - -#endif diff --git a/ANDROID_3.4.5/arch/x86/mm/kmemcheck/selftest.c b/ANDROID_3.4.5/arch/x86/mm/kmemcheck/selftest.c deleted file mode 100644 index aef7140c..00000000 --- 
a/ANDROID_3.4.5/arch/x86/mm/kmemcheck/selftest.c +++ /dev/null @@ -1,70 +0,0 @@ -#include <linux/bug.h> -#include <linux/kernel.h> - -#include "opcode.h" -#include "selftest.h" - -struct selftest_opcode { - unsigned int expected_size; - const uint8_t *insn; - const char *desc; -}; - -static const struct selftest_opcode selftest_opcodes[] = { - /* REP MOVS */ - {1, "\xf3\xa4", "rep movsb <mem8>, <mem8>"}, - {4, "\xf3\xa5", "rep movsl <mem32>, <mem32>"}, - - /* MOVZX / MOVZXD */ - {1, "\x66\x0f\xb6\x51\xf8", "movzwq <mem8>, <reg16>"}, - {1, "\x0f\xb6\x51\xf8", "movzwq <mem8>, <reg32>"}, - - /* MOVSX / MOVSXD */ - {1, "\x66\x0f\xbe\x51\xf8", "movswq <mem8>, <reg16>"}, - {1, "\x0f\xbe\x51\xf8", "movswq <mem8>, <reg32>"}, - -#ifdef CONFIG_X86_64 - /* MOVZX / MOVZXD */ - {1, "\x49\x0f\xb6\x51\xf8", "movzbq <mem8>, <reg64>"}, - {2, "\x49\x0f\xb7\x51\xf8", "movzbq <mem16>, <reg64>"}, - - /* MOVSX / MOVSXD */ - {1, "\x49\x0f\xbe\x51\xf8", "movsbq <mem8>, <reg64>"}, - {2, "\x49\x0f\xbf\x51\xf8", "movsbq <mem16>, <reg64>"}, - {4, "\x49\x63\x51\xf8", "movslq <mem32>, <reg64>"}, -#endif -}; - -static bool selftest_opcode_one(const struct selftest_opcode *op) -{ - unsigned size; - - kmemcheck_opcode_decode(op->insn, &size); - - if (size == op->expected_size) - return true; - - printk(KERN_WARNING "kmemcheck: opcode %s: expected size %d, got %d\n", - op->desc, op->expected_size, size); - return false; -} - -static bool selftest_opcodes_all(void) -{ - bool pass = true; - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(selftest_opcodes); ++i) - pass = pass && selftest_opcode_one(&selftest_opcodes[i]); - - return pass; -} - -bool kmemcheck_selftest(void) -{ - bool pass = true; - - pass = pass && selftest_opcodes_all(); - - return pass; -} diff --git a/ANDROID_3.4.5/arch/x86/mm/kmemcheck/selftest.h b/ANDROID_3.4.5/arch/x86/mm/kmemcheck/selftest.h deleted file mode 100644 index 8fed4fe1..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/kmemcheck/selftest.h +++ /dev/null @@ -1,6 +0,0 @@ 
-#ifndef ARCH_X86_MM_KMEMCHECK_SELFTEST_H -#define ARCH_X86_MM_KMEMCHECK_SELFTEST_H - -bool kmemcheck_selftest(void); - -#endif diff --git a/ANDROID_3.4.5/arch/x86/mm/kmemcheck/shadow.c b/ANDROID_3.4.5/arch/x86/mm/kmemcheck/shadow.c deleted file mode 100644 index aec12421..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/kmemcheck/shadow.c +++ /dev/null @@ -1,173 +0,0 @@ -#include <linux/kmemcheck.h> -#include <linux/module.h> -#include <linux/mm.h> - -#include <asm/page.h> -#include <asm/pgtable.h> - -#include "pte.h" -#include "shadow.h" - -/* - * Return the shadow address for the given address. Returns NULL if the - * address is not tracked. - * - * We need to be extremely careful not to follow any invalid pointers, - * because this function can be called for *any* possible address. - */ -void *kmemcheck_shadow_lookup(unsigned long address) -{ - pte_t *pte; - struct page *page; - - if (!virt_addr_valid(address)) - return NULL; - - pte = kmemcheck_pte_lookup(address); - if (!pte) - return NULL; - - page = virt_to_page(address); - if (!page->shadow) - return NULL; - return page->shadow + (address & (PAGE_SIZE - 1)); -} - -static void mark_shadow(void *address, unsigned int n, - enum kmemcheck_shadow status) -{ - unsigned long addr = (unsigned long) address; - unsigned long last_addr = addr + n - 1; - unsigned long page = addr & PAGE_MASK; - unsigned long last_page = last_addr & PAGE_MASK; - unsigned int first_n; - void *shadow; - - /* If the memory range crosses a page boundary, stop there. */ - if (page == last_page) - first_n = n; - else - first_n = page + PAGE_SIZE - addr; - - shadow = kmemcheck_shadow_lookup(addr); - if (shadow) - memset(shadow, status, first_n); - - addr += first_n; - n -= first_n; - - /* Do full-page memset()s. */ - while (n >= PAGE_SIZE) { - shadow = kmemcheck_shadow_lookup(addr); - if (shadow) - memset(shadow, status, PAGE_SIZE); - - addr += PAGE_SIZE; - n -= PAGE_SIZE; - } - - /* Do the remaining page, if any. 
*/ - if (n > 0) { - shadow = kmemcheck_shadow_lookup(addr); - if (shadow) - memset(shadow, status, n); - } -} - -void kmemcheck_mark_unallocated(void *address, unsigned int n) -{ - mark_shadow(address, n, KMEMCHECK_SHADOW_UNALLOCATED); -} - -void kmemcheck_mark_uninitialized(void *address, unsigned int n) -{ - mark_shadow(address, n, KMEMCHECK_SHADOW_UNINITIALIZED); -} - -/* - * Fill the shadow memory of the given address such that the memory at that - * address is marked as being initialized. - */ -void kmemcheck_mark_initialized(void *address, unsigned int n) -{ - mark_shadow(address, n, KMEMCHECK_SHADOW_INITIALIZED); -} -EXPORT_SYMBOL_GPL(kmemcheck_mark_initialized); - -void kmemcheck_mark_freed(void *address, unsigned int n) -{ - mark_shadow(address, n, KMEMCHECK_SHADOW_FREED); -} - -void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n) -{ - unsigned int i; - - for (i = 0; i < n; ++i) - kmemcheck_mark_unallocated(page_address(&p[i]), PAGE_SIZE); -} - -void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n) -{ - unsigned int i; - - for (i = 0; i < n; ++i) - kmemcheck_mark_uninitialized(page_address(&p[i]), PAGE_SIZE); -} - -void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n) -{ - unsigned int i; - - for (i = 0; i < n; ++i) - kmemcheck_mark_initialized(page_address(&p[i]), PAGE_SIZE); -} - -enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size) -{ -#ifdef CONFIG_KMEMCHECK_PARTIAL_OK - uint8_t *x; - unsigned int i; - - x = shadow; - - /* - * Make sure _some_ bytes are initialized. Gcc frequently generates - * code to access neighboring bytes. - */ - for (i = 0; i < size; ++i) { - if (x[i] == KMEMCHECK_SHADOW_INITIALIZED) - return x[i]; - } - - return x[0]; -#else - return kmemcheck_shadow_test_all(shadow, size); -#endif -} - -enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow, unsigned int size) -{ - uint8_t *x; - unsigned int i; - - x = shadow; - - /* All bytes must be initialized. 
*/ - for (i = 0; i < size; ++i) { - if (x[i] != KMEMCHECK_SHADOW_INITIALIZED) - return x[i]; - } - - return x[0]; -} - -void kmemcheck_shadow_set(void *shadow, unsigned int size) -{ - uint8_t *x; - unsigned int i; - - x = shadow; - for (i = 0; i < size; ++i) - x[i] = KMEMCHECK_SHADOW_INITIALIZED; -} diff --git a/ANDROID_3.4.5/arch/x86/mm/kmemcheck/shadow.h b/ANDROID_3.4.5/arch/x86/mm/kmemcheck/shadow.h deleted file mode 100644 index ff0b2f70..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/kmemcheck/shadow.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef ARCH__X86__MM__KMEMCHECK__SHADOW_H -#define ARCH__X86__MM__KMEMCHECK__SHADOW_H - -enum kmemcheck_shadow { - KMEMCHECK_SHADOW_UNALLOCATED, - KMEMCHECK_SHADOW_UNINITIALIZED, - KMEMCHECK_SHADOW_INITIALIZED, - KMEMCHECK_SHADOW_FREED, -}; - -void *kmemcheck_shadow_lookup(unsigned long address); - -enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size); -enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow, - unsigned int size); -void kmemcheck_shadow_set(void *shadow, unsigned int size); - -#endif diff --git a/ANDROID_3.4.5/arch/x86/mm/kmmio.c b/ANDROID_3.4.5/arch/x86/mm/kmmio.c deleted file mode 100644 index e5d5e2ce..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/kmmio.c +++ /dev/null @@ -1,590 +0,0 @@ -/* Support for MMIO probes. - * Benfit many code from kprobes - * (C) 2002 Louis Zhuang <louis.zhuang@intel.com>. 
- * 2007 Alexander Eichner - * 2008 Pekka Paalanen <pq@iki.fi> - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/list.h> -#include <linux/rculist.h> -#include <linux/spinlock.h> -#include <linux/hash.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/uaccess.h> -#include <linux/ptrace.h> -#include <linux/preempt.h> -#include <linux/percpu.h> -#include <linux/kdebug.h> -#include <linux/mutex.h> -#include <linux/io.h> -#include <linux/slab.h> -#include <asm/cacheflush.h> -#include <asm/tlbflush.h> -#include <linux/errno.h> -#include <asm/debugreg.h> -#include <linux/mmiotrace.h> - -#define KMMIO_PAGE_HASH_BITS 4 -#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS) - -struct kmmio_fault_page { - struct list_head list; - struct kmmio_fault_page *release_next; - unsigned long page; /* location of the fault page */ - pteval_t old_presence; /* page presence prior to arming */ - bool armed; - - /* - * Number of times this page has been registered as a part - * of a probe. If zero, page is disarmed and this may be freed. - * Used only by writers (RCU) and post_kmmio_handler(). - * Protected by kmmio_lock, when linked into kmmio_page_table. - */ - int count; - - bool scheduled_for_release; -}; - -struct kmmio_delayed_release { - struct rcu_head rcu; - struct kmmio_fault_page *release_list; -}; - -struct kmmio_context { - struct kmmio_fault_page *fpage; - struct kmmio_probe *probe; - unsigned long saved_flags; - unsigned long addr; - int active; -}; - -static DEFINE_SPINLOCK(kmmio_lock); - -/* Protected by kmmio_lock */ -unsigned int kmmio_count; - -/* Read-protected by RCU, write-protected by kmmio_lock. 
*/ -static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE]; -static LIST_HEAD(kmmio_probes); - -static struct list_head *kmmio_page_list(unsigned long page) -{ - return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)]; -} - -/* Accessed per-cpu */ -static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx); - -/* - * this is basically a dynamic stabbing problem: - * Could use the existing prio tree code or - * Possible better implementations: - * The Interval Skip List: A Data Structure for Finding All Intervals That - * Overlap a Point (might be simple) - * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup - */ -/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */ -static struct kmmio_probe *get_kmmio_probe(unsigned long addr) -{ - struct kmmio_probe *p; - list_for_each_entry_rcu(p, &kmmio_probes, list) { - if (addr >= p->addr && addr < (p->addr + p->len)) - return p; - } - return NULL; -} - -/* You must be holding RCU read lock. */ -static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) -{ - struct list_head *head; - struct kmmio_fault_page *f; - - page &= PAGE_MASK; - head = kmmio_page_list(page); - list_for_each_entry_rcu(f, head, list) { - if (f->page == page) - return f; - } - return NULL; -} - -static void clear_pmd_presence(pmd_t *pmd, bool clear, pmdval_t *old) -{ - pmdval_t v = pmd_val(*pmd); - if (clear) { - *old = v & _PAGE_PRESENT; - v &= ~_PAGE_PRESENT; - } else /* presume this has been called with clear==true previously */ - v |= *old; - set_pmd(pmd, __pmd(v)); -} - -static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old) -{ - pteval_t v = pte_val(*pte); - if (clear) { - *old = v & _PAGE_PRESENT; - v &= ~_PAGE_PRESENT; - } else /* presume this has been called with clear==true previously */ - v |= *old; - set_pte_atomic(pte, __pte(v)); -} - -static int clear_page_presence(struct kmmio_fault_page *f, bool clear) -{ - unsigned int level; - pte_t *pte = 
lookup_address(f->page, &level); - - if (!pte) { - pr_err("no pte for page 0x%08lx\n", f->page); - return -1; - } - - switch (level) { - case PG_LEVEL_2M: - clear_pmd_presence((pmd_t *)pte, clear, &f->old_presence); - break; - case PG_LEVEL_4K: - clear_pte_presence(pte, clear, &f->old_presence); - break; - default: - pr_err("unexpected page level 0x%x.\n", level); - return -1; - } - - __flush_tlb_one(f->page); - return 0; -} - -/* - * Mark the given page as not present. Access to it will trigger a fault. - * - * Struct kmmio_fault_page is protected by RCU and kmmio_lock, but the - * protection is ignored here. RCU read lock is assumed held, so the struct - * will not disappear unexpectedly. Furthermore, the caller must guarantee, - * that double arming the same virtual address (page) cannot occur. - * - * Double disarming on the other hand is allowed, and may occur when a fault - * and mmiotrace shutdown happen simultaneously. - */ -static int arm_kmmio_fault_page(struct kmmio_fault_page *f) -{ - int ret; - WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n")); - if (f->armed) { - pr_warning("double-arm: page 0x%08lx, ref %d, old %d\n", - f->page, f->count, !!f->old_presence); - } - ret = clear_page_presence(f, true); - WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming 0x%08lx failed.\n"), - f->page); - f->armed = true; - return ret; -} - -/** Restore the given page to saved presence state. */ -static void disarm_kmmio_fault_page(struct kmmio_fault_page *f) -{ - int ret = clear_page_presence(f, false); - WARN_ONCE(ret < 0, - KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page); - f->armed = false; -} - -/* - * This is being called from do_page_fault(). - * - * We may be in an interrupt or a critical section. Also prefecthing may - * trigger a page fault. We may be in the middle of process switch. - * We cannot take any locks, because we could be executing especially - * within a kmmio critical section. 
- * - * Local interrupts are disabled, so preemption cannot happen. - * Do not enable interrupts, do not sleep, and watch out for other CPUs. - */ -/* - * Interrupts are disabled on entry as trap3 is an interrupt gate - * and they remain disabled throughout this function. - */ -int kmmio_handler(struct pt_regs *regs, unsigned long addr) -{ - struct kmmio_context *ctx; - struct kmmio_fault_page *faultpage; - int ret = 0; /* default to fault not handled */ - - /* - * Preemption is now disabled to prevent process switch during - * single stepping. We can only handle one active kmmio trace - * per cpu, so ensure that we finish it before something else - * gets to run. We also hold the RCU read lock over single - * stepping to avoid looking up the probe and kmmio_fault_page - * again. - */ - preempt_disable(); - rcu_read_lock(); - - faultpage = get_kmmio_fault_page(addr); - if (!faultpage) { - /* - * Either this page fault is not caused by kmmio, or - * another CPU just pulled the kmmio probe from under - * our feet. The latter case should not be possible. - */ - goto no_kmmio; - } - - ctx = &get_cpu_var(kmmio_ctx); - if (ctx->active) { - if (addr == ctx->addr) { - /* - * A second fault on the same page means some other - * condition needs handling by do_page_fault(), the - * page really not being present is the most common. - */ - pr_debug("secondary hit for 0x%08lx CPU %d.\n", - addr, smp_processor_id()); - - if (!faultpage->old_presence) - pr_info("unexpected secondary hit for address 0x%08lx on CPU %d.\n", - addr, smp_processor_id()); - } else { - /* - * Prevent overwriting already in-flight context. - * This should not happen, let's hope disarming at - * least prevents a panic. - */ - pr_emerg("recursive probe hit on CPU %d, for address 0x%08lx. 
Ignoring.\n", - smp_processor_id(), addr); - pr_emerg("previous hit was at 0x%08lx.\n", ctx->addr); - disarm_kmmio_fault_page(faultpage); - } - goto no_kmmio_ctx; - } - ctx->active++; - - ctx->fpage = faultpage; - ctx->probe = get_kmmio_probe(addr); - ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF)); - ctx->addr = addr; - - if (ctx->probe && ctx->probe->pre_handler) - ctx->probe->pre_handler(ctx->probe, regs, addr); - - /* - * Enable single-stepping and disable interrupts for the faulting - * context. Local interrupts must not get enabled during stepping. - */ - regs->flags |= X86_EFLAGS_TF; - regs->flags &= ~X86_EFLAGS_IF; - - /* Now we set present bit in PTE and single step. */ - disarm_kmmio_fault_page(ctx->fpage); - - /* - * If another cpu accesses the same page while we are stepping, - * the access will not be caught. It will simply succeed and the - * only downside is we lose the event. If this becomes a problem, - * the user should drop to single cpu before tracing. - */ - - put_cpu_var(kmmio_ctx); - return 1; /* fault handled */ - -no_kmmio_ctx: - put_cpu_var(kmmio_ctx); -no_kmmio: - rcu_read_unlock(); - preempt_enable_no_resched(); - return ret; -} - -/* - * Interrupts are disabled on entry as trap1 is an interrupt gate - * and they remain disabled throughout this function. - * This must always get called as the pair to kmmio_handler(). - */ -static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) -{ - int ret = 0; - struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); - - if (!ctx->active) { - /* - * debug traps without an active context are due to either - * something external causing them (f.e. 
using a debugger while - * mmio tracing enabled), or erroneous behaviour - */ - pr_warning("unexpected debug trap on CPU %d.\n", - smp_processor_id()); - goto out; - } - - if (ctx->probe && ctx->probe->post_handler) - ctx->probe->post_handler(ctx->probe, condition, regs); - - /* Prevent racing against release_kmmio_fault_page(). */ - spin_lock(&kmmio_lock); - if (ctx->fpage->count) - arm_kmmio_fault_page(ctx->fpage); - spin_unlock(&kmmio_lock); - - regs->flags &= ~X86_EFLAGS_TF; - regs->flags |= ctx->saved_flags; - - /* These were acquired in kmmio_handler(). */ - ctx->active--; - BUG_ON(ctx->active); - rcu_read_unlock(); - preempt_enable_no_resched(); - - /* - * if somebody else is singlestepping across a probe point, flags - * will have TF set, in which case, continue the remaining processing - * of do_debug, as if this is not a probe hit. - */ - if (!(regs->flags & X86_EFLAGS_TF)) - ret = 1; -out: - put_cpu_var(kmmio_ctx); - return ret; -} - -/* You must be holding kmmio_lock. */ -static int add_kmmio_fault_page(unsigned long page) -{ - struct kmmio_fault_page *f; - - page &= PAGE_MASK; - f = get_kmmio_fault_page(page); - if (f) { - if (!f->count) - arm_kmmio_fault_page(f); - f->count++; - return 0; - } - - f = kzalloc(sizeof(*f), GFP_ATOMIC); - if (!f) - return -1; - - f->count = 1; - f->page = page; - - if (arm_kmmio_fault_page(f)) { - kfree(f); - return -1; - } - - list_add_rcu(&f->list, kmmio_page_list(f->page)); - - return 0; -} - -/* You must be holding kmmio_lock. 
*/ -static void release_kmmio_fault_page(unsigned long page, - struct kmmio_fault_page **release_list) -{ - struct kmmio_fault_page *f; - - page &= PAGE_MASK; - f = get_kmmio_fault_page(page); - if (!f) - return; - - f->count--; - BUG_ON(f->count < 0); - if (!f->count) { - disarm_kmmio_fault_page(f); - if (!f->scheduled_for_release) { - f->release_next = *release_list; - *release_list = f; - f->scheduled_for_release = true; - } - } -} - -/* - * With page-unaligned ioremaps, one or two armed pages may contain - * addresses from outside the intended mapping. Events for these addresses - * are currently silently dropped. The events may result only from programming - * mistakes by accessing addresses before the beginning or past the end of a - * mapping. - */ -int register_kmmio_probe(struct kmmio_probe *p) -{ - unsigned long flags; - int ret = 0; - unsigned long size = 0; - const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); - - spin_lock_irqsave(&kmmio_lock, flags); - if (get_kmmio_probe(p->addr)) { - ret = -EEXIST; - goto out; - } - kmmio_count++; - list_add_rcu(&p->list, &kmmio_probes); - while (size < size_lim) { - if (add_kmmio_fault_page(p->addr + size)) - pr_err("Unable to set page fault.\n"); - size += PAGE_SIZE; - } -out: - spin_unlock_irqrestore(&kmmio_lock, flags); - /* - * XXX: What should I do here? - * Here was a call to global_flush_tlb(), but it does not exist - * anymore. It seems it's not needed after all. 
- */ - return ret; -} -EXPORT_SYMBOL(register_kmmio_probe); - -static void rcu_free_kmmio_fault_pages(struct rcu_head *head) -{ - struct kmmio_delayed_release *dr = container_of( - head, - struct kmmio_delayed_release, - rcu); - struct kmmio_fault_page *f = dr->release_list; - while (f) { - struct kmmio_fault_page *next = f->release_next; - BUG_ON(f->count); - kfree(f); - f = next; - } - kfree(dr); -} - -static void remove_kmmio_fault_pages(struct rcu_head *head) -{ - struct kmmio_delayed_release *dr = - container_of(head, struct kmmio_delayed_release, rcu); - struct kmmio_fault_page *f = dr->release_list; - struct kmmio_fault_page **prevp = &dr->release_list; - unsigned long flags; - - spin_lock_irqsave(&kmmio_lock, flags); - while (f) { - if (!f->count) { - list_del_rcu(&f->list); - prevp = &f->release_next; - } else { - *prevp = f->release_next; - f->release_next = NULL; - f->scheduled_for_release = false; - } - f = *prevp; - } - spin_unlock_irqrestore(&kmmio_lock, flags); - - /* This is the real RCU destroy call. */ - call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages); -} - -/* - * Remove a kmmio probe. You have to synchronize_rcu() before you can be - * sure that the callbacks will not be called anymore. Only after that - * you may actually release your struct kmmio_probe. - * - * Unregistering a kmmio fault page has three steps: - * 1. release_kmmio_fault_page() - * Disarm the page, wait a grace period to let all faults finish. - * 2. remove_kmmio_fault_pages() - * Remove the pages from kmmio_page_table. - * 3. rcu_free_kmmio_fault_pages() - * Actually free the kmmio_fault_page structs as with RCU. 
- */ -void unregister_kmmio_probe(struct kmmio_probe *p) -{ - unsigned long flags; - unsigned long size = 0; - const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); - struct kmmio_fault_page *release_list = NULL; - struct kmmio_delayed_release *drelease; - - spin_lock_irqsave(&kmmio_lock, flags); - while (size < size_lim) { - release_kmmio_fault_page(p->addr + size, &release_list); - size += PAGE_SIZE; - } - list_del_rcu(&p->list); - kmmio_count--; - spin_unlock_irqrestore(&kmmio_lock, flags); - - if (!release_list) - return; - - drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC); - if (!drelease) { - pr_crit("leaking kmmio_fault_page objects.\n"); - return; - } - drelease->release_list = release_list; - - /* - * This is not really RCU here. We have just disarmed a set of - * pages so that they cannot trigger page faults anymore. However, - * we cannot remove the pages from kmmio_page_table, - * because a probe hit might be in flight on another CPU. The - * pages are collected into a list, and they will be removed from - * kmmio_page_table when it is certain that no probe hit related to - * these pages can be in flight. RCU grace period sounds like a - * good choice. - * - * If we removed the pages too early, kmmio page fault handler might - * not find the respective kmmio_fault_page and determine it's not - * a kmmio fault, when it actually is. This would lead to madness. 
- */ - call_rcu(&drelease->rcu, remove_kmmio_fault_pages); -} -EXPORT_SYMBOL(unregister_kmmio_probe); - -static int -kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args) -{ - struct die_args *arg = args; - unsigned long* dr6_p = (unsigned long *)ERR_PTR(arg->err); - - if (val == DIE_DEBUG && (*dr6_p & DR_STEP)) - if (post_kmmio_handler(*dr6_p, arg->regs) == 1) { - /* - * Reset the BS bit in dr6 (pointed by args->err) to - * denote completion of processing - */ - *dr6_p &= ~DR_STEP; - return NOTIFY_STOP; - } - - return NOTIFY_DONE; -} - -static struct notifier_block nb_die = { - .notifier_call = kmmio_die_notifier -}; - -int kmmio_init(void) -{ - int i; - - for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) - INIT_LIST_HEAD(&kmmio_page_table[i]); - - return register_die_notifier(&nb_die); -} - -void kmmio_cleanup(void) -{ - int i; - - unregister_die_notifier(&nb_die); - for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) { - WARN_ONCE(!list_empty(&kmmio_page_table[i]), - KERN_ERR "kmmio_page_table not empty at cleanup, any further tracing will leak memory.\n"); - } -} diff --git a/ANDROID_3.4.5/arch/x86/mm/memtest.c b/ANDROID_3.4.5/arch/x86/mm/memtest.c deleted file mode 100644 index c80b9fb9..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/memtest.c +++ /dev/null @@ -1,124 +0,0 @@ -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/string.h> -#include <linux/types.h> -#include <linux/mm.h> -#include <linux/smp.h> -#include <linux/init.h> -#include <linux/pfn.h> -#include <linux/memblock.h> - -static u64 patterns[] __initdata = { - 0, - 0xffffffffffffffffULL, - 0x5555555555555555ULL, - 0xaaaaaaaaaaaaaaaaULL, - 0x1111111111111111ULL, - 0x2222222222222222ULL, - 0x4444444444444444ULL, - 0x8888888888888888ULL, - 0x3333333333333333ULL, - 0x6666666666666666ULL, - 0x9999999999999999ULL, - 0xccccccccccccccccULL, - 0x7777777777777777ULL, - 0xbbbbbbbbbbbbbbbbULL, - 0xddddddddddddddddULL, - 0xeeeeeeeeeeeeeeeeULL, - 0x7a6c7258554e494cULL, /* yeah ;-) */ -}; 
- -static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad) -{ - printk(KERN_INFO " %016llx bad mem addr %010llx - %010llx reserved\n", - (unsigned long long) pattern, - (unsigned long long) start_bad, - (unsigned long long) end_bad); - memblock_reserve(start_bad, end_bad - start_bad); -} - -static void __init memtest(u64 pattern, u64 start_phys, u64 size) -{ - u64 *p, *start, *end; - u64 start_bad, last_bad; - u64 start_phys_aligned; - const size_t incr = sizeof(pattern); - - start_phys_aligned = ALIGN(start_phys, incr); - start = __va(start_phys_aligned); - end = start + (size - (start_phys_aligned - start_phys)) / incr; - start_bad = 0; - last_bad = 0; - - for (p = start; p < end; p++) - *p = pattern; - - for (p = start; p < end; p++, start_phys_aligned += incr) { - if (*p == pattern) - continue; - if (start_phys_aligned == last_bad + incr) { - last_bad += incr; - continue; - } - if (start_bad) - reserve_bad_mem(pattern, start_bad, last_bad + incr); - start_bad = last_bad = start_phys_aligned; - } - if (start_bad) - reserve_bad_mem(pattern, start_bad, last_bad + incr); -} - -static void __init do_one_pass(u64 pattern, u64 start, u64 end) -{ - u64 i; - phys_addr_t this_start, this_end; - - for_each_free_mem_range(i, MAX_NUMNODES, &this_start, &this_end, NULL) { - this_start = clamp_t(phys_addr_t, this_start, start, end); - this_end = clamp_t(phys_addr_t, this_end, start, end); - if (this_start < this_end) { - printk(KERN_INFO " %010llx - %010llx pattern %016llx\n", - (unsigned long long)this_start, - (unsigned long long)this_end, - (unsigned long long)cpu_to_be64(pattern)); - memtest(pattern, this_start, this_end - this_start); - } - } -} - -/* default is disabled */ -static int memtest_pattern __initdata; - -static int __init parse_memtest(char *arg) -{ - if (arg) - memtest_pattern = simple_strtoul(arg, NULL, 0); - else - memtest_pattern = ARRAY_SIZE(patterns); - - return 0; -} - -early_param("memtest", parse_memtest); - -void __init 
early_memtest(unsigned long start, unsigned long end) -{ - unsigned int i; - unsigned int idx = 0; - - if (!memtest_pattern) - return; - - printk(KERN_INFO "early_memtest: # of tests: %d\n", memtest_pattern); - for (i = 0; i < memtest_pattern; i++) { - idx = i % ARRAY_SIZE(patterns); - do_one_pass(patterns[idx], start, end); - } - - if (idx > 0) { - printk(KERN_INFO "early_memtest: wipe out " - "test pattern from memory\n"); - /* additional test with pattern 0 will do this */ - do_one_pass(0, start, end); - } -} diff --git a/ANDROID_3.4.5/arch/x86/mm/mmap.c b/ANDROID_3.4.5/arch/x86/mm/mmap.c deleted file mode 100644 index 845df683..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/mmap.c +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Flexible mmap layout support - * - * Based on code by Ingo Molnar and Andi Kleen, copyrighted - * as follows: - * - * Copyright 2003-2009 Red Hat Inc. - * All Rights Reserved. - * Copyright 2005 Andi Kleen, SUSE Labs. - * Copyright 2007 Jiri Kosina, SUSE Labs. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/personality.h> -#include <linux/mm.h> -#include <linux/random.h> -#include <linux/limits.h> -#include <linux/sched.h> -#include <asm/elf.h> - -struct __read_mostly va_alignment va_align = { - .flags = -1, -}; - -static unsigned int stack_maxrandom_size(void) -{ - unsigned int max = 0; - if ((current->flags & PF_RANDOMIZE) && - !(current->personality & ADDR_NO_RANDOMIZE)) { - max = ((-1U) & STACK_RND_MASK) << PAGE_SHIFT; - } - - return max; -} - -/* - * Top of mmap area (just below the process stack). - * - * Leave an at least ~128 MB hole with possible stack randomization. - */ -#define MIN_GAP (128*1024*1024UL + stack_maxrandom_size()) -#define MAX_GAP (TASK_SIZE/6*5) - -static int mmap_is_legacy(void) -{ - if (current->personality & ADDR_COMPAT_LAYOUT) - return 1; - - if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) - return 1; - - return sysctl_legacy_va_layout; -} - -static unsigned long mmap_rnd(void) -{ - unsigned long rnd = 0; - - /* - * 8 bits of randomness in 32bit mmaps, 20 address space bits - * 28 bits of randomness in 64bit mmaps, 40 address space bits - */ - if (current->flags & PF_RANDOMIZE) { - if (mmap_is_ia32()) - rnd = get_random_int() % (1<<8); - else - rnd = get_random_int() % (1<<28); - } - return rnd << PAGE_SHIFT; -} - -static unsigned long mmap_base(void) -{ - unsigned long gap = rlimit(RLIMIT_STACK); - - if (gap < MIN_GAP) - gap = MIN_GAP; - else if (gap > MAX_GAP) - gap = MAX_GAP; - - return PAGE_ALIGN(TASK_SIZE - gap - mmap_rnd()); -} - -/* - * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64 - * does, but not when emulating X86_32 - */ -static unsigned long mmap_legacy_base(void) -{ - if (mmap_is_ia32()) - return TASK_UNMAPPED_BASE; - else - return TASK_UNMAPPED_BASE + mmap_rnd(); -} 
- -/* - * This function, called very early during the creation of a new - * process VM image, sets up which VM layout function to use: - */ -void arch_pick_mmap_layout(struct mm_struct *mm) -{ - if (mmap_is_legacy()) { - mm->mmap_base = mmap_legacy_base(); - mm->get_unmapped_area = arch_get_unmapped_area; - mm->unmap_area = arch_unmap_area; - } else { - mm->mmap_base = mmap_base(); - mm->get_unmapped_area = arch_get_unmapped_area_topdown; - mm->unmap_area = arch_unmap_area_topdown; - } -} diff --git a/ANDROID_3.4.5/arch/x86/mm/mmio-mod.c b/ANDROID_3.4.5/arch/x86/mm/mmio-mod.c deleted file mode 100644 index dc0b7277..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/mmio-mod.c +++ /dev/null @@ -1,480 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright (C) IBM Corporation, 2005 - * Jeff Muizelaar, 2006, 2007 - * Pekka Paalanen, 2008 <pq@iki.fi> - * - * Derived from the read-mod example from relay-examples by Tom Zanussi. 
- */ - -#define pr_fmt(fmt) "mmiotrace: " fmt - -#define DEBUG 1 - -#include <linux/module.h> -#include <linux/debugfs.h> -#include <linux/slab.h> -#include <linux/uaccess.h> -#include <linux/io.h> -#include <linux/kallsyms.h> -#include <asm/pgtable.h> -#include <linux/mmiotrace.h> -#include <asm/e820.h> /* for ISA_START_ADDRESS */ -#include <linux/atomic.h> -#include <linux/percpu.h> -#include <linux/cpu.h> - -#include "pf_in.h" - -struct trap_reason { - unsigned long addr; - unsigned long ip; - enum reason_type type; - int active_traces; -}; - -struct remap_trace { - struct list_head list; - struct kmmio_probe probe; - resource_size_t phys; - unsigned long id; -}; - -/* Accessed per-cpu. */ -static DEFINE_PER_CPU(struct trap_reason, pf_reason); -static DEFINE_PER_CPU(struct mmiotrace_rw, cpu_trace); - -static DEFINE_MUTEX(mmiotrace_mutex); -static DEFINE_SPINLOCK(trace_lock); -static atomic_t mmiotrace_enabled; -static LIST_HEAD(trace_list); /* struct remap_trace */ - -/* - * Locking in this file: - * - mmiotrace_mutex enforces enable/disable_mmiotrace() critical sections. - * - mmiotrace_enabled may be modified only when holding mmiotrace_mutex - * and trace_lock. - * - Routines depending on is_enabled() must take trace_lock. - * - trace_list users must hold trace_lock. - * - is_enabled() guarantees that mmio_trace_{rw,mapping} are allowed. - * - pre/post callbacks assume the effect of is_enabled() being true. 
- */ - -/* module parameters */ -static unsigned long filter_offset; -static bool nommiotrace; -static bool trace_pc; - -module_param(filter_offset, ulong, 0); -module_param(nommiotrace, bool, 0); -module_param(trace_pc, bool, 0); - -MODULE_PARM_DESC(filter_offset, "Start address of traced mappings."); -MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing."); -MODULE_PARM_DESC(trace_pc, "Record address of faulting instructions."); - -static bool is_enabled(void) -{ - return atomic_read(&mmiotrace_enabled); -} - -static void print_pte(unsigned long address) -{ - unsigned int level; - pte_t *pte = lookup_address(address, &level); - - if (!pte) { - pr_err("Error in %s: no pte for page 0x%08lx\n", - __func__, address); - return; - } - - if (level == PG_LEVEL_2M) { - pr_emerg("4MB pages are not currently supported: 0x%08lx\n", - address); - BUG(); - } - pr_info("pte for 0x%lx: 0x%llx 0x%llx\n", - address, - (unsigned long long)pte_val(*pte), - (unsigned long long)pte_val(*pte) & _PAGE_PRESENT); -} - -/* - * For some reason the pre/post pairs have been called in an - * unmatched order. Report and die. 
- */ -static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) -{ - const struct trap_reason *my_reason = &get_cpu_var(pf_reason); - pr_emerg("unexpected fault for address: 0x%08lx, last fault for address: 0x%08lx\n", - addr, my_reason->addr); - print_pte(addr); - print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip); - print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip); -#ifdef __i386__ - pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", - regs->ax, regs->bx, regs->cx, regs->dx); - pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", - regs->si, regs->di, regs->bp, regs->sp); -#else - pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n", - regs->ax, regs->cx, regs->dx); - pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n", - regs->si, regs->di, regs->bp, regs->sp); -#endif - put_cpu_var(pf_reason); - BUG(); -} - -static void pre(struct kmmio_probe *p, struct pt_regs *regs, - unsigned long addr) -{ - struct trap_reason *my_reason = &get_cpu_var(pf_reason); - struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace); - const unsigned long instptr = instruction_pointer(regs); - const enum reason_type type = get_ins_type(instptr); - struct remap_trace *trace = p->private; - - /* it doesn't make sense to have more than one active trace per cpu */ - if (my_reason->active_traces) - die_kmmio_nesting_error(regs, addr); - else - my_reason->active_traces++; - - my_reason->type = type; - my_reason->addr = addr; - my_reason->ip = instptr; - - my_trace->phys = addr - trace->probe.addr + trace->phys; - my_trace->map_id = trace->id; - - /* - * Only record the program counter when requested. - * It may taint clean-room reverse engineering. - */ - if (trace_pc) - my_trace->pc = instptr; - else - my_trace->pc = 0; - - /* - * XXX: the timestamp recorded will be *after* the tracing has been - * done, not at the time we hit the instruction. SMP implications - * on event ordering? 
- */ - - switch (type) { - case REG_READ: - my_trace->opcode = MMIO_READ; - my_trace->width = get_ins_mem_width(instptr); - break; - case REG_WRITE: - my_trace->opcode = MMIO_WRITE; - my_trace->width = get_ins_mem_width(instptr); - my_trace->value = get_ins_reg_val(instptr, regs); - break; - case IMM_WRITE: - my_trace->opcode = MMIO_WRITE; - my_trace->width = get_ins_mem_width(instptr); - my_trace->value = get_ins_imm_val(instptr); - break; - default: - { - unsigned char *ip = (unsigned char *)instptr; - my_trace->opcode = MMIO_UNKNOWN_OP; - my_trace->width = 0; - my_trace->value = (*ip) << 16 | *(ip + 1) << 8 | - *(ip + 2); - } - } - put_cpu_var(cpu_trace); - put_cpu_var(pf_reason); -} - -static void post(struct kmmio_probe *p, unsigned long condition, - struct pt_regs *regs) -{ - struct trap_reason *my_reason = &get_cpu_var(pf_reason); - struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace); - - /* this should always return the active_trace count to 0 */ - my_reason->active_traces--; - if (my_reason->active_traces) { - pr_emerg("unexpected post handler"); - BUG(); - } - - switch (my_reason->type) { - case REG_READ: - my_trace->value = get_ins_reg_val(my_reason->ip, regs); - break; - default: - break; - } - - mmio_trace_rw(my_trace); - put_cpu_var(cpu_trace); - put_cpu_var(pf_reason); -} - -static void ioremap_trace_core(resource_size_t offset, unsigned long size, - void __iomem *addr) -{ - static atomic_t next_id; - struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL); - /* These are page-unaligned. 
*/ - struct mmiotrace_map map = { - .phys = offset, - .virt = (unsigned long)addr, - .len = size, - .opcode = MMIO_PROBE - }; - - if (!trace) { - pr_err("kmalloc failed in ioremap\n"); - return; - } - - *trace = (struct remap_trace) { - .probe = { - .addr = (unsigned long)addr, - .len = size, - .pre_handler = pre, - .post_handler = post, - .private = trace - }, - .phys = offset, - .id = atomic_inc_return(&next_id) - }; - map.map_id = trace->id; - - spin_lock_irq(&trace_lock); - if (!is_enabled()) { - kfree(trace); - goto not_enabled; - } - - mmio_trace_mapping(&map); - list_add_tail(&trace->list, &trace_list); - if (!nommiotrace) - register_kmmio_probe(&trace->probe); - -not_enabled: - spin_unlock_irq(&trace_lock); -} - -void mmiotrace_ioremap(resource_size_t offset, unsigned long size, - void __iomem *addr) -{ - if (!is_enabled()) /* recheck and proper locking in *_core() */ - return; - - pr_debug("ioremap_*(0x%llx, 0x%lx) = %p\n", - (unsigned long long)offset, size, addr); - if ((filter_offset) && (offset != filter_offset)) - return; - ioremap_trace_core(offset, size, addr); -} - -static void iounmap_trace_core(volatile void __iomem *addr) -{ - struct mmiotrace_map map = { - .phys = 0, - .virt = (unsigned long)addr, - .len = 0, - .opcode = MMIO_UNPROBE - }; - struct remap_trace *trace; - struct remap_trace *tmp; - struct remap_trace *found_trace = NULL; - - pr_debug("Unmapping %p.\n", addr); - - spin_lock_irq(&trace_lock); - if (!is_enabled()) - goto not_enabled; - - list_for_each_entry_safe(trace, tmp, &trace_list, list) { - if ((unsigned long)addr == trace->probe.addr) { - if (!nommiotrace) - unregister_kmmio_probe(&trace->probe); - list_del(&trace->list); - found_trace = trace; - break; - } - } - map.map_id = (found_trace) ? 
found_trace->id : -1; - mmio_trace_mapping(&map); - -not_enabled: - spin_unlock_irq(&trace_lock); - if (found_trace) { - synchronize_rcu(); /* unregister_kmmio_probe() requirement */ - kfree(found_trace); - } -} - -void mmiotrace_iounmap(volatile void __iomem *addr) -{ - might_sleep(); - if (is_enabled()) /* recheck and proper locking in *_core() */ - iounmap_trace_core(addr); -} - -int mmiotrace_printk(const char *fmt, ...) -{ - int ret = 0; - va_list args; - unsigned long flags; - va_start(args, fmt); - - spin_lock_irqsave(&trace_lock, flags); - if (is_enabled()) - ret = mmio_trace_printk(fmt, args); - spin_unlock_irqrestore(&trace_lock, flags); - - va_end(args); - return ret; -} -EXPORT_SYMBOL(mmiotrace_printk); - -static void clear_trace_list(void) -{ - struct remap_trace *trace; - struct remap_trace *tmp; - - /* - * No locking required, because the caller ensures we are in a - * critical section via mutex, and is_enabled() is false, - * i.e. nothing can traverse or modify this list. - * Caller also ensures is_enabled() cannot change. 
- */ - list_for_each_entry(trace, &trace_list, list) { - pr_notice("purging non-iounmapped trace @0x%08lx, size 0x%lx.\n", - trace->probe.addr, trace->probe.len); - if (!nommiotrace) - unregister_kmmio_probe(&trace->probe); - } - synchronize_rcu(); /* unregister_kmmio_probe() requirement */ - - list_for_each_entry_safe(trace, tmp, &trace_list, list) { - list_del(&trace->list); - kfree(trace); - } -} - -#ifdef CONFIG_HOTPLUG_CPU -static cpumask_var_t downed_cpus; - -static void enter_uniprocessor(void) -{ - int cpu; - int err; - - if (downed_cpus == NULL && - !alloc_cpumask_var(&downed_cpus, GFP_KERNEL)) { - pr_notice("Failed to allocate mask\n"); - goto out; - } - - get_online_cpus(); - cpumask_copy(downed_cpus, cpu_online_mask); - cpumask_clear_cpu(cpumask_first(cpu_online_mask), downed_cpus); - if (num_online_cpus() > 1) - pr_notice("Disabling non-boot CPUs...\n"); - put_online_cpus(); - - for_each_cpu(cpu, downed_cpus) { - err = cpu_down(cpu); - if (!err) - pr_info("CPU%d is down.\n", cpu); - else - pr_err("Error taking CPU%d down: %d\n", cpu, err); - } -out: - if (num_online_cpus() > 1) - pr_warning("multiple CPUs still online, may miss events.\n"); -} - -/* __ref because leave_uniprocessor calls cpu_up which is __cpuinit, - but this whole function is ifdefed CONFIG_HOTPLUG_CPU */ -static void __ref leave_uniprocessor(void) -{ - int cpu; - int err; - - if (downed_cpus == NULL || cpumask_weight(downed_cpus) == 0) - return; - pr_notice("Re-enabling CPUs...\n"); - for_each_cpu(cpu, downed_cpus) { - err = cpu_up(cpu); - if (!err) - pr_info("enabled CPU%d.\n", cpu); - else - pr_err("cannot re-enable CPU%d: %d\n", cpu, err); - } -} - -#else /* !CONFIG_HOTPLUG_CPU */ -static void enter_uniprocessor(void) -{ - if (num_online_cpus() > 1) - pr_warning("multiple CPUs are online, may miss events. 
" - "Suggest booting with maxcpus=1 kernel argument.\n"); -} - -static void leave_uniprocessor(void) -{ -} -#endif - -void enable_mmiotrace(void) -{ - mutex_lock(&mmiotrace_mutex); - if (is_enabled()) - goto out; - - if (nommiotrace) - pr_info("MMIO tracing disabled.\n"); - kmmio_init(); - enter_uniprocessor(); - spin_lock_irq(&trace_lock); - atomic_inc(&mmiotrace_enabled); - spin_unlock_irq(&trace_lock); - pr_info("enabled.\n"); -out: - mutex_unlock(&mmiotrace_mutex); -} - -void disable_mmiotrace(void) -{ - mutex_lock(&mmiotrace_mutex); - if (!is_enabled()) - goto out; - - spin_lock_irq(&trace_lock); - atomic_dec(&mmiotrace_enabled); - BUG_ON(is_enabled()); - spin_unlock_irq(&trace_lock); - - clear_trace_list(); /* guarantees: no more kmmio callbacks */ - leave_uniprocessor(); - kmmio_cleanup(); - pr_info("disabled.\n"); -out: - mutex_unlock(&mmiotrace_mutex); -} diff --git a/ANDROID_3.4.5/arch/x86/mm/numa.c b/ANDROID_3.4.5/arch/x86/mm/numa.c deleted file mode 100644 index 19d3fa08..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/numa.c +++ /dev/null @@ -1,834 +0,0 @@ -/* Common code for 32 and 64-bit NUMA */ -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/init.h> -#include <linux/bootmem.h> -#include <linux/memblock.h> -#include <linux/mmzone.h> -#include <linux/ctype.h> -#include <linux/module.h> -#include <linux/nodemask.h> -#include <linux/sched.h> -#include <linux/topology.h> - -#include <asm/e820.h> -#include <asm/proto.h> -#include <asm/dma.h> -#include <asm/acpi.h> -#include <asm/amd_nb.h> - -#include "numa_internal.h" - -int __initdata numa_off; -nodemask_t numa_nodes_parsed __initdata; - -struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; -EXPORT_SYMBOL(node_data); - -static struct numa_meminfo numa_meminfo -#ifndef CONFIG_MEMORY_HOTPLUG -__initdata -#endif -; - -static int numa_distance_cnt; -static u8 *numa_distance; - -static __init int numa_setup(char *opt) -{ - if (!opt) - return -EINVAL; - if 
(!strncmp(opt, "off", 3)) - numa_off = 1; -#ifdef CONFIG_NUMA_EMU - if (!strncmp(opt, "fake=", 5)) - numa_emu_cmdline(opt + 5); -#endif -#ifdef CONFIG_ACPI_NUMA - if (!strncmp(opt, "noacpi", 6)) - acpi_numa = -1; -#endif - return 0; -} -early_param("numa", numa_setup); - -/* - * apicid, cpu, node mappings - */ -s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { - [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE -}; - -int __cpuinit numa_cpu_node(int cpu) -{ - int apicid = early_per_cpu(x86_cpu_to_apicid, cpu); - - if (apicid != BAD_APICID) - return __apicid_to_node[apicid]; - return NUMA_NO_NODE; -} - -cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; -EXPORT_SYMBOL(node_to_cpumask_map); - -/* - * Map cpu index to node index - */ -DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); -EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); - -void __cpuinit numa_set_node(int cpu, int node) -{ - int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); - - /* early setting, no percpu area yet */ - if (cpu_to_node_map) { - cpu_to_node_map[cpu] = node; - return; - } - -#ifdef CONFIG_DEBUG_PER_CPU_MAPS - if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) { - printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu); - dump_stack(); - return; - } -#endif - per_cpu(x86_cpu_to_node_map, cpu) = node; - - if (node != NUMA_NO_NODE) - set_cpu_numa_node(cpu, node); -} - -void __cpuinit numa_clear_node(int cpu) -{ - numa_set_node(cpu, NUMA_NO_NODE); -} - -/* - * Allocate node_to_cpumask_map based on number of available nodes - * Requires node_possible_map to be valid. - * - * Note: cpumask_of_node() is not valid until after this is done. - * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.) 
- */ -void __init setup_node_to_cpumask_map(void) -{ - unsigned int node, num = 0; - - /* setup nr_node_ids if not done yet */ - if (nr_node_ids == MAX_NUMNODES) { - for_each_node_mask(node, node_possible_map) - num = node; - nr_node_ids = num + 1; - } - - /* allocate the map */ - for (node = 0; node < nr_node_ids; node++) - alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]); - - /* cpumask_of_node() will now work */ - pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids); -} - -static int __init numa_add_memblk_to(int nid, u64 start, u64 end, - struct numa_meminfo *mi) -{ - /* ignore zero length blks */ - if (start == end) - return 0; - - /* whine about and ignore invalid blks */ - if (start > end || nid < 0 || nid >= MAX_NUMNODES) { - pr_warning("NUMA: Warning: invalid memblk node %d (%Lx-%Lx)\n", - nid, start, end); - return 0; - } - - if (mi->nr_blks >= NR_NODE_MEMBLKS) { - pr_err("NUMA: too many memblk ranges\n"); - return -EINVAL; - } - - mi->blk[mi->nr_blks].start = start; - mi->blk[mi->nr_blks].end = end; - mi->blk[mi->nr_blks].nid = nid; - mi->nr_blks++; - return 0; -} - -/** - * numa_remove_memblk_from - Remove one numa_memblk from a numa_meminfo - * @idx: Index of memblk to remove - * @mi: numa_meminfo to remove memblk from - * - * Remove @idx'th numa_memblk from @mi by shifting @mi->blk[] and - * decrementing @mi->nr_blks. - */ -void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi) -{ - mi->nr_blks--; - memmove(&mi->blk[idx], &mi->blk[idx + 1], - (mi->nr_blks - idx) * sizeof(mi->blk[0])); -} - -/** - * numa_add_memblk - Add one numa_memblk to numa_meminfo - * @nid: NUMA node ID of the new memblk - * @start: Start address of the new memblk - * @end: End address of the new memblk - * - * Add a new memblk to the default numa_meminfo. - * - * RETURNS: - * 0 on success, -errno on failure. 
- */ -int __init numa_add_memblk(int nid, u64 start, u64 end) -{ - return numa_add_memblk_to(nid, start, end, &numa_meminfo); -} - -/* Initialize NODE_DATA for a node on the local memory */ -static void __init setup_node_data(int nid, u64 start, u64 end) -{ - const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE); - bool remapped = false; - u64 nd_pa; - void *nd; - int tnid; - - /* - * Don't confuse VM with a node that doesn't have the - * minimum amount of memory: - */ - if (end && (end - start) < NODE_MIN_SIZE) - return; - - /* initialize remap allocator before aligning to ZONE_ALIGN */ - init_alloc_remap(nid, start, end); - - start = roundup(start, ZONE_ALIGN); - - printk(KERN_INFO "Initmem setup node %d %016Lx-%016Lx\n", - nid, start, end); - - /* - * Allocate node data. Try remap allocator first, node-local - * memory and then any node. Never allocate in DMA zone. - */ - nd = alloc_remap(nid, nd_size); - if (nd) { - nd_pa = __pa(nd); - remapped = true; - } else { - nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid); - if (!nd_pa) { - pr_err("Cannot find %zu bytes in node %d\n", - nd_size, nid); - return; - } - nd = __va(nd_pa); - } - - /* report and initialize */ - printk(KERN_INFO " NODE_DATA [%016Lx - %016Lx]%s\n", - nd_pa, nd_pa + nd_size - 1, remapped ? " (remapped)" : ""); - tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); - if (!remapped && tnid != nid) - printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nid, tnid); - - node_data[nid] = nd; - memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); - NODE_DATA(nid)->node_id = nid; - NODE_DATA(nid)->node_start_pfn = start >> PAGE_SHIFT; - NODE_DATA(nid)->node_spanned_pages = (end - start) >> PAGE_SHIFT; - - node_set_online(nid); -} - -/** - * numa_cleanup_meminfo - Cleanup a numa_meminfo - * @mi: numa_meminfo to clean up - * - * Sanitize @mi by merging and removing unncessary memblks. Also check for - * conflicts and clear unused memblks. - * - * RETURNS: - * 0 on success, -errno on failure. 
- */ -int __init numa_cleanup_meminfo(struct numa_meminfo *mi) -{ - const u64 low = 0; - const u64 high = PFN_PHYS(max_pfn); - int i, j, k; - - /* first, trim all entries */ - for (i = 0; i < mi->nr_blks; i++) { - struct numa_memblk *bi = &mi->blk[i]; - - /* make sure all blocks are inside the limits */ - bi->start = max(bi->start, low); - bi->end = min(bi->end, high); - - /* and there's no empty block */ - if (bi->start >= bi->end) - numa_remove_memblk_from(i--, mi); - } - - /* merge neighboring / overlapping entries */ - for (i = 0; i < mi->nr_blks; i++) { - struct numa_memblk *bi = &mi->blk[i]; - - for (j = i + 1; j < mi->nr_blks; j++) { - struct numa_memblk *bj = &mi->blk[j]; - u64 start, end; - - /* - * See whether there are overlapping blocks. Whine - * about but allow overlaps of the same nid. They - * will be merged below. - */ - if (bi->end > bj->start && bi->start < bj->end) { - if (bi->nid != bj->nid) { - pr_err("NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n", - bi->nid, bi->start, bi->end, - bj->nid, bj->start, bj->end); - return -EINVAL; - } - pr_warning("NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n", - bi->nid, bi->start, bi->end, - bj->start, bj->end); - } - - /* - * Join together blocks on the same node, holes - * between which don't overlap with memory on other - * nodes. 
- */ - if (bi->nid != bj->nid) - continue; - start = min(bi->start, bj->start); - end = max(bi->end, bj->end); - for (k = 0; k < mi->nr_blks; k++) { - struct numa_memblk *bk = &mi->blk[k]; - - if (bi->nid == bk->nid) - continue; - if (start < bk->end && end > bk->start) - break; - } - if (k < mi->nr_blks) - continue; - printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%Lx,%Lx)\n", - bi->nid, bi->start, bi->end, bj->start, bj->end, - start, end); - bi->start = start; - bi->end = end; - numa_remove_memblk_from(j--, mi); - } - } - - /* clear unused ones */ - for (i = mi->nr_blks; i < ARRAY_SIZE(mi->blk); i++) { - mi->blk[i].start = mi->blk[i].end = 0; - mi->blk[i].nid = NUMA_NO_NODE; - } - - return 0; -} - -/* - * Set nodes, which have memory in @mi, in *@nodemask. - */ -static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask, - const struct numa_meminfo *mi) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(mi->blk); i++) - if (mi->blk[i].start != mi->blk[i].end && - mi->blk[i].nid != NUMA_NO_NODE) - node_set(mi->blk[i].nid, *nodemask); -} - -/** - * numa_reset_distance - Reset NUMA distance table - * - * The current table is freed. The next numa_set_distance() call will - * create a new one. 
- */ -void __init numa_reset_distance(void) -{ - size_t size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]); - - /* numa_distance could be 1LU marking allocation failure, test cnt */ - if (numa_distance_cnt) - memblock_free(__pa(numa_distance), size); - numa_distance_cnt = 0; - numa_distance = NULL; /* enable table creation */ -} - -static int __init numa_alloc_distance(void) -{ - nodemask_t nodes_parsed; - size_t size; - int i, j, cnt = 0; - u64 phys; - - /* size the new table and allocate it */ - nodes_parsed = numa_nodes_parsed; - numa_nodemask_from_meminfo(&nodes_parsed, &numa_meminfo); - - for_each_node_mask(i, nodes_parsed) - cnt = i; - cnt++; - size = cnt * cnt * sizeof(numa_distance[0]); - - phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), - size, PAGE_SIZE); - if (!phys) { - pr_warning("NUMA: Warning: can't allocate distance table!\n"); - /* don't retry until explicitly reset */ - numa_distance = (void *)1LU; - return -ENOMEM; - } - memblock_reserve(phys, size); - - numa_distance = __va(phys); - numa_distance_cnt = cnt; - - /* fill with the default distances */ - for (i = 0; i < cnt; i++) - for (j = 0; j < cnt; j++) - numa_distance[i * cnt + j] = i == j ? - LOCAL_DISTANCE : REMOTE_DISTANCE; - printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt); - - return 0; -} - -/** - * numa_set_distance - Set NUMA distance from one NUMA to another - * @from: the 'from' node to set distance - * @to: the 'to' node to set distance - * @distance: NUMA distance - * - * Set the distance from node @from to @to to @distance. If distance table - * doesn't exist, one which is large enough to accommodate all the currently - * known nodes will be created. - * - * If such table cannot be allocated, a warning is printed and further - * calls are ignored until the distance table is reset with - * numa_reset_distance(). 
- * - * If @from or @to is higher than the highest known node or lower than zero - * at the time of table creation or @distance doesn't make sense, the call - * is ignored. - * This is to allow simplification of specific NUMA config implementations. - */ -void __init numa_set_distance(int from, int to, int distance) -{ - if (!numa_distance && numa_alloc_distance() < 0) - return; - - if (from >= numa_distance_cnt || to >= numa_distance_cnt || - from < 0 || to < 0) { - pr_warn_once("NUMA: Warning: node ids are out of bound, from=%d to=%d distance=%d\n", - from, to, distance); - return; - } - - if ((u8)distance != distance || - (from == to && distance != LOCAL_DISTANCE)) { - pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n", - from, to, distance); - return; - } - - numa_distance[from * numa_distance_cnt + to] = distance; -} - -int __node_distance(int from, int to) -{ - if (from >= numa_distance_cnt || to >= numa_distance_cnt) - return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE; - return numa_distance[from * numa_distance_cnt + to]; -} -EXPORT_SYMBOL(__node_distance); - -/* - * Sanity check to catch more bad NUMA configurations (they are amazingly - * common). Make sure the nodes cover all memory. - */ -static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi) -{ - u64 numaram, e820ram; - int i; - - numaram = 0; - for (i = 0; i < mi->nr_blks; i++) { - u64 s = mi->blk[i].start >> PAGE_SHIFT; - u64 e = mi->blk[i].end >> PAGE_SHIFT; - numaram += e - s; - numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e); - if ((s64)numaram < 0) - numaram = 0; - } - - e820ram = max_pfn - absent_pages_in_range(0, max_pfn); - - /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ - if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) { - printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. 
Not used.\n", - (numaram << PAGE_SHIFT) >> 20, - (e820ram << PAGE_SHIFT) >> 20); - return false; - } - return true; -} - -static int __init numa_register_memblks(struct numa_meminfo *mi) -{ - unsigned long uninitialized_var(pfn_align); - int i, nid; - - /* Account for nodes with cpus and no memory */ - node_possible_map = numa_nodes_parsed; - numa_nodemask_from_meminfo(&node_possible_map, mi); - if (WARN_ON(nodes_empty(node_possible_map))) - return -EINVAL; - - for (i = 0; i < mi->nr_blks; i++) { - struct numa_memblk *mb = &mi->blk[i]; - memblock_set_node(mb->start, mb->end - mb->start, mb->nid); - } - - /* - * If sections array is gonna be used for pfn -> nid mapping, check - * whether its granularity is fine enough. - */ -#ifdef NODE_NOT_IN_PAGE_FLAGS - pfn_align = node_map_pfn_alignment(); - if (pfn_align && pfn_align < PAGES_PER_SECTION) { - printk(KERN_WARNING "Node alignment %LuMB < min %LuMB, rejecting NUMA config\n", - PFN_PHYS(pfn_align) >> 20, - PFN_PHYS(PAGES_PER_SECTION) >> 20); - return -EINVAL; - } -#endif - if (!numa_meminfo_cover_memory(mi)) - return -EINVAL; - - /* Finally register nodes. */ - for_each_node_mask(nid, node_possible_map) { - u64 start = PFN_PHYS(max_pfn); - u64 end = 0; - - for (i = 0; i < mi->nr_blks; i++) { - if (nid != mi->blk[i].nid) - continue; - start = min(mi->blk[i].start, start); - end = max(mi->blk[i].end, end); - } - - if (start < end) - setup_node_data(nid, start, end); - } - - /* Dump memblock with node info and return. */ - memblock_dump_all(); - return 0; -} - -/* - * There are unfortunately some poorly designed mainboards around that - * only connect memory to a single CPU. This breaks the 1:1 cpu->node - * mapping. To avoid this fill in the mapping for all possible CPUs, - * as the number of CPUs is not known yet. We round robin the existing - * nodes. 
- */ -static void __init numa_init_array(void) -{ - int rr, i; - - rr = first_node(node_online_map); - for (i = 0; i < nr_cpu_ids; i++) { - if (early_cpu_to_node(i) != NUMA_NO_NODE) - continue; - numa_set_node(i, rr); - rr = next_node(rr, node_online_map); - if (rr == MAX_NUMNODES) - rr = first_node(node_online_map); - } -} - -static int __init numa_init(int (*init_func)(void)) -{ - int i; - int ret; - - for (i = 0; i < MAX_LOCAL_APIC; i++) - set_apicid_to_node(i, NUMA_NO_NODE); - - nodes_clear(numa_nodes_parsed); - nodes_clear(node_possible_map); - nodes_clear(node_online_map); - memset(&numa_meminfo, 0, sizeof(numa_meminfo)); - WARN_ON(memblock_set_node(0, ULLONG_MAX, MAX_NUMNODES)); - numa_reset_distance(); - - ret = init_func(); - if (ret < 0) - return ret; - ret = numa_cleanup_meminfo(&numa_meminfo); - if (ret < 0) - return ret; - - numa_emulation(&numa_meminfo, numa_distance_cnt); - - ret = numa_register_memblks(&numa_meminfo); - if (ret < 0) - return ret; - - for (i = 0; i < nr_cpu_ids; i++) { - int nid = early_cpu_to_node(i); - - if (nid == NUMA_NO_NODE) - continue; - if (!node_online(nid)) - numa_clear_node(i); - } - numa_init_array(); - return 0; -} - -/** - * dummy_numa_init - Fallback dummy NUMA init - * - * Used if there's no underlying NUMA architecture, NUMA initialization - * fails, or NUMA is disabled on the command line. - * - * Must online at least one node and add memory blocks that cover all - * allowed memory. This function must not fail. - */ -static int __init dummy_numa_init(void) -{ - printk(KERN_INFO "%s\n", - numa_off ? "NUMA turned off" : "No NUMA configuration found"); - printk(KERN_INFO "Faking a node at %016Lx-%016Lx\n", - 0LLU, PFN_PHYS(max_pfn)); - - node_set(0, numa_nodes_parsed); - numa_add_memblk(0, 0, PFN_PHYS(max_pfn)); - - return 0; -} - -/** - * x86_numa_init - Initialize NUMA - * - * Try each configured NUMA initialization method until one succeeds. 
The - * last fallback is dummy single node config encomapssing whole memory and - * never fails. - */ -void __init x86_numa_init(void) -{ - if (!numa_off) { -#ifdef CONFIG_X86_NUMAQ - if (!numa_init(numaq_numa_init)) - return; -#endif -#ifdef CONFIG_ACPI_NUMA - if (!numa_init(x86_acpi_numa_init)) - return; -#endif -#ifdef CONFIG_AMD_NUMA - if (!numa_init(amd_numa_init)) - return; -#endif - } - - numa_init(dummy_numa_init); -} - -static __init int find_near_online_node(int node) -{ - int n, val; - int min_val = INT_MAX; - int best_node = -1; - - for_each_online_node(n) { - val = node_distance(node, n); - - if (val < min_val) { - min_val = val; - best_node = n; - } - } - - return best_node; -} - -/* - * Setup early cpu_to_node. - * - * Populate cpu_to_node[] only if x86_cpu_to_apicid[], - * and apicid_to_node[] tables have valid entries for a CPU. - * This means we skip cpu_to_node[] initialisation for NUMA - * emulation and faking node case (when running a kernel compiled - * for NUMA on a non NUMA box), which is OK as cpu_to_node[] - * is already initialized in a round robin manner at numa_init_array, - * prior to this call, and this initialization is good enough - * for the fake NUMA cases. - * - * Called before the per_cpu areas are setup. 
- */ -void __init init_cpu_to_node(void) -{ - int cpu; - u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); - - BUG_ON(cpu_to_apicid == NULL); - - for_each_possible_cpu(cpu) { - int node = numa_cpu_node(cpu); - - if (node == NUMA_NO_NODE) - continue; - if (!node_online(node)) - node = find_near_online_node(node); - numa_set_node(cpu, node); - } -} - -#ifndef CONFIG_DEBUG_PER_CPU_MAPS - -# ifndef CONFIG_NUMA_EMU -void __cpuinit numa_add_cpu(int cpu) -{ - cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); -} - -void __cpuinit numa_remove_cpu(int cpu) -{ - cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); -} -# endif /* !CONFIG_NUMA_EMU */ - -#else /* !CONFIG_DEBUG_PER_CPU_MAPS */ - -int __cpu_to_node(int cpu) -{ - if (early_per_cpu_ptr(x86_cpu_to_node_map)) { - printk(KERN_WARNING - "cpu_to_node(%d): usage too early!\n", cpu); - dump_stack(); - return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; - } - return per_cpu(x86_cpu_to_node_map, cpu); -} -EXPORT_SYMBOL(__cpu_to_node); - -/* - * Same function as cpu_to_node() but used if called before the - * per_cpu areas are setup. 
- */ -int early_cpu_to_node(int cpu) -{ - if (early_per_cpu_ptr(x86_cpu_to_node_map)) - return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; - - if (!cpu_possible(cpu)) { - printk(KERN_WARNING - "early_cpu_to_node(%d): no per_cpu area!\n", cpu); - dump_stack(); - return NUMA_NO_NODE; - } - return per_cpu(x86_cpu_to_node_map, cpu); -} - -void debug_cpumask_set_cpu(int cpu, int node, bool enable) -{ - struct cpumask *mask; - char buf[64]; - - if (node == NUMA_NO_NODE) { - /* early_cpu_to_node() already emits a warning and trace */ - return; - } - mask = node_to_cpumask_map[node]; - if (!mask) { - pr_err("node_to_cpumask_map[%i] NULL\n", node); - dump_stack(); - return; - } - - if (enable) - cpumask_set_cpu(cpu, mask); - else - cpumask_clear_cpu(cpu, mask); - - cpulist_scnprintf(buf, sizeof(buf), mask); - printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", - enable ? "numa_add_cpu" : "numa_remove_cpu", - cpu, node, buf); - return; -} - -# ifndef CONFIG_NUMA_EMU -static void __cpuinit numa_set_cpumask(int cpu, bool enable) -{ - debug_cpumask_set_cpu(cpu, early_cpu_to_node(cpu), enable); -} - -void __cpuinit numa_add_cpu(int cpu) -{ - numa_set_cpumask(cpu, true); -} - -void __cpuinit numa_remove_cpu(int cpu) -{ - numa_set_cpumask(cpu, false); -} -# endif /* !CONFIG_NUMA_EMU */ - -/* - * Returns a pointer to the bitmask of CPUs on Node 'node'. 
- */ -const struct cpumask *cpumask_of_node(int node) -{ - if (node >= nr_node_ids) { - printk(KERN_WARNING - "cpumask_of_node(%d): node > nr_node_ids(%d)\n", - node, nr_node_ids); - dump_stack(); - return cpu_none_mask; - } - if (node_to_cpumask_map[node] == NULL) { - printk(KERN_WARNING - "cpumask_of_node(%d): no node_to_cpumask_map!\n", - node); - dump_stack(); - return cpu_online_mask; - } - return node_to_cpumask_map[node]; -} -EXPORT_SYMBOL(cpumask_of_node); - -#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ - -#ifdef CONFIG_MEMORY_HOTPLUG -int memory_add_physaddr_to_nid(u64 start) -{ - struct numa_meminfo *mi = &numa_meminfo; - int nid = mi->blk[0].nid; - int i; - - for (i = 0; i < mi->nr_blks; i++) - if (mi->blk[i].start <= start && mi->blk[i].end > start) - nid = mi->blk[i].nid; - return nid; -} -EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); -#endif diff --git a/ANDROID_3.4.5/arch/x86/mm/numa_32.c b/ANDROID_3.4.5/arch/x86/mm/numa_32.c deleted file mode 100644 index 534255a3..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/numa_32.c +++ /dev/null @@ -1,265 +0,0 @@ -/* - * Written by: Patricia Gaughen <gone@us.ibm.com>, IBM Corporation - * August 2002: added remote node KVA remap - Martin J. Bligh - * - * Copyright (C) 2002, IBM Corp. - * - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <linux/bootmem.h> -#include <linux/memblock.h> -#include <linux/module.h> - -#include "numa_internal.h" - -#ifdef CONFIG_DISCONTIGMEM -/* - * 4) physnode_map - the mapping between a pfn and owning node - * physnode_map keeps track of the physical memory layout of a generic - * numa node on a 64Mb break (each element of the array will - * represent 64Mb of memory and will be marked by the node id. so, - * if the first gig is on node 0, and the second gig is on node 1 - * physnode_map will contain: - * - * physnode_map[0-15] = 0; - * physnode_map[16-31] = 1; - * physnode_map[32- ] = -1; - */ -s8 physnode_map[MAX_SECTIONS] __read_mostly = { [0 ... (MAX_SECTIONS - 1)] = -1}; -EXPORT_SYMBOL(physnode_map); - -void memory_present(int nid, unsigned long start, unsigned long end) -{ - unsigned long pfn; - - printk(KERN_INFO "Node: %d, start_pfn: %lx, end_pfn: %lx\n", - nid, start, end); - printk(KERN_DEBUG " Setting physnode_map array to node %d for pfns:\n", nid); - printk(KERN_DEBUG " "); - for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) { - physnode_map[pfn / PAGES_PER_SECTION] = nid; - printk(KERN_CONT "%lx ", pfn); - } - printk(KERN_CONT "\n"); -} - -unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, - unsigned long end_pfn) -{ - unsigned long nr_pages = end_pfn - start_pfn; - - if (!nr_pages) - return 0; - - return (nr_pages + 1) * sizeof(struct page); -} -#endif - -extern unsigned long highend_pfn, highstart_pfn; - -#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) - -static void *node_remap_start_vaddr[MAX_NUMNODES]; -void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); - -/* - * Remap memory allocator - */ -static unsigned long node_remap_start_pfn[MAX_NUMNODES]; -static void 
*node_remap_end_vaddr[MAX_NUMNODES]; -static void *node_remap_alloc_vaddr[MAX_NUMNODES]; - -/** - * alloc_remap - Allocate remapped memory - * @nid: NUMA node to allocate memory from - * @size: The size of allocation - * - * Allocate @size bytes from the remap area of NUMA node @nid. The - * size of the remap area is predetermined by init_alloc_remap() and - * only the callers considered there should call this function. For - * more info, please read the comment on top of init_alloc_remap(). - * - * The caller must be ready to handle allocation failure from this - * function and fall back to regular memory allocator in such cases. - * - * CONTEXT: - * Single CPU early boot context. - * - * RETURNS: - * Pointer to the allocated memory on success, %NULL on failure. - */ -void *alloc_remap(int nid, unsigned long size) -{ - void *allocation = node_remap_alloc_vaddr[nid]; - - size = ALIGN(size, L1_CACHE_BYTES); - - if (!allocation || (allocation + size) > node_remap_end_vaddr[nid]) - return NULL; - - node_remap_alloc_vaddr[nid] += size; - memset(allocation, 0, size); - - return allocation; -} - -#ifdef CONFIG_HIBERNATION -/** - * resume_map_numa_kva - add KVA mapping to the temporary page tables created - * during resume from hibernation - * @pgd_base - temporary resume page directory - */ -void resume_map_numa_kva(pgd_t *pgd_base) -{ - int node; - - for_each_online_node(node) { - unsigned long start_va, start_pfn, nr_pages, pfn; - - start_va = (unsigned long)node_remap_start_vaddr[node]; - start_pfn = node_remap_start_pfn[node]; - nr_pages = (node_remap_end_vaddr[node] - - node_remap_start_vaddr[node]) >> PAGE_SHIFT; - - printk(KERN_DEBUG "%s: node %d\n", __func__, node); - - for (pfn = 0; pfn < nr_pages; pfn += PTRS_PER_PTE) { - unsigned long vaddr = start_va + (pfn << PAGE_SHIFT); - pgd_t *pgd = pgd_base + pgd_index(vaddr); - pud_t *pud = pud_offset(pgd, vaddr); - pmd_t *pmd = pmd_offset(pud, vaddr); - - set_pmd(pmd, pfn_pmd(start_pfn + pfn, - 
PAGE_KERNEL_LARGE_EXEC)); - - printk(KERN_DEBUG "%s: %08lx -> pfn %08lx\n", - __func__, vaddr, start_pfn + pfn); - } - } -} -#endif - -/** - * init_alloc_remap - Initialize remap allocator for a NUMA node - * @nid: NUMA node to initizlie remap allocator for - * - * NUMA nodes may end up without any lowmem. As allocating pgdat and - * memmap on a different node with lowmem is inefficient, a special - * remap allocator is implemented which can be used by alloc_remap(). - * - * For each node, the amount of memory which will be necessary for - * pgdat and memmap is calculated and two memory areas of the size are - * allocated - one in the node and the other in lowmem; then, the area - * in the node is remapped to the lowmem area. - * - * As pgdat and memmap must be allocated in lowmem anyway, this - * doesn't waste lowmem address space; however, the actual lowmem - * which gets remapped over is wasted. The amount shouldn't be - * problematic on machines this feature will be used. - * - * Initialization failure isn't fatal. alloc_remap() is used - * opportunistically and the callers will fall back to other memory - * allocation mechanisms on failure. - */ -void __init init_alloc_remap(int nid, u64 start, u64 end) -{ - unsigned long start_pfn = start >> PAGE_SHIFT; - unsigned long end_pfn = end >> PAGE_SHIFT; - unsigned long size, pfn; - u64 node_pa, remap_pa; - void *remap_va; - - /* - * The acpi/srat node info can show hot-add memroy zones where - * memory could be added but not currently present. 
- */ - printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n", - nid, start_pfn, end_pfn); - - /* calculate the necessary space aligned to large page size */ - size = node_memmap_size_bytes(nid, start_pfn, end_pfn); - size += ALIGN(sizeof(pg_data_t), PAGE_SIZE); - size = ALIGN(size, LARGE_PAGE_BYTES); - - /* allocate node memory and the lowmem remap area */ - node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES); - if (!node_pa) { - pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n", - size, nid); - return; - } - memblock_reserve(node_pa, size); - - remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT, - max_low_pfn << PAGE_SHIFT, - size, LARGE_PAGE_BYTES); - if (!remap_pa) { - pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n", - size, nid); - memblock_free(node_pa, size); - return; - } - memblock_reserve(remap_pa, size); - remap_va = phys_to_virt(remap_pa); - - /* perform actual remap */ - for (pfn = 0; pfn < size >> PAGE_SHIFT; pfn += PTRS_PER_PTE) - set_pmd_pfn((unsigned long)remap_va + (pfn << PAGE_SHIFT), - (node_pa >> PAGE_SHIFT) + pfn, - PAGE_KERNEL_LARGE); - - /* initialize remap allocator parameters */ - node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT; - node_remap_start_vaddr[nid] = remap_va; - node_remap_end_vaddr[nid] = remap_va + size; - node_remap_alloc_vaddr[nid] = remap_va; - - printk(KERN_DEBUG "remap_alloc: node %d [%08llx-%08llx) -> [%p-%p)\n", - nid, node_pa, node_pa + size, remap_va, remap_va + size); -} - -void __init initmem_init(void) -{ - x86_numa_init(); - -#ifdef CONFIG_HIGHMEM - highstart_pfn = highend_pfn = max_pfn; - if (max_pfn > max_low_pfn) - highstart_pfn = max_low_pfn; - printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", - pages_to_mb(highend_pfn - highstart_pfn)); - num_physpages = highend_pfn; - high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; -#else - num_physpages = max_low_pfn; - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; -#endif - 
printk(KERN_NOTICE "%ldMB LOWMEM available.\n", - pages_to_mb(max_low_pfn)); - printk(KERN_DEBUG "max_low_pfn = %lx, highstart_pfn = %lx\n", - max_low_pfn, highstart_pfn); - - printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n", - (ulong) pfn_to_kaddr(max_low_pfn)); - - printk(KERN_DEBUG "High memory starts at vaddr %08lx\n", - (ulong) pfn_to_kaddr(highstart_pfn)); - - setup_bootmem_allocator(); -} diff --git a/ANDROID_3.4.5/arch/x86/mm/numa_64.c b/ANDROID_3.4.5/arch/x86/mm/numa_64.c deleted file mode 100644 index 92e27119..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/numa_64.c +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Generic VM initialization for x86-64 NUMA setups. - * Copyright 2002,2003 Andi Kleen, SuSE Labs. - */ -#include <linux/bootmem.h> - -#include "numa_internal.h" - -void __init initmem_init(void) -{ - x86_numa_init(); -} - -unsigned long __init numa_free_all_bootmem(void) -{ - unsigned long pages = 0; - int i; - - for_each_online_node(i) - pages += free_all_bootmem_node(NODE_DATA(i)); - - pages += free_low_memory_core_early(MAX_NUMNODES); - - return pages; -} diff --git a/ANDROID_3.4.5/arch/x86/mm/numa_emulation.c b/ANDROID_3.4.5/arch/x86/mm/numa_emulation.c deleted file mode 100644 index 53489ff6..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/numa_emulation.c +++ /dev/null @@ -1,498 +0,0 @@ -/* - * NUMA emulation - */ -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/topology.h> -#include <linux/memblock.h> -#include <linux/bootmem.h> -#include <asm/dma.h> - -#include "numa_internal.h" - -static int emu_nid_to_phys[MAX_NUMNODES] __cpuinitdata; -static char *emu_cmdline __initdata; - -void __init numa_emu_cmdline(char *str) -{ - emu_cmdline = str; -} - -static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi) -{ - int i; - - for (i = 0; i < mi->nr_blks; i++) - if (mi->blk[i].nid == nid) - return i; - return -ENOENT; -} - -static u64 __init mem_hole_size(u64 start, u64 end) -{ - unsigned long start_pfn = PFN_UP(start); - 
unsigned long end_pfn = PFN_DOWN(end); - - if (start_pfn < end_pfn) - return PFN_PHYS(absent_pages_in_range(start_pfn, end_pfn)); - return 0; -} - -/* - * Sets up nid to range from @start to @end. The return value is -errno if - * something went wrong, 0 otherwise. - */ -static int __init emu_setup_memblk(struct numa_meminfo *ei, - struct numa_meminfo *pi, - int nid, int phys_blk, u64 size) -{ - struct numa_memblk *eb = &ei->blk[ei->nr_blks]; - struct numa_memblk *pb = &pi->blk[phys_blk]; - - if (ei->nr_blks >= NR_NODE_MEMBLKS) { - pr_err("NUMA: Too many emulated memblks, failing emulation\n"); - return -EINVAL; - } - - ei->nr_blks++; - eb->start = pb->start; - eb->end = pb->start + size; - eb->nid = nid; - - if (emu_nid_to_phys[nid] == NUMA_NO_NODE) - emu_nid_to_phys[nid] = nid; - - pb->start += size; - if (pb->start >= pb->end) { - WARN_ON_ONCE(pb->start > pb->end); - numa_remove_memblk_from(phys_blk, pi); - } - - printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid, - eb->start, eb->end, (eb->end - eb->start) >> 20); - return 0; -} - -/* - * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr - * to max_addr. The return value is the number of nodes allocated. - */ -static int __init split_nodes_interleave(struct numa_meminfo *ei, - struct numa_meminfo *pi, - u64 addr, u64 max_addr, int nr_nodes) -{ - nodemask_t physnode_mask = NODE_MASK_NONE; - u64 size; - int big; - int nid = 0; - int i, ret; - - if (nr_nodes <= 0) - return -1; - if (nr_nodes > MAX_NUMNODES) { - pr_info("numa=fake=%d too large, reducing to %d\n", - nr_nodes, MAX_NUMNODES); - nr_nodes = MAX_NUMNODES; - } - - /* - * Calculate target node size. x86_32 freaks on __udivdi3() so do - * the division in ulong number of pages and convert back. 
- */ - size = max_addr - addr - mem_hole_size(addr, max_addr); - size = PFN_PHYS((unsigned long)(size >> PAGE_SHIFT) / nr_nodes); - - /* - * Calculate the number of big nodes that can be allocated as a result - * of consolidating the remainder. - */ - big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * nr_nodes) / - FAKE_NODE_MIN_SIZE; - - size &= FAKE_NODE_MIN_HASH_MASK; - if (!size) { - pr_err("Not enough memory for each node. " - "NUMA emulation disabled.\n"); - return -1; - } - - for (i = 0; i < pi->nr_blks; i++) - node_set(pi->blk[i].nid, physnode_mask); - - /* - * Continue to fill physical nodes with fake nodes until there is no - * memory left on any of them. - */ - while (nodes_weight(physnode_mask)) { - for_each_node_mask(i, physnode_mask) { - u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN); - u64 start, limit, end; - int phys_blk; - - phys_blk = emu_find_memblk_by_nid(i, pi); - if (phys_blk < 0) { - node_clear(i, physnode_mask); - continue; - } - start = pi->blk[phys_blk].start; - limit = pi->blk[phys_blk].end; - end = start + size; - - if (nid < big) - end += FAKE_NODE_MIN_SIZE; - - /* - * Continue to add memory to this fake node if its - * non-reserved memory is less than the per-node size. - */ - while (end - start - mem_hole_size(start, end) < size) { - end += FAKE_NODE_MIN_SIZE; - if (end > limit) { - end = limit; - break; - } - } - - /* - * If there won't be at least FAKE_NODE_MIN_SIZE of - * non-reserved memory in ZONE_DMA32 for the next node, - * this one must extend to the boundary. - */ - if (end < dma32_end && dma32_end - end - - mem_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) - end = dma32_end; - - /* - * If there won't be enough non-reserved memory for the - * next node, this one must extend to the end of the - * physical node. 
- */ - if (limit - end - mem_hole_size(end, limit) < size) - end = limit; - - ret = emu_setup_memblk(ei, pi, nid++ % nr_nodes, - phys_blk, - min(end, limit) - start); - if (ret < 0) - return ret; - } - } - return 0; -} - -/* - * Returns the end address of a node so that there is at least `size' amount of - * non-reserved memory or `max_addr' is reached. - */ -static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size) -{ - u64 end = start + size; - - while (end - start - mem_hole_size(start, end) < size) { - end += FAKE_NODE_MIN_SIZE; - if (end > max_addr) { - end = max_addr; - break; - } - } - return end; -} - -/* - * Sets up fake nodes of `size' interleaved over physical nodes ranging from - * `addr' to `max_addr'. The return value is the number of nodes allocated. - */ -static int __init split_nodes_size_interleave(struct numa_meminfo *ei, - struct numa_meminfo *pi, - u64 addr, u64 max_addr, u64 size) -{ - nodemask_t physnode_mask = NODE_MASK_NONE; - u64 min_size; - int nid = 0; - int i, ret; - - if (!size) - return -1; - /* - * The limit on emulated nodes is MAX_NUMNODES, so the size per node is - * increased accordingly if the requested size is too small. This - * creates a uniform distribution of node sizes across the entire - * machine (but not necessarily over physical nodes). - */ - min_size = (max_addr - addr - mem_hole_size(addr, max_addr)) / MAX_NUMNODES; - min_size = max(min_size, FAKE_NODE_MIN_SIZE); - if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size) - min_size = (min_size + FAKE_NODE_MIN_SIZE) & - FAKE_NODE_MIN_HASH_MASK; - if (size < min_size) { - pr_err("Fake node size %LuMB too small, increasing to %LuMB\n", - size >> 20, min_size >> 20); - size = min_size; - } - size &= FAKE_NODE_MIN_HASH_MASK; - - for (i = 0; i < pi->nr_blks; i++) - node_set(pi->blk[i].nid, physnode_mask); - - /* - * Fill physical nodes with fake nodes of size until there is no memory - * left on any of them. 
- */ - while (nodes_weight(physnode_mask)) { - for_each_node_mask(i, physnode_mask) { - u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN); - u64 start, limit, end; - int phys_blk; - - phys_blk = emu_find_memblk_by_nid(i, pi); - if (phys_blk < 0) { - node_clear(i, physnode_mask); - continue; - } - start = pi->blk[phys_blk].start; - limit = pi->blk[phys_blk].end; - - end = find_end_of_node(start, limit, size); - /* - * If there won't be at least FAKE_NODE_MIN_SIZE of - * non-reserved memory in ZONE_DMA32 for the next node, - * this one must extend to the boundary. - */ - if (end < dma32_end && dma32_end - end - - mem_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) - end = dma32_end; - - /* - * If there won't be enough non-reserved memory for the - * next node, this one must extend to the end of the - * physical node. - */ - if (limit - end - mem_hole_size(end, limit) < size) - end = limit; - - ret = emu_setup_memblk(ei, pi, nid++ % MAX_NUMNODES, - phys_blk, - min(end, limit) - start); - if (ret < 0) - return ret; - } - } - return 0; -} - -/** - * numa_emulation - Emulate NUMA nodes - * @numa_meminfo: NUMA configuration to massage - * @numa_dist_cnt: The size of the physical NUMA distance table - * - * Emulate NUMA nodes according to the numa=fake kernel parameter. - * @numa_meminfo contains the physical memory configuration and is modified - * to reflect the emulated configuration on success. @numa_dist_cnt is - * used to determine the size of the physical distance table. - * - * On success, the following modifications are made. - * - * - @numa_meminfo is updated to reflect the emulated nodes. - * - * - __apicid_to_node[] is updated such that APIC IDs are mapped to the - * emulated nodes. - * - * - NUMA distance table is rebuilt to represent distances between emulated - * nodes. The distances are determined considering how emulated nodes - * are mapped to physical nodes and match the actual distances. 
- * - * - emu_nid_to_phys[] reflects how emulated nodes are mapped to physical - * nodes. This is used by numa_add_cpu() and numa_remove_cpu(). - * - * If emulation is not enabled or fails, emu_nid_to_phys[] is filled with - * identity mapping and no other modification is made. - */ -void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt) -{ - static struct numa_meminfo ei __initdata; - static struct numa_meminfo pi __initdata; - const u64 max_addr = PFN_PHYS(max_pfn); - u8 *phys_dist = NULL; - size_t phys_size = numa_dist_cnt * numa_dist_cnt * sizeof(phys_dist[0]); - int max_emu_nid, dfl_phys_nid; - int i, j, ret; - - if (!emu_cmdline) - goto no_emu; - - memset(&ei, 0, sizeof(ei)); - pi = *numa_meminfo; - - for (i = 0; i < MAX_NUMNODES; i++) - emu_nid_to_phys[i] = NUMA_NO_NODE; - - /* - * If the numa=fake command-line contains a 'M' or 'G', it represents - * the fixed node size. Otherwise, if it is just a single number N, - * split the system RAM into N fake nodes. 
- */ - if (strchr(emu_cmdline, 'M') || strchr(emu_cmdline, 'G')) { - u64 size; - - size = memparse(emu_cmdline, &emu_cmdline); - ret = split_nodes_size_interleave(&ei, &pi, 0, max_addr, size); - } else { - unsigned long n; - - n = simple_strtoul(emu_cmdline, NULL, 0); - ret = split_nodes_interleave(&ei, &pi, 0, max_addr, n); - } - - if (ret < 0) - goto no_emu; - - if (numa_cleanup_meminfo(&ei) < 0) { - pr_warning("NUMA: Warning: constructed meminfo invalid, disabling emulation\n"); - goto no_emu; - } - - /* copy the physical distance table */ - if (numa_dist_cnt) { - u64 phys; - - phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), - phys_size, PAGE_SIZE); - if (!phys) { - pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n"); - goto no_emu; - } - memblock_reserve(phys, phys_size); - phys_dist = __va(phys); - - for (i = 0; i < numa_dist_cnt; i++) - for (j = 0; j < numa_dist_cnt; j++) - phys_dist[i * numa_dist_cnt + j] = - node_distance(i, j); - } - - /* - * Determine the max emulated nid and the default phys nid to use - * for unmapped nodes. - */ - max_emu_nid = 0; - dfl_phys_nid = NUMA_NO_NODE; - for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++) { - if (emu_nid_to_phys[i] != NUMA_NO_NODE) { - max_emu_nid = i; - if (dfl_phys_nid == NUMA_NO_NODE) - dfl_phys_nid = emu_nid_to_phys[i]; - } - } - if (dfl_phys_nid == NUMA_NO_NODE) { - pr_warning("NUMA: Warning: can't determine default physical node, disabling emulation\n"); - goto no_emu; - } - - /* commit */ - *numa_meminfo = ei; - - /* - * Transform __apicid_to_node table to use emulated nids by - * reverse-mapping phys_nid. The maps should always exist but fall - * back to zero just in case. - */ - for (i = 0; i < ARRAY_SIZE(__apicid_to_node); i++) { - if (__apicid_to_node[i] == NUMA_NO_NODE) - continue; - for (j = 0; j < ARRAY_SIZE(emu_nid_to_phys); j++) - if (__apicid_to_node[i] == emu_nid_to_phys[j]) - break; - __apicid_to_node[i] = j < ARRAY_SIZE(emu_nid_to_phys) ? 
j : 0; - } - - /* make sure all emulated nodes are mapped to a physical node */ - for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++) - if (emu_nid_to_phys[i] == NUMA_NO_NODE) - emu_nid_to_phys[i] = dfl_phys_nid; - - /* transform distance table */ - numa_reset_distance(); - for (i = 0; i < max_emu_nid + 1; i++) { - for (j = 0; j < max_emu_nid + 1; j++) { - int physi = emu_nid_to_phys[i]; - int physj = emu_nid_to_phys[j]; - int dist; - - if (physi >= numa_dist_cnt || physj >= numa_dist_cnt) - dist = physi == physj ? - LOCAL_DISTANCE : REMOTE_DISTANCE; - else - dist = phys_dist[physi * numa_dist_cnt + physj]; - - numa_set_distance(i, j, dist); - } - } - - /* free the copied physical distance table */ - if (phys_dist) - memblock_free(__pa(phys_dist), phys_size); - return; - -no_emu: - /* No emulation. Build identity emu_nid_to_phys[] for numa_add_cpu() */ - for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++) - emu_nid_to_phys[i] = i; -} - -#ifndef CONFIG_DEBUG_PER_CPU_MAPS -void __cpuinit numa_add_cpu(int cpu) -{ - int physnid, nid; - - nid = early_cpu_to_node(cpu); - BUG_ON(nid == NUMA_NO_NODE || !node_online(nid)); - - physnid = emu_nid_to_phys[nid]; - - /* - * Map the cpu to each emulated node that is allocated on the physical - * node of the cpu's apic id. 
- */ - for_each_online_node(nid) - if (emu_nid_to_phys[nid] == physnid) - cpumask_set_cpu(cpu, node_to_cpumask_map[nid]); -} - -void __cpuinit numa_remove_cpu(int cpu) -{ - int i; - - for_each_online_node(i) - cpumask_clear_cpu(cpu, node_to_cpumask_map[i]); -} -#else /* !CONFIG_DEBUG_PER_CPU_MAPS */ -static void __cpuinit numa_set_cpumask(int cpu, bool enable) -{ - int nid, physnid; - - nid = early_cpu_to_node(cpu); - if (nid == NUMA_NO_NODE) { - /* early_cpu_to_node() already emits a warning and trace */ - return; - } - - physnid = emu_nid_to_phys[nid]; - - for_each_online_node(nid) { - if (emu_nid_to_phys[nid] != physnid) - continue; - - debug_cpumask_set_cpu(cpu, nid, enable); - } -} - -void __cpuinit numa_add_cpu(int cpu) -{ - numa_set_cpumask(cpu, true); -} - -void __cpuinit numa_remove_cpu(int cpu) -{ - numa_set_cpumask(cpu, false); -} -#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ diff --git a/ANDROID_3.4.5/arch/x86/mm/numa_internal.h b/ANDROID_3.4.5/arch/x86/mm/numa_internal.h deleted file mode 100644 index 7178c3af..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/numa_internal.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef __X86_MM_NUMA_INTERNAL_H -#define __X86_MM_NUMA_INTERNAL_H - -#include <linux/types.h> -#include <asm/numa.h> - -struct numa_memblk { - u64 start; - u64 end; - int nid; -}; - -struct numa_meminfo { - int nr_blks; - struct numa_memblk blk[NR_NODE_MEMBLKS]; -}; - -void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi); -int __init numa_cleanup_meminfo(struct numa_meminfo *mi); -void __init numa_reset_distance(void); - -void __init x86_numa_init(void); - -#ifdef CONFIG_X86_64 -static inline void init_alloc_remap(int nid, u64 start, u64 end) { } -#else -void __init init_alloc_remap(int nid, u64 start, u64 end); -#endif - -#ifdef CONFIG_NUMA_EMU -void __init numa_emulation(struct numa_meminfo *numa_meminfo, - int numa_dist_cnt); -#else -static inline void numa_emulation(struct numa_meminfo *numa_meminfo, - int numa_dist_cnt) -{ } -#endif - 
-#endif /* __X86_MM_NUMA_INTERNAL_H */ diff --git a/ANDROID_3.4.5/arch/x86/mm/pageattr-test.c b/ANDROID_3.4.5/arch/x86/mm/pageattr-test.c deleted file mode 100644 index b0086567..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/pageattr-test.c +++ /dev/null @@ -1,261 +0,0 @@ -/* - * self test for change_page_attr. - * - * Clears the a test pte bit on random pages in the direct mapping, - * then reverts and compares page tables forwards and afterwards. - */ -#include <linux/bootmem.h> -#include <linux/kthread.h> -#include <linux/random.h> -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/mm.h> - -#include <asm/cacheflush.h> -#include <asm/pgtable.h> -#include <asm/kdebug.h> - -/* - * Only print the results of the first pass: - */ -static __read_mostly int print = 1; - -enum { - NTEST = 400, -#ifdef CONFIG_X86_64 - LPS = (1 << PMD_SHIFT), -#elif defined(CONFIG_X86_PAE) - LPS = (1 << PMD_SHIFT), -#else - LPS = (1 << 22), -#endif - GPS = (1<<30) -}; - -#define PAGE_CPA_TEST __pgprot(_PAGE_CPA_TEST) - -static int pte_testbit(pte_t pte) -{ - return pte_flags(pte) & _PAGE_UNUSED1; -} - -struct split_state { - long lpg, gpg, spg, exec; - long min_exec, max_exec; -}; - -static int print_split(struct split_state *s) -{ - long i, expected, missed = 0; - int err = 0; - - s->lpg = s->gpg = s->spg = s->exec = 0; - s->min_exec = ~0UL; - s->max_exec = 0; - for (i = 0; i < max_pfn_mapped; ) { - unsigned long addr = (unsigned long)__va(i << PAGE_SHIFT); - unsigned int level; - pte_t *pte; - - pte = lookup_address(addr, &level); - if (!pte) { - missed++; - i++; - continue; - } - - if (level == PG_LEVEL_1G && sizeof(long) == 8) { - s->gpg++; - i += GPS/PAGE_SIZE; - } else if (level == PG_LEVEL_2M) { - if (!(pte_val(*pte) & _PAGE_PSE)) { - printk(KERN_ERR - "%lx level %d but not PSE %Lx\n", - addr, level, (u64)pte_val(*pte)); - err = 1; - } - s->lpg++; - i += LPS/PAGE_SIZE; - } else { - s->spg++; - i++; - } - if (!(pte_val(*pte) & _PAGE_NX)) { - s->exec++; - if (addr < 
s->min_exec) - s->min_exec = addr; - if (addr > s->max_exec) - s->max_exec = addr; - } - } - if (print) { - printk(KERN_INFO - " 4k %lu large %lu gb %lu x %lu[%lx-%lx] miss %lu\n", - s->spg, s->lpg, s->gpg, s->exec, - s->min_exec != ~0UL ? s->min_exec : 0, - s->max_exec, missed); - } - - expected = (s->gpg*GPS + s->lpg*LPS)/PAGE_SIZE + s->spg + missed; - if (expected != i) { - printk(KERN_ERR "CPA max_pfn_mapped %lu but expected %lu\n", - max_pfn_mapped, expected); - return 1; - } - return err; -} - -static unsigned long addr[NTEST]; -static unsigned int len[NTEST]; - -/* Change the global bit on random pages in the direct mapping */ -static int pageattr_test(void) -{ - struct split_state sa, sb, sc; - unsigned long *bm; - pte_t *pte, pte0; - int failed = 0; - unsigned int level; - int i, k; - int err; - unsigned long test_addr; - - if (print) - printk(KERN_INFO "CPA self-test:\n"); - - bm = vzalloc((max_pfn_mapped + 7) / 8); - if (!bm) { - printk(KERN_ERR "CPA Cannot vmalloc bitmap\n"); - return -ENOMEM; - } - - failed += print_split(&sa); - srandom32(100); - - for (i = 0; i < NTEST; i++) { - unsigned long pfn = random32() % max_pfn_mapped; - - addr[i] = (unsigned long)__va(pfn << PAGE_SHIFT); - len[i] = random32() % 100; - len[i] = min_t(unsigned long, len[i], max_pfn_mapped - pfn - 1); - - if (len[i] == 0) - len[i] = 1; - - pte = NULL; - pte0 = pfn_pte(0, __pgprot(0)); /* shut gcc up */ - - for (k = 0; k < len[i]; k++) { - pte = lookup_address(addr[i] + k*PAGE_SIZE, &level); - if (!pte || pgprot_val(pte_pgprot(*pte)) == 0 || - !(pte_val(*pte) & _PAGE_PRESENT)) { - addr[i] = 0; - break; - } - if (k == 0) { - pte0 = *pte; - } else { - if (pgprot_val(pte_pgprot(*pte)) != - pgprot_val(pte_pgprot(pte0))) { - len[i] = k; - break; - } - } - if (test_bit(pfn + k, bm)) { - len[i] = k; - break; - } - __set_bit(pfn + k, bm); - } - if (!addr[i] || !pte || !k) { - addr[i] = 0; - continue; - } - - test_addr = addr[i]; - err = change_page_attr_set(&test_addr, len[i], 
PAGE_CPA_TEST, 0); - if (err < 0) { - printk(KERN_ERR "CPA %d failed %d\n", i, err); - failed++; - } - - pte = lookup_address(addr[i], &level); - if (!pte || !pte_testbit(*pte) || pte_huge(*pte)) { - printk(KERN_ERR "CPA %lx: bad pte %Lx\n", addr[i], - pte ? (u64)pte_val(*pte) : 0ULL); - failed++; - } - if (level != PG_LEVEL_4K) { - printk(KERN_ERR "CPA %lx: unexpected level %d\n", - addr[i], level); - failed++; - } - - } - vfree(bm); - - failed += print_split(&sb); - - for (i = 0; i < NTEST; i++) { - if (!addr[i]) - continue; - pte = lookup_address(addr[i], &level); - if (!pte) { - printk(KERN_ERR "CPA lookup of %lx failed\n", addr[i]); - failed++; - continue; - } - test_addr = addr[i]; - err = change_page_attr_clear(&test_addr, len[i], PAGE_CPA_TEST, 0); - if (err < 0) { - printk(KERN_ERR "CPA reverting failed: %d\n", err); - failed++; - } - pte = lookup_address(addr[i], &level); - if (!pte || pte_testbit(*pte)) { - printk(KERN_ERR "CPA %lx: bad pte after revert %Lx\n", - addr[i], pte ? (u64)pte_val(*pte) : 0ULL); - failed++; - } - - } - - failed += print_split(&sc); - - if (failed) { - WARN(1, KERN_ERR "NOT PASSED. Please report.\n"); - return -EINVAL; - } else { - if (print) - printk(KERN_INFO "ok.\n"); - } - - return 0; -} - -static int do_pageattr_test(void *__unused) -{ - while (!kthread_should_stop()) { - schedule_timeout_interruptible(HZ*30); - if (pageattr_test() < 0) - break; - if (print) - print--; - } - return 0; -} - -static int start_pageattr_test(void) -{ - struct task_struct *p; - - p = kthread_create(do_pageattr_test, NULL, "pageattr-test"); - if (!IS_ERR(p)) - wake_up_process(p); - else - WARN_ON(1); - - return 0; -} - -module_init(start_pageattr_test); diff --git a/ANDROID_3.4.5/arch/x86/mm/pageattr.c b/ANDROID_3.4.5/arch/x86/mm/pageattr.c deleted file mode 100644 index e1ebde31..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/pageattr.c +++ /dev/null @@ -1,1377 +0,0 @@ -/* - * Copyright 2002 Andi Kleen, SuSE Labs. 
- * Thanks to Ben LaHaise for precious feedback. - */ -#include <linux/highmem.h> -#include <linux/bootmem.h> -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <linux/seq_file.h> -#include <linux/debugfs.h> -#include <linux/pfn.h> -#include <linux/percpu.h> -#include <linux/gfp.h> -#include <linux/pci.h> - -#include <asm/e820.h> -#include <asm/processor.h> -#include <asm/tlbflush.h> -#include <asm/sections.h> -#include <asm/setup.h> -#include <asm/uaccess.h> -#include <asm/pgalloc.h> -#include <asm/proto.h> -#include <asm/pat.h> - -/* - * The current flushing context - we pass it instead of 5 arguments: - */ -struct cpa_data { - unsigned long *vaddr; - pgprot_t mask_set; - pgprot_t mask_clr; - int numpages; - int flags; - unsigned long pfn; - unsigned force_split : 1; - int curpage; - struct page **pages; -}; - -/* - * Serialize cpa() (for !DEBUG_PAGEALLOC which uses large identity mappings) - * using cpa_lock. So that we don't allow any other cpu, with stale large tlb - * entries change the page attribute in parallel to some other cpu - * splitting a large page entry along with changing the attribute. 
- */ -static DEFINE_SPINLOCK(cpa_lock); - -#define CPA_FLUSHTLB 1 -#define CPA_ARRAY 2 -#define CPA_PAGES_ARRAY 4 - -#ifdef CONFIG_PROC_FS -static unsigned long direct_pages_count[PG_LEVEL_NUM]; - -void update_page_count(int level, unsigned long pages) -{ - /* Protect against CPA */ - spin_lock(&pgd_lock); - direct_pages_count[level] += pages; - spin_unlock(&pgd_lock); -} - -static void split_page_count(int level) -{ - direct_pages_count[level]--; - direct_pages_count[level - 1] += PTRS_PER_PTE; -} - -void arch_report_meminfo(struct seq_file *m) -{ - seq_printf(m, "DirectMap4k: %8lu kB\n", - direct_pages_count[PG_LEVEL_4K] << 2); -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) - seq_printf(m, "DirectMap2M: %8lu kB\n", - direct_pages_count[PG_LEVEL_2M] << 11); -#else - seq_printf(m, "DirectMap4M: %8lu kB\n", - direct_pages_count[PG_LEVEL_2M] << 12); -#endif -#ifdef CONFIG_X86_64 - if (direct_gbpages) - seq_printf(m, "DirectMap1G: %8lu kB\n", - direct_pages_count[PG_LEVEL_1G] << 20); -#endif -} -#else -static inline void split_page_count(int level) { } -#endif - -#ifdef CONFIG_X86_64 - -static inline unsigned long highmap_start_pfn(void) -{ - return __pa(_text) >> PAGE_SHIFT; -} - -static inline unsigned long highmap_end_pfn(void) -{ - return __pa(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT; -} - -#endif - -#ifdef CONFIG_DEBUG_PAGEALLOC -# define debug_pagealloc 1 -#else -# define debug_pagealloc 0 -#endif - -static inline int -within(unsigned long addr, unsigned long start, unsigned long end) -{ - return addr >= start && addr < end; -} - -/* - * Flushing functions - */ - -/** - * clflush_cache_range - flush a cache range with clflush - * @addr: virtual start address - * @size: number of bytes to flush - * - * clflush is an unordered instruction which needs fencing with mfence - * to avoid ordering issues. 
- */ -void clflush_cache_range(void *vaddr, unsigned int size) -{ - void *vend = vaddr + size - 1; - - mb(); - - for (; vaddr < vend; vaddr += boot_cpu_data.x86_clflush_size) - clflush(vaddr); - /* - * Flush any possible final partial cacheline: - */ - clflush(vend); - - mb(); -} -EXPORT_SYMBOL_GPL(clflush_cache_range); - -static void __cpa_flush_all(void *arg) -{ - unsigned long cache = (unsigned long)arg; - - /* - * Flush all to work around Errata in early athlons regarding - * large page flushing. - */ - __flush_tlb_all(); - - if (cache && boot_cpu_data.x86 >= 4) - wbinvd(); -} - -static void cpa_flush_all(unsigned long cache) -{ - BUG_ON(irqs_disabled()); - - on_each_cpu(__cpa_flush_all, (void *) cache, 1); -} - -static void __cpa_flush_range(void *arg) -{ - /* - * We could optimize that further and do individual per page - * tlb invalidates for a low number of pages. Caveat: we must - * flush the high aliases on 64bit as well. - */ - __flush_tlb_all(); -} - -static void cpa_flush_range(unsigned long start, int numpages, int cache) -{ - unsigned int i, level; - unsigned long addr; - - BUG_ON(irqs_disabled()); - WARN_ON(PAGE_ALIGN(start) != start); - - on_each_cpu(__cpa_flush_range, NULL, 1); - - if (!cache) - return; - - /* - * We only need to flush on one CPU, - * clflush is a MESI-coherent instruction that - * will cause all other CPUs to flush the same - * cachelines: - */ - for (i = 0, addr = start; i < numpages; i++, addr += PAGE_SIZE) { - pte_t *pte = lookup_address(addr, &level); - - /* - * Only flush present addresses: - */ - if (pte && (pte_val(*pte) & _PAGE_PRESENT)) - clflush_cache_range((void *) addr, PAGE_SIZE); - } -} - -static void cpa_flush_array(unsigned long *start, int numpages, int cache, - int in_flags, struct page **pages) -{ - unsigned int i, level; - unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */ - - BUG_ON(irqs_disabled()); - - on_each_cpu(__cpa_flush_all, (void *) do_wbinvd, 1); - - if (!cache || do_wbinvd) - 
return; - - /* - * We only need to flush on one CPU, - * clflush is a MESI-coherent instruction that - * will cause all other CPUs to flush the same - * cachelines: - */ - for (i = 0; i < numpages; i++) { - unsigned long addr; - pte_t *pte; - - if (in_flags & CPA_PAGES_ARRAY) - addr = (unsigned long)page_address(pages[i]); - else - addr = start[i]; - - pte = lookup_address(addr, &level); - - /* - * Only flush present addresses: - */ - if (pte && (pte_val(*pte) & _PAGE_PRESENT)) - clflush_cache_range((void *)addr, PAGE_SIZE); - } -} - -/* - * Certain areas of memory on x86 require very specific protection flags, - * for example the BIOS area or kernel text. Callers don't always get this - * right (again, ioremap() on BIOS memory is not uncommon) so this function - * checks and fixes these known static required protection bits. - */ -static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, - unsigned long pfn) -{ - pgprot_t forbidden = __pgprot(0); - - /* - * The BIOS area between 640k and 1Mb needs to be executable for - * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support. - */ -#ifdef CONFIG_PCI_BIOS - if (pcibios_enabled && within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT)) - pgprot_val(forbidden) |= _PAGE_NX; -#endif - - /* - * The kernel text needs to be executable for obvious reasons - * Does not cover __inittext since that is gone later on. On - * 64bit we do not enforce !NX on the low mapping - */ - if (within(address, (unsigned long)_text, (unsigned long)_etext)) - pgprot_val(forbidden) |= _PAGE_NX; - - /* - * The .rodata section needs to be read-only. Using the pfn - * catches all aliases. 
- */ - if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT, - __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) - pgprot_val(forbidden) |= _PAGE_RW; - -#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) - /* - * Once the kernel maps the text as RO (kernel_set_to_readonly is set), - * kernel text mappings for the large page aligned text, rodata sections - * will be always read-only. For the kernel identity mappings covering - * the holes caused by this alignment can be anything that user asks. - * - * This will preserve the large page mappings for kernel text/data - * at no extra cost. - */ - if (kernel_set_to_readonly && - within(address, (unsigned long)_text, - (unsigned long)__end_rodata_hpage_align)) { - unsigned int level; - - /* - * Don't enforce the !RW mapping for the kernel text mapping, - * if the current mapping is already using small page mapping. - * No need to work hard to preserve large page mappings in this - * case. - * - * This also fixes the Linux Xen paravirt guest boot failure - * (because of unexpected read-only mappings for kernel identity - * mappings). In this paravirt guest case, the kernel text - * mapping and the kernel identity mapping share the same - * page-table pages. Thus we can't really use different - * protections for the kernel text and identity mappings. Also, - * these shared mappings are made of small page mappings. - * Thus this don't enforce !RW mapping for small page kernel - * text mapping logic will help Linux Xen parvirt guest boot - * as well. - */ - if (lookup_address(address, &level) && (level != PG_LEVEL_4K)) - pgprot_val(forbidden) |= _PAGE_RW; - } -#endif - - prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); - - return prot; -} - -/* - * Lookup the page table entry for a virtual address. Return a pointer - * to the entry and the level of the mapping. - * - * Note: We return pud and pmd either when the entry is marked large - * or when the present bit is not set. 
Otherwise we would return a - * pointer to a nonexisting mapping. - */ -pte_t *lookup_address(unsigned long address, unsigned int *level) -{ - pgd_t *pgd = pgd_offset_k(address); - pud_t *pud; - pmd_t *pmd; - - *level = PG_LEVEL_NONE; - - if (pgd_none(*pgd)) - return NULL; - - pud = pud_offset(pgd, address); - if (pud_none(*pud)) - return NULL; - - *level = PG_LEVEL_1G; - if (pud_large(*pud) || !pud_present(*pud)) - return (pte_t *)pud; - - pmd = pmd_offset(pud, address); - if (pmd_none(*pmd)) - return NULL; - - *level = PG_LEVEL_2M; - if (pmd_large(*pmd) || !pmd_present(*pmd)) - return (pte_t *)pmd; - - *level = PG_LEVEL_4K; - - return pte_offset_kernel(pmd, address); -} -EXPORT_SYMBOL_GPL(lookup_address); - -/* - * Set the new pmd in all the pgds we know about: - */ -static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) -{ - /* change init_mm */ - set_pte_atomic(kpte, pte); -#ifdef CONFIG_X86_32 - if (!SHARED_KERNEL_PMD) { - struct page *page; - - list_for_each_entry(page, &pgd_list, lru) { - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - - pgd = (pgd_t *)page_address(page) + pgd_index(address); - pud = pud_offset(pgd, address); - pmd = pmd_offset(pud, address); - set_pte_atomic((pte_t *)pmd, pte); - } - } -#endif -} - -static int -try_preserve_large_page(pte_t *kpte, unsigned long address, - struct cpa_data *cpa) -{ - unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn; - pte_t new_pte, old_pte, *tmp; - pgprot_t old_prot, new_prot, req_prot; - int i, do_split = 1; - unsigned int level; - - if (cpa->force_split) - return 1; - - spin_lock(&pgd_lock); - /* - * Check for races, another CPU might have split this page - * up already: - */ - tmp = lookup_address(address, &level); - if (tmp != kpte) - goto out_unlock; - - switch (level) { - case PG_LEVEL_2M: - psize = PMD_PAGE_SIZE; - pmask = PMD_PAGE_MASK; - break; -#ifdef CONFIG_X86_64 - case PG_LEVEL_1G: - psize = PUD_PAGE_SIZE; - pmask = PUD_PAGE_MASK; - break; -#endif - default: - do_split 
= -EINVAL; - goto out_unlock; - } - - /* - * Calculate the number of pages, which fit into this large - * page starting at address: - */ - nextpage_addr = (address + psize) & pmask; - numpages = (nextpage_addr - address) >> PAGE_SHIFT; - if (numpages < cpa->numpages) - cpa->numpages = numpages; - - /* - * We are safe now. Check whether the new pgprot is the same: - */ - old_pte = *kpte; - old_prot = new_prot = req_prot = pte_pgprot(old_pte); - - pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr); - pgprot_val(req_prot) |= pgprot_val(cpa->mask_set); - - /* - * old_pte points to the large page base address. So we need - * to add the offset of the virtual address: - */ - pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT); - cpa->pfn = pfn; - - new_prot = static_protections(req_prot, address, pfn); - - /* - * We need to check the full range, whether - * static_protection() requires a different pgprot for one of - * the pages in the range we try to preserve: - */ - addr = address & pmask; - pfn = pte_pfn(old_pte); - for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) { - pgprot_t chk_prot = static_protections(req_prot, addr, pfn); - - if (pgprot_val(chk_prot) != pgprot_val(new_prot)) - goto out_unlock; - } - - /* - * If there are no changes, return. maxpages has been updated - * above: - */ - if (pgprot_val(new_prot) == pgprot_val(old_prot)) { - do_split = 0; - goto out_unlock; - } - - /* - * We need to change the attributes. Check, whether we can - * change the large page in one go. We request a split, when - * the address is not aligned and the number of pages is - * smaller than the number of pages in the large page. Note - * that we limited the number of possible pages already to - * the number of pages in the large page. - */ - if (address == (address & pmask) && cpa->numpages == (psize >> PAGE_SHIFT)) { - /* - * The address is aligned and the number of pages - * covers the full page. 
- */ - new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot)); - __set_pmd_pte(kpte, address, new_pte); - cpa->flags |= CPA_FLUSHTLB; - do_split = 0; - } - -out_unlock: - spin_unlock(&pgd_lock); - - return do_split; -} - -static int split_large_page(pte_t *kpte, unsigned long address) -{ - unsigned long pfn, pfninc = 1; - unsigned int i, level; - pte_t *pbase, *tmp; - pgprot_t ref_prot; - struct page *base; - - if (!debug_pagealloc) - spin_unlock(&cpa_lock); - base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0); - if (!debug_pagealloc) - spin_lock(&cpa_lock); - if (!base) - return -ENOMEM; - - spin_lock(&pgd_lock); - /* - * Check for races, another CPU might have split this page - * up for us already: - */ - tmp = lookup_address(address, &level); - if (tmp != kpte) - goto out_unlock; - - pbase = (pte_t *)page_address(base); - paravirt_alloc_pte(&init_mm, page_to_pfn(base)); - ref_prot = pte_pgprot(pte_clrhuge(*kpte)); - /* - * If we ever want to utilize the PAT bit, we need to - * update this function to make sure it's converted from - * bit 12 to bit 7 when we cross from the 2MB level to - * the 4K level: - */ - WARN_ON_ONCE(pgprot_val(ref_prot) & _PAGE_PAT_LARGE); - -#ifdef CONFIG_X86_64 - if (level == PG_LEVEL_1G) { - pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT; - pgprot_val(ref_prot) |= _PAGE_PSE; - } -#endif - - /* - * Get the target pfn from the original entry: - */ - pfn = pte_pfn(*kpte); - for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc) - set_pte(&pbase[i], pfn_pte(pfn, ref_prot)); - - if (address >= (unsigned long)__va(0) && - address < (unsigned long)__va(max_low_pfn_mapped << PAGE_SHIFT)) - split_page_count(level); - -#ifdef CONFIG_X86_64 - if (address >= (unsigned long)__va(1UL<<32) && - address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT)) - split_page_count(level); -#endif - - /* - * Install the new, split up pagetable. 
- * - * We use the standard kernel pagetable protections for the new - * pagetable protections, the actual ptes set above control the - * primary protection behavior: - */ - __set_pmd_pte(kpte, address, mk_pte(base, __pgprot(_KERNPG_TABLE))); - - /* - * Intel Atom errata AAH41 workaround. - * - * The real fix should be in hw or in a microcode update, but - * we also probabilistically try to reduce the window of having - * a large TLB mixed with 4K TLBs while instruction fetches are - * going on. - */ - __flush_tlb_all(); - - base = NULL; - -out_unlock: - /* - * If we dropped out via the lookup_address check under - * pgd_lock then stick the page back into the pool: - */ - if (base) - __free_page(base); - spin_unlock(&pgd_lock); - - return 0; -} - -static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr, - int primary) -{ - /* - * Ignore all non primary paths. - */ - if (!primary) - return 0; - - /* - * Ignore the NULL PTE for kernel identity mapping, as it is expected - * to have holes. - * Also set numpages to '1' indicating that we processed cpa req for - * one virtual address page and its pfn. TBD: numpages can be set based - * on the initial value and the level returned by lookup_address(). - */ - if (within(vaddr, PAGE_OFFSET, - PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) { - cpa->numpages = 1; - cpa->pfn = __pa(vaddr) >> PAGE_SHIFT; - return 0; - } else { - WARN(1, KERN_WARNING "CPA: called for zero pte. 
" - "vaddr = %lx cpa->vaddr = %lx\n", vaddr, - *cpa->vaddr); - - return -EFAULT; - } -} - -static int __change_page_attr(struct cpa_data *cpa, int primary) -{ - unsigned long address; - int do_split, err; - unsigned int level; - pte_t *kpte, old_pte; - - if (cpa->flags & CPA_PAGES_ARRAY) { - struct page *page = cpa->pages[cpa->curpage]; - if (unlikely(PageHighMem(page))) - return 0; - address = (unsigned long)page_address(page); - } else if (cpa->flags & CPA_ARRAY) - address = cpa->vaddr[cpa->curpage]; - else - address = *cpa->vaddr; -repeat: - kpte = lookup_address(address, &level); - if (!kpte) - return __cpa_process_fault(cpa, address, primary); - - old_pte = *kpte; - if (!pte_val(old_pte)) - return __cpa_process_fault(cpa, address, primary); - - if (level == PG_LEVEL_4K) { - pte_t new_pte; - pgprot_t new_prot = pte_pgprot(old_pte); - unsigned long pfn = pte_pfn(old_pte); - - pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr); - pgprot_val(new_prot) |= pgprot_val(cpa->mask_set); - - new_prot = static_protections(new_prot, address, pfn); - - /* - * We need to keep the pfn from the existing PTE, - * after all we're only going to change it's attributes - * not the memory it points to - */ - new_pte = pfn_pte(pfn, canon_pgprot(new_prot)); - cpa->pfn = pfn; - /* - * Do we really change anything ? - */ - if (pte_val(old_pte) != pte_val(new_pte)) { - set_pte_atomic(kpte, new_pte); - cpa->flags |= CPA_FLUSHTLB; - } - cpa->numpages = 1; - return 0; - } - - /* - * Check, whether we can keep the large page intact - * and just change the pte: - */ - do_split = try_preserve_large_page(kpte, address, cpa); - /* - * When the range fits into the existing large page, - * return. 
cp->numpages and cpa->tlbflush have been updated in - * try_large_page: - */ - if (do_split <= 0) - return do_split; - - /* - * We have to split the large page: - */ - err = split_large_page(kpte, address); - if (!err) { - /* - * Do a global flush tlb after splitting the large page - * and before we do the actual change page attribute in the PTE. - * - * With out this, we violate the TLB application note, that says - * "The TLBs may contain both ordinary and large-page - * translations for a 4-KByte range of linear addresses. This - * may occur if software modifies the paging structures so that - * the page size used for the address range changes. If the two - * translations differ with respect to page frame or attributes - * (e.g., permissions), processor behavior is undefined and may - * be implementation-specific." - * - * We do this global tlb flush inside the cpa_lock, so that we - * don't allow any other cpu, with stale tlb entries change the - * page attribute in parallel, that also falls into the - * just split large page entry. 
- */ - flush_tlb_all(); - goto repeat; - } - - return err; -} - -static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias); - -static int cpa_process_alias(struct cpa_data *cpa) -{ - struct cpa_data alias_cpa; - unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT); - unsigned long vaddr; - int ret; - - if (cpa->pfn >= max_pfn_mapped) - return 0; - -#ifdef CONFIG_X86_64 - if (cpa->pfn >= max_low_pfn_mapped && cpa->pfn < (1UL<<(32-PAGE_SHIFT))) - return 0; -#endif - /* - * No need to redo, when the primary call touched the direct - * mapping already: - */ - if (cpa->flags & CPA_PAGES_ARRAY) { - struct page *page = cpa->pages[cpa->curpage]; - if (unlikely(PageHighMem(page))) - return 0; - vaddr = (unsigned long)page_address(page); - } else if (cpa->flags & CPA_ARRAY) - vaddr = cpa->vaddr[cpa->curpage]; - else - vaddr = *cpa->vaddr; - - if (!(within(vaddr, PAGE_OFFSET, - PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) { - - alias_cpa = *cpa; - alias_cpa.vaddr = &laddr; - alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); - - ret = __change_page_attr_set_clr(&alias_cpa, 0); - if (ret) - return ret; - } - -#ifdef CONFIG_X86_64 - /* - * If the primary call didn't touch the high mapping already - * and the physical address is inside the kernel map, we need - * to touch the high mapped kernel as well: - */ - if (!within(vaddr, (unsigned long)_text, _brk_end) && - within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn())) { - unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + - __START_KERNEL_map - phys_base; - alias_cpa = *cpa; - alias_cpa.vaddr = &temp_cpa_vaddr; - alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); - - /* - * The high mapping range is imprecise, so ignore the - * return value. 
- */ - __change_page_attr_set_clr(&alias_cpa, 0); - } -#endif - - return 0; -} - -static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) -{ - int ret, numpages = cpa->numpages; - - while (numpages) { - /* - * Store the remaining nr of pages for the large page - * preservation check. - */ - cpa->numpages = numpages; - /* for array changes, we can't use large page */ - if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY)) - cpa->numpages = 1; - - if (!debug_pagealloc) - spin_lock(&cpa_lock); - ret = __change_page_attr(cpa, checkalias); - if (!debug_pagealloc) - spin_unlock(&cpa_lock); - if (ret) - return ret; - - if (checkalias) { - ret = cpa_process_alias(cpa); - if (ret) - return ret; - } - - /* - * Adjust the number of pages with the result of the - * CPA operation. Either a large page has been - * preserved or a single page update happened. - */ - BUG_ON(cpa->numpages > numpages); - numpages -= cpa->numpages; - if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) - cpa->curpage++; - else - *cpa->vaddr += cpa->numpages * PAGE_SIZE; - - } - return 0; -} - -static inline int cache_attr(pgprot_t attr) -{ - return pgprot_val(attr) & - (_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD); -} - -static int change_page_attr_set_clr(unsigned long *addr, int numpages, - pgprot_t mask_set, pgprot_t mask_clr, - int force_split, int in_flag, - struct page **pages) -{ - struct cpa_data cpa; - int ret, cache, checkalias; - unsigned long baddr = 0; - - /* - * Check, if we are requested to change a not supported - * feature: - */ - mask_set = canon_pgprot(mask_set); - mask_clr = canon_pgprot(mask_clr); - if (!pgprot_val(mask_set) && !pgprot_val(mask_clr) && !force_split) - return 0; - - /* Ensure we are PAGE_SIZE aligned */ - if (in_flag & CPA_ARRAY) { - int i; - for (i = 0; i < numpages; i++) { - if (addr[i] & ~PAGE_MASK) { - addr[i] &= PAGE_MASK; - WARN_ON_ONCE(1); - } - } - } else if (!(in_flag & CPA_PAGES_ARRAY)) { - /* - * in_flag of CPA_PAGES_ARRAY implies it is 
aligned. - * No need to cehck in that case - */ - if (*addr & ~PAGE_MASK) { - *addr &= PAGE_MASK; - /* - * People should not be passing in unaligned addresses: - */ - WARN_ON_ONCE(1); - } - /* - * Save address for cache flush. *addr is modified in the call - * to __change_page_attr_set_clr() below. - */ - baddr = *addr; - } - - /* Must avoid aliasing mappings in the highmem code */ - kmap_flush_unused(); - - vm_unmap_aliases(); - - cpa.vaddr = addr; - cpa.pages = pages; - cpa.numpages = numpages; - cpa.mask_set = mask_set; - cpa.mask_clr = mask_clr; - cpa.flags = 0; - cpa.curpage = 0; - cpa.force_split = force_split; - - if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY)) - cpa.flags |= in_flag; - - /* No alias checking for _NX bit modifications */ - checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX; - - ret = __change_page_attr_set_clr(&cpa, checkalias); - - /* - * Check whether we really changed something: - */ - if (!(cpa.flags & CPA_FLUSHTLB)) - goto out; - - /* - * No need to flush, when we did not set any of the caching - * attributes: - */ - cache = cache_attr(mask_set); - - /* - * On success we use clflush, when the CPU supports it to - * avoid the wbindv. If the CPU does not support it and in the - * error case we fall back to cpa_flush_all (which uses - * wbindv): - */ - if (!ret && cpu_has_clflush) { - if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) { - cpa_flush_array(addr, numpages, cache, - cpa.flags, pages); - } else - cpa_flush_range(baddr, numpages, cache); - } else - cpa_flush_all(cache); - -out: - return ret; -} - -static inline int change_page_attr_set(unsigned long *addr, int numpages, - pgprot_t mask, int array) -{ - return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0, - (array ? CPA_ARRAY : 0), NULL); -} - -static inline int change_page_attr_clear(unsigned long *addr, int numpages, - pgprot_t mask, int array) -{ - return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0, - (array ? 
CPA_ARRAY : 0), NULL); -} - -static inline int cpa_set_pages_array(struct page **pages, int numpages, - pgprot_t mask) -{ - return change_page_attr_set_clr(NULL, numpages, mask, __pgprot(0), 0, - CPA_PAGES_ARRAY, pages); -} - -static inline int cpa_clear_pages_array(struct page **pages, int numpages, - pgprot_t mask) -{ - return change_page_attr_set_clr(NULL, numpages, __pgprot(0), mask, 0, - CPA_PAGES_ARRAY, pages); -} - -int _set_memory_uc(unsigned long addr, int numpages) -{ - /* - * for now UC MINUS. see comments in ioremap_nocache() - */ - return change_page_attr_set(&addr, numpages, - __pgprot(_PAGE_CACHE_UC_MINUS), 0); -} - -int set_memory_uc(unsigned long addr, int numpages) -{ - int ret; - - /* - * for now UC MINUS. see comments in ioremap_nocache() - */ - ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, - _PAGE_CACHE_UC_MINUS, NULL); - if (ret) - goto out_err; - - ret = _set_memory_uc(addr, numpages); - if (ret) - goto out_free; - - return 0; - -out_free: - free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); -out_err: - return ret; -} -EXPORT_SYMBOL(set_memory_uc); - -static int _set_memory_array(unsigned long *addr, int addrinarray, - unsigned long new_type) -{ - int i, j; - int ret; - - /* - * for now UC MINUS. 
see comments in ioremap_nocache() - */ - for (i = 0; i < addrinarray; i++) { - ret = reserve_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE, - new_type, NULL); - if (ret) - goto out_free; - } - - ret = change_page_attr_set(addr, addrinarray, - __pgprot(_PAGE_CACHE_UC_MINUS), 1); - - if (!ret && new_type == _PAGE_CACHE_WC) - ret = change_page_attr_set_clr(addr, addrinarray, - __pgprot(_PAGE_CACHE_WC), - __pgprot(_PAGE_CACHE_MASK), - 0, CPA_ARRAY, NULL); - if (ret) - goto out_free; - - return 0; - -out_free: - for (j = 0; j < i; j++) - free_memtype(__pa(addr[j]), __pa(addr[j]) + PAGE_SIZE); - - return ret; -} - -int set_memory_array_uc(unsigned long *addr, int addrinarray) -{ - return _set_memory_array(addr, addrinarray, _PAGE_CACHE_UC_MINUS); -} -EXPORT_SYMBOL(set_memory_array_uc); - -int set_memory_array_wc(unsigned long *addr, int addrinarray) -{ - return _set_memory_array(addr, addrinarray, _PAGE_CACHE_WC); -} -EXPORT_SYMBOL(set_memory_array_wc); - -int _set_memory_wc(unsigned long addr, int numpages) -{ - int ret; - unsigned long addr_copy = addr; - - ret = change_page_attr_set(&addr, numpages, - __pgprot(_PAGE_CACHE_UC_MINUS), 0); - if (!ret) { - ret = change_page_attr_set_clr(&addr_copy, numpages, - __pgprot(_PAGE_CACHE_WC), - __pgprot(_PAGE_CACHE_MASK), - 0, 0, NULL); - } - return ret; -} - -int set_memory_wc(unsigned long addr, int numpages) -{ - int ret; - - if (!pat_enabled) - return set_memory_uc(addr, numpages); - - ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, - _PAGE_CACHE_WC, NULL); - if (ret) - goto out_err; - - ret = _set_memory_wc(addr, numpages); - if (ret) - goto out_free; - - return 0; - -out_free: - free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); -out_err: - return ret; -} -EXPORT_SYMBOL(set_memory_wc); - -int _set_memory_wb(unsigned long addr, int numpages) -{ - return change_page_attr_clear(&addr, numpages, - __pgprot(_PAGE_CACHE_MASK), 0); -} - -int set_memory_wb(unsigned long addr, int numpages) -{ - 
int ret; - - ret = _set_memory_wb(addr, numpages); - if (ret) - return ret; - - free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); - return 0; -} -EXPORT_SYMBOL(set_memory_wb); - -int set_memory_array_wb(unsigned long *addr, int addrinarray) -{ - int i; - int ret; - - ret = change_page_attr_clear(addr, addrinarray, - __pgprot(_PAGE_CACHE_MASK), 1); - if (ret) - return ret; - - for (i = 0; i < addrinarray; i++) - free_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE); - - return 0; -} -EXPORT_SYMBOL(set_memory_array_wb); - -int set_memory_x(unsigned long addr, int numpages) -{ - if (!(__supported_pte_mask & _PAGE_NX)) - return 0; - - return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0); -} -EXPORT_SYMBOL(set_memory_x); - -int set_memory_nx(unsigned long addr, int numpages) -{ - if (!(__supported_pte_mask & _PAGE_NX)) - return 0; - - return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0); -} -EXPORT_SYMBOL(set_memory_nx); - -int set_memory_ro(unsigned long addr, int numpages) -{ - return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0); -} -EXPORT_SYMBOL_GPL(set_memory_ro); - -int set_memory_rw(unsigned long addr, int numpages) -{ - return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0); -} -EXPORT_SYMBOL_GPL(set_memory_rw); - -int set_memory_np(unsigned long addr, int numpages) -{ - return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_PRESENT), 0); -} - -int set_memory_4k(unsigned long addr, int numpages) -{ - return change_page_attr_set_clr(&addr, numpages, __pgprot(0), - __pgprot(0), 1, 0, NULL); -} - -int set_pages_uc(struct page *page, int numpages) -{ - unsigned long addr = (unsigned long)page_address(page); - - return set_memory_uc(addr, numpages); -} -EXPORT_SYMBOL(set_pages_uc); - -static int _set_pages_array(struct page **pages, int addrinarray, - unsigned long new_type) -{ - unsigned long start; - unsigned long end; - int i; - int free_idx; - int ret; - - for (i = 0; i < 
addrinarray; i++) { - if (PageHighMem(pages[i])) - continue; - start = page_to_pfn(pages[i]) << PAGE_SHIFT; - end = start + PAGE_SIZE; - if (reserve_memtype(start, end, new_type, NULL)) - goto err_out; - } - - ret = cpa_set_pages_array(pages, addrinarray, - __pgprot(_PAGE_CACHE_UC_MINUS)); - if (!ret && new_type == _PAGE_CACHE_WC) - ret = change_page_attr_set_clr(NULL, addrinarray, - __pgprot(_PAGE_CACHE_WC), - __pgprot(_PAGE_CACHE_MASK), - 0, CPA_PAGES_ARRAY, pages); - if (ret) - goto err_out; - return 0; /* Success */ -err_out: - free_idx = i; - for (i = 0; i < free_idx; i++) { - if (PageHighMem(pages[i])) - continue; - start = page_to_pfn(pages[i]) << PAGE_SHIFT; - end = start + PAGE_SIZE; - free_memtype(start, end); - } - return -EINVAL; -} - -int set_pages_array_uc(struct page **pages, int addrinarray) -{ - return _set_pages_array(pages, addrinarray, _PAGE_CACHE_UC_MINUS); -} -EXPORT_SYMBOL(set_pages_array_uc); - -int set_pages_array_wc(struct page **pages, int addrinarray) -{ - return _set_pages_array(pages, addrinarray, _PAGE_CACHE_WC); -} -EXPORT_SYMBOL(set_pages_array_wc); - -int set_pages_wb(struct page *page, int numpages) -{ - unsigned long addr = (unsigned long)page_address(page); - - return set_memory_wb(addr, numpages); -} -EXPORT_SYMBOL(set_pages_wb); - -int set_pages_array_wb(struct page **pages, int addrinarray) -{ - int retval; - unsigned long start; - unsigned long end; - int i; - - retval = cpa_clear_pages_array(pages, addrinarray, - __pgprot(_PAGE_CACHE_MASK)); - if (retval) - return retval; - - for (i = 0; i < addrinarray; i++) { - if (PageHighMem(pages[i])) - continue; - start = page_to_pfn(pages[i]) << PAGE_SHIFT; - end = start + PAGE_SIZE; - free_memtype(start, end); - } - - return 0; -} -EXPORT_SYMBOL(set_pages_array_wb); - -int set_pages_x(struct page *page, int numpages) -{ - unsigned long addr = (unsigned long)page_address(page); - - return set_memory_x(addr, numpages); -} -EXPORT_SYMBOL(set_pages_x); - -int set_pages_nx(struct page 
*page, int numpages) -{ - unsigned long addr = (unsigned long)page_address(page); - - return set_memory_nx(addr, numpages); -} -EXPORT_SYMBOL(set_pages_nx); - -int set_pages_ro(struct page *page, int numpages) -{ - unsigned long addr = (unsigned long)page_address(page); - - return set_memory_ro(addr, numpages); -} - -int set_pages_rw(struct page *page, int numpages) -{ - unsigned long addr = (unsigned long)page_address(page); - - return set_memory_rw(addr, numpages); -} - -#ifdef CONFIG_DEBUG_PAGEALLOC - -static int __set_pages_p(struct page *page, int numpages) -{ - unsigned long tempaddr = (unsigned long) page_address(page); - struct cpa_data cpa = { .vaddr = &tempaddr, - .numpages = numpages, - .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW), - .mask_clr = __pgprot(0), - .flags = 0}; - - /* - * No alias checking needed for setting present flag. otherwise, - * we may need to break large pages for 64-bit kernel text - * mappings (this adds to complexity if we want to do this from - * atomic context especially). Let's keep it simple! - */ - return __change_page_attr_set_clr(&cpa, 0); -} - -static int __set_pages_np(struct page *page, int numpages) -{ - unsigned long tempaddr = (unsigned long) page_address(page); - struct cpa_data cpa = { .vaddr = &tempaddr, - .numpages = numpages, - .mask_set = __pgprot(0), - .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW), - .flags = 0}; - - /* - * No alias checking needed for setting not present flag. otherwise, - * we may need to break large pages for 64-bit kernel text - * mappings (this adds to complexity if we want to do this from - * atomic context especially). Let's keep it simple! - */ - return __change_page_attr_set_clr(&cpa, 0); -} - -void kernel_map_pages(struct page *page, int numpages, int enable) -{ - if (PageHighMem(page)) - return; - if (!enable) { - debug_check_no_locks_freed(page_address(page), - numpages * PAGE_SIZE); - } - - /* - * The return value is ignored as the calls cannot fail. 
- * Large pages for identity mappings are not used at boot time - * and hence no memory allocations during large page split. - */ - if (enable) - __set_pages_p(page, numpages); - else - __set_pages_np(page, numpages); - - /* - * We should perform an IPI and flush all tlbs, - * but that can deadlock->flush only current cpu: - */ - __flush_tlb_all(); -} - -#ifdef CONFIG_HIBERNATION - -bool kernel_page_present(struct page *page) -{ - unsigned int level; - pte_t *pte; - - if (PageHighMem(page)) - return false; - - pte = lookup_address((unsigned long)page_address(page), &level); - return (pte_val(*pte) & _PAGE_PRESENT); -} - -#endif /* CONFIG_HIBERNATION */ - -#endif /* CONFIG_DEBUG_PAGEALLOC */ - -/* - * The testcases use internal knowledge of the implementation that shouldn't - * be exposed to the rest of the kernel. Include these directly here. - */ -#ifdef CONFIG_CPA_DEBUG -#include "pageattr-test.c" -#endif diff --git a/ANDROID_3.4.5/arch/x86/mm/pat.c b/ANDROID_3.4.5/arch/x86/mm/pat.c deleted file mode 100644 index f6ff57b7..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/pat.c +++ /dev/null @@ -1,828 +0,0 @@ -/* - * Handle caching attributes in page tables (PAT) - * - * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> - * Suresh B Siddha <suresh.b.siddha@intel.com> - * - * Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen. 
- */ - -#include <linux/seq_file.h> -#include <linux/bootmem.h> -#include <linux/debugfs.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/mm.h> -#include <linux/fs.h> -#include <linux/rbtree.h> - -#include <asm/cacheflush.h> -#include <asm/processor.h> -#include <asm/tlbflush.h> -#include <asm/x86_init.h> -#include <asm/pgtable.h> -#include <asm/fcntl.h> -#include <asm/e820.h> -#include <asm/mtrr.h> -#include <asm/page.h> -#include <asm/msr.h> -#include <asm/pat.h> -#include <asm/io.h> - -#include "pat_internal.h" - -#ifdef CONFIG_X86_PAT -int __read_mostly pat_enabled = 1; - -static inline void pat_disable(const char *reason) -{ - pat_enabled = 0; - printk(KERN_INFO "%s\n", reason); -} - -static int __init nopat(char *str) -{ - pat_disable("PAT support disabled."); - return 0; -} -early_param("nopat", nopat); -#else -static inline void pat_disable(const char *reason) -{ - (void)reason; -} -#endif - - -int pat_debug_enable; - -static int __init pat_debug_setup(char *str) -{ - pat_debug_enable = 1; - return 0; -} -__setup("debugpat", pat_debug_setup); - -static u64 __read_mostly boot_pat_state; - -enum { - PAT_UC = 0, /* uncached */ - PAT_WC = 1, /* Write combining */ - PAT_WT = 4, /* Write Through */ - PAT_WP = 5, /* Write Protected */ - PAT_WB = 6, /* Write Back (default) */ - PAT_UC_MINUS = 7, /* UC, but can be overriden by MTRR */ -}; - -#define PAT(x, y) ((u64)PAT_ ## y << ((x)*8)) - -void pat_init(void) -{ - u64 pat; - bool boot_cpu = !boot_pat_state; - - if (!pat_enabled) - return; - - if (!cpu_has_pat) { - if (!boot_pat_state) { - pat_disable("PAT not supported by CPU."); - return; - } else { - /* - * If this happens we are on a secondary CPU, but - * switched to PAT on the boot CPU. We have no way to - * undo PAT. - */ - printk(KERN_ERR "PAT enabled, " - "but not supported by secondary CPU\n"); - BUG(); - } - } - - /* Set PWT to Write-Combining. 
All other bits stay the same */ - /* - * PTE encoding used in Linux: - * PAT - * |PCD - * ||PWT - * ||| - * 000 WB _PAGE_CACHE_WB - * 001 WC _PAGE_CACHE_WC - * 010 UC- _PAGE_CACHE_UC_MINUS - * 011 UC _PAGE_CACHE_UC - * PAT bit unused - */ - pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) | - PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC); - - /* Boot CPU check */ - if (!boot_pat_state) - rdmsrl(MSR_IA32_CR_PAT, boot_pat_state); - - wrmsrl(MSR_IA32_CR_PAT, pat); - - if (boot_cpu) - printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n", - smp_processor_id(), boot_pat_state, pat); -} - -#undef PAT - -static DEFINE_SPINLOCK(memtype_lock); /* protects memtype accesses */ - -/* - * Does intersection of PAT memory type and MTRR memory type and returns - * the resulting memory type as PAT understands it. - * (Type in pat and mtrr will not have same value) - * The intersection is based on "Effective Memory Type" tables in IA-32 - * SDM vol 3a - */ -static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type) -{ - /* - * Look for MTRR hint to get the effective type in case where PAT - * request is for WB. - */ - if (req_type == _PAGE_CACHE_WB) { - u8 mtrr_type; - - mtrr_type = mtrr_type_lookup(start, end); - if (mtrr_type != MTRR_TYPE_WRBACK) - return _PAGE_CACHE_UC_MINUS; - - return _PAGE_CACHE_WB; - } - - return req_type; -} - -static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end) -{ - int ram_page = 0, not_rampage = 0; - unsigned long page_nr; - - for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT); - ++page_nr) { - /* - * For legacy reasons, physical address range in the legacy ISA - * region is tracked as non-RAM. This will allow users of - * /dev/mem to map portions of legacy ISA region, even when - * some of those portions are listed(or not even listed) with - * different e820 types(RAM/reserved/..) 
- */ - if (page_nr >= (ISA_END_ADDRESS >> PAGE_SHIFT) && - page_is_ram(page_nr)) - ram_page = 1; - else - not_rampage = 1; - - if (ram_page == not_rampage) - return -1; - } - - return ram_page; -} - -/* - * For RAM pages, we use page flags to mark the pages with appropriate type. - * Here we do two pass: - * - Find the memtype of all the pages in the range, look for any conflicts - * - In case of no conflicts, set the new memtype for pages in the range - */ -static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type, - unsigned long *new_type) -{ - struct page *page; - u64 pfn; - - if (req_type == _PAGE_CACHE_UC) { - /* We do not support strong UC */ - WARN_ON_ONCE(1); - req_type = _PAGE_CACHE_UC_MINUS; - } - - for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { - unsigned long type; - - page = pfn_to_page(pfn); - type = get_page_memtype(page); - if (type != -1) { - printk(KERN_INFO "reserve_ram_pages_type failed " - "0x%Lx-0x%Lx, track 0x%lx, req 0x%lx\n", - start, end, type, req_type); - if (new_type) - *new_type = type; - - return -EBUSY; - } - } - - if (new_type) - *new_type = req_type; - - for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { - page = pfn_to_page(pfn); - set_page_memtype(page, req_type); - } - return 0; -} - -static int free_ram_pages_type(u64 start, u64 end) -{ - struct page *page; - u64 pfn; - - for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { - page = pfn_to_page(pfn); - set_page_memtype(page, -1); - } - return 0; -} - -/* - * req_type typically has one of the: - * - _PAGE_CACHE_WB - * - _PAGE_CACHE_WC - * - _PAGE_CACHE_UC_MINUS - * - _PAGE_CACHE_UC - * - * If new_type is NULL, function will return an error if it cannot reserve the - * region with req_type. If new_type is non-NULL, function will return - * available type in new_type in case of no error. In case of any error - * it will return a negative return value. 
- */ -int reserve_memtype(u64 start, u64 end, unsigned long req_type, - unsigned long *new_type) -{ - struct memtype *new; - unsigned long actual_type; - int is_range_ram; - int err = 0; - - BUG_ON(start >= end); /* end is exclusive */ - - if (!pat_enabled) { - /* This is identical to page table setting without PAT */ - if (new_type) { - if (req_type == _PAGE_CACHE_WC) - *new_type = _PAGE_CACHE_UC_MINUS; - else - *new_type = req_type & _PAGE_CACHE_MASK; - } - return 0; - } - - /* Low ISA region is always mapped WB in page table. No need to track */ - if (x86_platform.is_untracked_pat_range(start, end)) { - if (new_type) - *new_type = _PAGE_CACHE_WB; - return 0; - } - - /* - * Call mtrr_lookup to get the type hint. This is an - * optimization for /dev/mem mmap'ers into WB memory (BIOS - * tools and ACPI tools). Use WB request for WB memory and use - * UC_MINUS otherwise. - */ - actual_type = pat_x_mtrr_type(start, end, req_type & _PAGE_CACHE_MASK); - - if (new_type) - *new_type = actual_type; - - is_range_ram = pat_pagerange_is_ram(start, end); - if (is_range_ram == 1) { - - err = reserve_ram_pages_type(start, end, req_type, new_type); - - return err; - } else if (is_range_ram < 0) { - return -EINVAL; - } - - new = kzalloc(sizeof(struct memtype), GFP_KERNEL); - if (!new) - return -ENOMEM; - - new->start = start; - new->end = end; - new->type = actual_type; - - spin_lock(&memtype_lock); - - err = rbt_memtype_check_insert(new, new_type); - if (err) { - printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, " - "track %s, req %s\n", - start, end, cattr_name(new->type), cattr_name(req_type)); - kfree(new); - spin_unlock(&memtype_lock); - - return err; - } - - spin_unlock(&memtype_lock); - - dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", - start, end, cattr_name(new->type), cattr_name(req_type), - new_type ? 
cattr_name(*new_type) : "-"); - - return err; -} - -int free_memtype(u64 start, u64 end) -{ - int err = -EINVAL; - int is_range_ram; - struct memtype *entry; - - if (!pat_enabled) - return 0; - - /* Low ISA region is always mapped WB. No need to track */ - if (x86_platform.is_untracked_pat_range(start, end)) - return 0; - - is_range_ram = pat_pagerange_is_ram(start, end); - if (is_range_ram == 1) { - - err = free_ram_pages_type(start, end); - - return err; - } else if (is_range_ram < 0) { - return -EINVAL; - } - - spin_lock(&memtype_lock); - entry = rbt_memtype_erase(start, end); - spin_unlock(&memtype_lock); - - if (!entry) { - printk(KERN_INFO "%s:%d freeing invalid memtype %Lx-%Lx\n", - current->comm, current->pid, start, end); - return -EINVAL; - } - - kfree(entry); - - dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end); - - return 0; -} - - -/** - * lookup_memtype - Looksup the memory type for a physical address - * @paddr: physical address of which memory type needs to be looked up - * - * Only to be called when PAT is enabled - * - * Returns _PAGE_CACHE_WB, _PAGE_CACHE_WC, _PAGE_CACHE_UC_MINUS or - * _PAGE_CACHE_UC - */ -static unsigned long lookup_memtype(u64 paddr) -{ - int rettype = _PAGE_CACHE_WB; - struct memtype *entry; - - if (x86_platform.is_untracked_pat_range(paddr, paddr + PAGE_SIZE)) - return rettype; - - if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { - struct page *page; - page = pfn_to_page(paddr >> PAGE_SHIFT); - rettype = get_page_memtype(page); - /* - * -1 from get_page_memtype() implies RAM page is in its - * default state and not reserved, and hence of type WB - */ - if (rettype == -1) - rettype = _PAGE_CACHE_WB; - - return rettype; - } - - spin_lock(&memtype_lock); - - entry = rbt_memtype_lookup(paddr); - if (entry != NULL) - rettype = entry->type; - else - rettype = _PAGE_CACHE_UC_MINUS; - - spin_unlock(&memtype_lock); - return rettype; -} - -/** - * io_reserve_memtype - Request a memory type mapping for a region of memory 
- * @start: start (physical address) of the region - * @end: end (physical address) of the region - * @type: A pointer to memtype, with requested type. On success, requested - * or any other compatible type that was available for the region is returned - * - * On success, returns 0 - * On failure, returns non-zero - */ -int io_reserve_memtype(resource_size_t start, resource_size_t end, - unsigned long *type) -{ - resource_size_t size = end - start; - unsigned long req_type = *type; - unsigned long new_type; - int ret; - - WARN_ON_ONCE(iomem_map_sanity_check(start, size)); - - ret = reserve_memtype(start, end, req_type, &new_type); - if (ret) - goto out_err; - - if (!is_new_memtype_allowed(start, size, req_type, new_type)) - goto out_free; - - if (kernel_map_sync_memtype(start, size, new_type) < 0) - goto out_free; - - *type = new_type; - return 0; - -out_free: - free_memtype(start, end); - ret = -EBUSY; -out_err: - return ret; -} - -/** - * io_free_memtype - Release a memory type mapping for a region of memory - * @start: start (physical address) of the region - * @end: end (physical address) of the region - */ -void io_free_memtype(resource_size_t start, resource_size_t end) -{ - free_memtype(start, end); -} - -pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, - unsigned long size, pgprot_t vma_prot) -{ - return vma_prot; -} - -#ifdef CONFIG_STRICT_DEVMEM -/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM*/ -static inline int range_is_allowed(unsigned long pfn, unsigned long size) -{ - return 1; -} -#else -/* This check is needed to avoid cache aliasing when PAT is enabled */ -static inline int range_is_allowed(unsigned long pfn, unsigned long size) -{ - u64 from = ((u64)pfn) << PAGE_SHIFT; - u64 to = from + size; - u64 cursor = from; - - if (!pat_enabled) - return 1; - - while (cursor < to) { - if (!devmem_is_allowed(pfn)) { - printk(KERN_INFO - "Program %s tried to access /dev/mem between %Lx->%Lx.\n", - current->comm, from, 
to); - return 0; - } - cursor += PAGE_SIZE; - pfn++; - } - return 1; -} -#endif /* CONFIG_STRICT_DEVMEM */ - -int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, - unsigned long size, pgprot_t *vma_prot) -{ - unsigned long flags = _PAGE_CACHE_WB; - - if (!range_is_allowed(pfn, size)) - return 0; - - if (file->f_flags & O_DSYNC) - flags = _PAGE_CACHE_UC_MINUS; - -#ifdef CONFIG_X86_32 - /* - * On the PPro and successors, the MTRRs are used to set - * memory types for physical addresses outside main memory, - * so blindly setting UC or PWT on those pages is wrong. - * For Pentiums and earlier, the surround logic should disable - * caching for the high addresses through the KEN pin, but - * we maintain the tradition of paranoia in this code. - */ - if (!pat_enabled && - !(boot_cpu_has(X86_FEATURE_MTRR) || - boot_cpu_has(X86_FEATURE_K6_MTRR) || - boot_cpu_has(X86_FEATURE_CYRIX_ARR) || - boot_cpu_has(X86_FEATURE_CENTAUR_MCR)) && - (pfn << PAGE_SHIFT) >= __pa(high_memory)) { - flags = _PAGE_CACHE_UC; - } -#endif - - *vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) | - flags); - return 1; -} - -/* - * Change the memory type for the physial address range in kernel identity - * mapping space if that range is a part of identity map. - */ -int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags) -{ - unsigned long id_sz; - - if (base >= __pa(high_memory)) - return 0; - - id_sz = (__pa(high_memory) < base + size) ? - __pa(high_memory) - base : - size; - - if (ioremap_change_attr((unsigned long)__va(base), id_sz, flags) < 0) { - printk(KERN_INFO - "%s:%d ioremap_change_attr failed %s " - "for %Lx-%Lx\n", - current->comm, current->pid, - cattr_name(flags), - base, (unsigned long long)(base + size)); - return -EINVAL; - } - return 0; -} - -/* - * Internal interface to reserve a range of physical memory with prot. 
- * Reserved non RAM regions only and after successful reserve_memtype, - * this func also keeps identity mapping (if any) in sync with this new prot. - */ -static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, - int strict_prot) -{ - int is_ram = 0; - int ret; - unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK); - unsigned long flags = want_flags; - - is_ram = pat_pagerange_is_ram(paddr, paddr + size); - - /* - * reserve_pfn_range() for RAM pages. We do not refcount to keep - * track of number of mappings of RAM pages. We can assert that - * the type requested matches the type of first page in the range. - */ - if (is_ram) { - if (!pat_enabled) - return 0; - - flags = lookup_memtype(paddr); - if (want_flags != flags) { - printk(KERN_WARNING - "%s:%d map pfn RAM range req %s for %Lx-%Lx, got %s\n", - current->comm, current->pid, - cattr_name(want_flags), - (unsigned long long)paddr, - (unsigned long long)(paddr + size), - cattr_name(flags)); - *vma_prot = __pgprot((pgprot_val(*vma_prot) & - (~_PAGE_CACHE_MASK)) | - flags); - } - return 0; - } - - ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); - if (ret) - return ret; - - if (flags != want_flags) { - if (strict_prot || - !is_new_memtype_allowed(paddr, size, want_flags, flags)) { - free_memtype(paddr, paddr + size); - printk(KERN_ERR "%s:%d map pfn expected mapping type %s" - " for %Lx-%Lx, got %s\n", - current->comm, current->pid, - cattr_name(want_flags), - (unsigned long long)paddr, - (unsigned long long)(paddr + size), - cattr_name(flags)); - return -EINVAL; - } - /* - * We allow returning different type than the one requested in - * non strict case. - */ - *vma_prot = __pgprot((pgprot_val(*vma_prot) & - (~_PAGE_CACHE_MASK)) | - flags); - } - - if (kernel_map_sync_memtype(paddr, size, flags) < 0) { - free_memtype(paddr, paddr + size); - return -EINVAL; - } - return 0; -} - -/* - * Internal interface to free a range of physical memory. 
- * Frees non RAM regions only. - */ -static void free_pfn_range(u64 paddr, unsigned long size) -{ - int is_ram; - - is_ram = pat_pagerange_is_ram(paddr, paddr + size); - if (is_ram == 0) - free_memtype(paddr, paddr + size); -} - -/* - * track_pfn_vma_copy is called when vma that is covering the pfnmap gets - * copied through copy_page_range(). - * - * If the vma has a linear pfn mapping for the entire range, we get the prot - * from pte and reserve the entire vma range with single reserve_pfn_range call. - */ -int track_pfn_vma_copy(struct vm_area_struct *vma) -{ - resource_size_t paddr; - unsigned long prot; - unsigned long vma_size = vma->vm_end - vma->vm_start; - pgprot_t pgprot; - - if (is_linear_pfn_mapping(vma)) { - /* - * reserve the whole chunk covered by vma. We need the - * starting address and protection from pte. - */ - if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { - WARN_ON_ONCE(1); - return -EINVAL; - } - pgprot = __pgprot(prot); - return reserve_pfn_range(paddr, vma_size, &pgprot, 1); - } - - return 0; -} - -/* - * track_pfn_vma_new is called when a _new_ pfn mapping is being established - * for physical range indicated by pfn and size. - * - * prot is passed in as a parameter for the new mapping. If the vma has a - * linear pfn mapping for the entire range reserve the entire vma range with - * single reserve_pfn_range call. 
- */ -int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, - unsigned long pfn, unsigned long size) -{ - unsigned long flags; - resource_size_t paddr; - unsigned long vma_size = vma->vm_end - vma->vm_start; - - if (is_linear_pfn_mapping(vma)) { - /* reserve the whole chunk starting from vm_pgoff */ - paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; - return reserve_pfn_range(paddr, vma_size, prot, 0); - } - - if (!pat_enabled) - return 0; - - /* for vm_insert_pfn and friends, we set prot based on lookup */ - flags = lookup_memtype(pfn << PAGE_SHIFT); - *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) | - flags); - - return 0; -} - -/* - * untrack_pfn_vma is called while unmapping a pfnmap for a region. - * untrack can be called for a specific region indicated by pfn and size or - * can be for the entire vma (in which case size can be zero). - */ -void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, - unsigned long size) -{ - resource_size_t paddr; - unsigned long vma_size = vma->vm_end - vma->vm_start; - - if (is_linear_pfn_mapping(vma)) { - /* free the whole chunk starting from vm_pgoff */ - paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; - free_pfn_range(paddr, vma_size); - return; - } -} - -pgprot_t pgprot_writecombine(pgprot_t prot) -{ - if (pat_enabled) - return __pgprot(pgprot_val(prot) | _PAGE_CACHE_WC); - else - return pgprot_noncached(prot); -} -EXPORT_SYMBOL_GPL(pgprot_writecombine); - -#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) - -static struct memtype *memtype_get_idx(loff_t pos) -{ - struct memtype *print_entry; - int ret; - - print_entry = kzalloc(sizeof(struct memtype), GFP_KERNEL); - if (!print_entry) - return NULL; - - spin_lock(&memtype_lock); - ret = rbt_memtype_copy_nth_element(print_entry, pos); - spin_unlock(&memtype_lock); - - if (!ret) { - return print_entry; - } else { - kfree(print_entry); - return NULL; - } -} - -static void *memtype_seq_start(struct seq_file *seq, 
loff_t *pos) -{ - if (*pos == 0) { - ++*pos; - seq_printf(seq, "PAT memtype list:\n"); - } - - return memtype_get_idx(*pos); -} - -static void *memtype_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - return memtype_get_idx(*pos); -} - -static void memtype_seq_stop(struct seq_file *seq, void *v) -{ -} - -static int memtype_seq_show(struct seq_file *seq, void *v) -{ - struct memtype *print_entry = (struct memtype *)v; - - seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type), - print_entry->start, print_entry->end); - kfree(print_entry); - - return 0; -} - -static const struct seq_operations memtype_seq_ops = { - .start = memtype_seq_start, - .next = memtype_seq_next, - .stop = memtype_seq_stop, - .show = memtype_seq_show, -}; - -static int memtype_seq_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &memtype_seq_ops); -} - -static const struct file_operations memtype_fops = { - .open = memtype_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int __init pat_memtype_list_init(void) -{ - if (pat_enabled) { - debugfs_create_file("pat_memtype_list", S_IRUSR, - arch_debugfs_dir, NULL, &memtype_fops); - } - return 0; -} - -late_initcall(pat_memtype_list_init); - -#endif /* CONFIG_DEBUG_FS && CONFIG_X86_PAT */ diff --git a/ANDROID_3.4.5/arch/x86/mm/pat_internal.h b/ANDROID_3.4.5/arch/x86/mm/pat_internal.h deleted file mode 100644 index 77e5ba15..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/pat_internal.h +++ /dev/null @@ -1,46 +0,0 @@ -#ifndef __PAT_INTERNAL_H_ -#define __PAT_INTERNAL_H_ - -extern int pat_debug_enable; - -#define dprintk(fmt, arg...) 
\ - do { if (pat_debug_enable) printk(KERN_INFO fmt, ##arg); } while (0) - -struct memtype { - u64 start; - u64 end; - u64 subtree_max_end; - unsigned long type; - struct rb_node rb; -}; - -static inline char *cattr_name(unsigned long flags) -{ - switch (flags & _PAGE_CACHE_MASK) { - case _PAGE_CACHE_UC: return "uncached"; - case _PAGE_CACHE_UC_MINUS: return "uncached-minus"; - case _PAGE_CACHE_WB: return "write-back"; - case _PAGE_CACHE_WC: return "write-combining"; - default: return "broken"; - } -} - -#ifdef CONFIG_X86_PAT -extern int rbt_memtype_check_insert(struct memtype *new, - unsigned long *new_type); -extern struct memtype *rbt_memtype_erase(u64 start, u64 end); -extern struct memtype *rbt_memtype_lookup(u64 addr); -extern int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos); -#else -static inline int rbt_memtype_check_insert(struct memtype *new, - unsigned long *new_type) -{ return 0; } -static inline struct memtype *rbt_memtype_erase(u64 start, u64 end) -{ return NULL; } -static inline struct memtype *rbt_memtype_lookup(u64 addr) -{ return NULL; } -static inline int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos) -{ return 0; } -#endif - -#endif /* __PAT_INTERNAL_H_ */ diff --git a/ANDROID_3.4.5/arch/x86/mm/pat_rbtree.c b/ANDROID_3.4.5/arch/x86/mm/pat_rbtree.c deleted file mode 100644 index 8acaddd0..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/pat_rbtree.c +++ /dev/null @@ -1,253 +0,0 @@ -/* - * Handle caching attributes in page tables (PAT) - * - * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> - * Suresh B Siddha <suresh.b.siddha@intel.com> - * - * Interval tree (augmented rbtree) used to store the PAT memory type - * reservations. 
- */ - -#include <linux/seq_file.h> -#include <linux/debugfs.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/rbtree.h> -#include <linux/sched.h> -#include <linux/gfp.h> - -#include <asm/pgtable.h> -#include <asm/pat.h> - -#include "pat_internal.h" - -/* - * The memtype tree keeps track of memory type for specific - * physical memory areas. Without proper tracking, conflicting memory - * types in different mappings can cause CPU cache corruption. - * - * The tree is an interval tree (augmented rbtree) with tree ordered - * on starting address. Tree can contain multiple entries for - * different regions which overlap. All the aliases have the same - * cache attributes of course. - * - * memtype_lock protects the rbtree. - */ - -static struct rb_root memtype_rbroot = RB_ROOT; - -static int is_node_overlap(struct memtype *node, u64 start, u64 end) -{ - if (node->start >= end || node->end <= start) - return 0; - - return 1; -} - -static u64 get_subtree_max_end(struct rb_node *node) -{ - u64 ret = 0; - if (node) { - struct memtype *data = container_of(node, struct memtype, rb); - ret = data->subtree_max_end; - } - return ret; -} - -/* Update 'subtree_max_end' for a node, based on node and its children */ -static void memtype_rb_augment_cb(struct rb_node *node, void *__unused) -{ - struct memtype *data; - u64 max_end, child_max_end; - - if (!node) - return; - - data = container_of(node, struct memtype, rb); - max_end = data->end; - - child_max_end = get_subtree_max_end(node->rb_right); - if (child_max_end > max_end) - max_end = child_max_end; - - child_max_end = get_subtree_max_end(node->rb_left); - if (child_max_end > max_end) - max_end = child_max_end; - - data->subtree_max_end = max_end; -} - -/* Find the first (lowest start addr) overlapping range from rb tree */ -static struct memtype *memtype_rb_lowest_match(struct rb_root *root, - u64 start, u64 end) -{ - struct rb_node *node = root->rb_node; - struct memtype *last_lower = NULL; - - while 
(node) { - struct memtype *data = container_of(node, struct memtype, rb); - - if (get_subtree_max_end(node->rb_left) > start) { - /* Lowest overlap if any must be on left side */ - node = node->rb_left; - } else if (is_node_overlap(data, start, end)) { - last_lower = data; - break; - } else if (start >= data->start) { - /* Lowest overlap if any must be on right side */ - node = node->rb_right; - } else { - break; - } - } - return last_lower; /* Returns NULL if there is no overlap */ -} - -static struct memtype *memtype_rb_exact_match(struct rb_root *root, - u64 start, u64 end) -{ - struct memtype *match; - - match = memtype_rb_lowest_match(root, start, end); - while (match != NULL && match->start < end) { - struct rb_node *node; - - if (match->start == start && match->end == end) - return match; - - node = rb_next(&match->rb); - if (node) - match = container_of(node, struct memtype, rb); - else - match = NULL; - } - - return NULL; /* Returns NULL if there is no exact match */ -} - -static int memtype_rb_check_conflict(struct rb_root *root, - u64 start, u64 end, - unsigned long reqtype, unsigned long *newtype) -{ - struct rb_node *node; - struct memtype *match; - int found_type = reqtype; - - match = memtype_rb_lowest_match(&memtype_rbroot, start, end); - if (match == NULL) - goto success; - - if (match->type != found_type && newtype == NULL) - goto failure; - - dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end); - found_type = match->type; - - node = rb_next(&match->rb); - while (node) { - match = container_of(node, struct memtype, rb); - - if (match->start >= end) /* Checked all possible matches */ - goto success; - - if (is_node_overlap(match, start, end) && - match->type != found_type) { - goto failure; - } - - node = rb_next(&match->rb); - } -success: - if (newtype) - *newtype = found_type; - - return 0; - -failure: - printk(KERN_INFO "%s:%d conflicting memory types " - "%Lx-%Lx %s<->%s\n", current->comm, current->pid, start, - end, 
cattr_name(found_type), cattr_name(match->type)); - return -EBUSY; -} - -static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata) -{ - struct rb_node **node = &(root->rb_node); - struct rb_node *parent = NULL; - - while (*node) { - struct memtype *data = container_of(*node, struct memtype, rb); - - parent = *node; - if (newdata->start <= data->start) - node = &((*node)->rb_left); - else if (newdata->start > data->start) - node = &((*node)->rb_right); - } - - rb_link_node(&newdata->rb, parent, node); - rb_insert_color(&newdata->rb, root); - rb_augment_insert(&newdata->rb, memtype_rb_augment_cb, NULL); -} - -int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type) -{ - int err = 0; - - err = memtype_rb_check_conflict(&memtype_rbroot, new->start, new->end, - new->type, ret_type); - - if (!err) { - if (ret_type) - new->type = *ret_type; - - new->subtree_max_end = new->end; - memtype_rb_insert(&memtype_rbroot, new); - } - return err; -} - -struct memtype *rbt_memtype_erase(u64 start, u64 end) -{ - struct rb_node *deepest; - struct memtype *data; - - data = memtype_rb_exact_match(&memtype_rbroot, start, end); - if (!data) - goto out; - - deepest = rb_augment_erase_begin(&data->rb); - rb_erase(&data->rb, &memtype_rbroot); - rb_augment_erase_end(deepest, memtype_rb_augment_cb, NULL); -out: - return data; -} - -struct memtype *rbt_memtype_lookup(u64 addr) -{ - struct memtype *data; - data = memtype_rb_lowest_match(&memtype_rbroot, addr, addr + PAGE_SIZE); - return data; -} - -#if defined(CONFIG_DEBUG_FS) -int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos) -{ - struct rb_node *node; - int i = 1; - - node = rb_first(&memtype_rbroot); - while (node && pos != i) { - node = rb_next(node); - i++; - } - - if (node) { /* pos == i */ - struct memtype *this = container_of(node, struct memtype, rb); - *out = *this; - return 0; - } else { - return 1; - } -} -#endif diff --git a/ANDROID_3.4.5/arch/x86/mm/pf_in.c 
b/ANDROID_3.4.5/arch/x86/mm/pf_in.c deleted file mode 100644 index 9f0614da..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/pf_in.c +++ /dev/null @@ -1,532 +0,0 @@ -/* - * Fault Injection Test harness (FI) - * Copyright (C) Intel Crop. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, - * USA. - * - */ - -/* Id: pf_in.c,v 1.1.1.1 2002/11/12 05:56:32 brlock Exp - * Copyright by Intel Crop., 2002 - * Louis Zhuang (louis.zhuang@intel.com) - * - * Bjorn Steinbrink (B.Steinbrink@gmx.de), 2007 - */ - -#include <linux/module.h> -#include <linux/ptrace.h> /* struct pt_regs */ -#include "pf_in.h" - -#ifdef __i386__ -/* IA32 Manual 3, 2-1 */ -static unsigned char prefix_codes[] = { - 0xF0, 0xF2, 0xF3, 0x2E, 0x36, 0x3E, 0x26, 0x64, - 0x65, 0x66, 0x67 -}; -/* IA32 Manual 3, 3-432*/ -static unsigned int reg_rop[] = { - 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F -}; -static unsigned int reg_wop[] = { 0x88, 0x89, 0xAA, 0xAB }; -static unsigned int imm_wop[] = { 0xC6, 0xC7 }; -/* IA32 Manual 3, 3-432*/ -static unsigned int rw8[] = { 0x88, 0x8A, 0xC6, 0xAA }; -static unsigned int rw32[] = { - 0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F, 0xAB -}; -static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F, 0xAA }; -static unsigned int mw16[] = { 0xB70F, 0xBF0F }; -static unsigned int mw32[] = { 0x89, 0x8B, 0xC7, 0xAB 
}; -static unsigned int mw64[] = {}; -#else /* not __i386__ */ -static unsigned char prefix_codes[] = { - 0x66, 0x67, 0x2E, 0x3E, 0x26, 0x64, 0x65, 0x36, - 0xF0, 0xF3, 0xF2, - /* REX Prefixes */ - 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, - 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f -}; -/* AMD64 Manual 3, Appendix A*/ -static unsigned int reg_rop[] = { - 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F -}; -static unsigned int reg_wop[] = { 0x88, 0x89, 0xAA, 0xAB }; -static unsigned int imm_wop[] = { 0xC6, 0xC7 }; -static unsigned int rw8[] = { 0xC6, 0x88, 0x8A, 0xAA }; -static unsigned int rw32[] = { - 0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F, 0xAB -}; -/* 8 bit only */ -static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F, 0xAA }; -/* 16 bit only */ -static unsigned int mw16[] = { 0xB70F, 0xBF0F }; -/* 16 or 32 bit */ -static unsigned int mw32[] = { 0xC7 }; -/* 16, 32 or 64 bit */ -static unsigned int mw64[] = { 0x89, 0x8B, 0xAB }; -#endif /* not __i386__ */ - -struct prefix_bits { - unsigned shorted:1; - unsigned enlarged:1; - unsigned rexr:1; - unsigned rex:1; -}; - -static int skip_prefix(unsigned char *addr, struct prefix_bits *prf) -{ - int i; - unsigned char *p = addr; - prf->shorted = 0; - prf->enlarged = 0; - prf->rexr = 0; - prf->rex = 0; - -restart: - for (i = 0; i < ARRAY_SIZE(prefix_codes); i++) { - if (*p == prefix_codes[i]) { - if (*p == 0x66) - prf->shorted = 1; -#ifdef __amd64__ - if ((*p & 0xf8) == 0x48) - prf->enlarged = 1; - if ((*p & 0xf4) == 0x44) - prf->rexr = 1; - if ((*p & 0xf0) == 0x40) - prf->rex = 1; -#endif - p++; - goto restart; - } - } - - return (p - addr); -} - -static int get_opcode(unsigned char *addr, unsigned int *opcode) -{ - int len; - - if (*addr == 0x0F) { - /* 0x0F is extension instruction */ - *opcode = *(unsigned short *)addr; - len = 2; - } else { - *opcode = *addr; - len = 1; - } - - return len; -} - -#define CHECK_OP_TYPE(opcode, array, type) \ - for (i = 0; i < ARRAY_SIZE(array); i++) { \ - if 
(array[i] == opcode) { \ - rv = type; \ - goto exit; \ - } \ - } - -enum reason_type get_ins_type(unsigned long ins_addr) -{ - unsigned int opcode; - unsigned char *p; - struct prefix_bits prf; - int i; - enum reason_type rv = OTHERS; - - p = (unsigned char *)ins_addr; - p += skip_prefix(p, &prf); - p += get_opcode(p, &opcode); - - CHECK_OP_TYPE(opcode, reg_rop, REG_READ); - CHECK_OP_TYPE(opcode, reg_wop, REG_WRITE); - CHECK_OP_TYPE(opcode, imm_wop, IMM_WRITE); - -exit: - return rv; -} -#undef CHECK_OP_TYPE - -static unsigned int get_ins_reg_width(unsigned long ins_addr) -{ - unsigned int opcode; - unsigned char *p; - struct prefix_bits prf; - int i; - - p = (unsigned char *)ins_addr; - p += skip_prefix(p, &prf); - p += get_opcode(p, &opcode); - - for (i = 0; i < ARRAY_SIZE(rw8); i++) - if (rw8[i] == opcode) - return 1; - - for (i = 0; i < ARRAY_SIZE(rw32); i++) - if (rw32[i] == opcode) - return prf.shorted ? 2 : (prf.enlarged ? 8 : 4); - - printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode); - return 0; -} - -unsigned int get_ins_mem_width(unsigned long ins_addr) -{ - unsigned int opcode; - unsigned char *p; - struct prefix_bits prf; - int i; - - p = (unsigned char *)ins_addr; - p += skip_prefix(p, &prf); - p += get_opcode(p, &opcode); - - for (i = 0; i < ARRAY_SIZE(mw8); i++) - if (mw8[i] == opcode) - return 1; - - for (i = 0; i < ARRAY_SIZE(mw16); i++) - if (mw16[i] == opcode) - return 2; - - for (i = 0; i < ARRAY_SIZE(mw32); i++) - if (mw32[i] == opcode) - return prf.shorted ? 2 : 4; - - for (i = 0; i < ARRAY_SIZE(mw64); i++) - if (mw64[i] == opcode) - return prf.shorted ? 2 : (prf.enlarged ? 8 : 4); - - printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode); - return 0; -} - -/* - * Define register ident in mod/rm byte. - * Note: these are NOT the same as in ptrace-abi.h. 
- */ -enum { - arg_AL = 0, - arg_CL = 1, - arg_DL = 2, - arg_BL = 3, - arg_AH = 4, - arg_CH = 5, - arg_DH = 6, - arg_BH = 7, - - arg_AX = 0, - arg_CX = 1, - arg_DX = 2, - arg_BX = 3, - arg_SP = 4, - arg_BP = 5, - arg_SI = 6, - arg_DI = 7, -#ifdef __amd64__ - arg_R8 = 8, - arg_R9 = 9, - arg_R10 = 10, - arg_R11 = 11, - arg_R12 = 12, - arg_R13 = 13, - arg_R14 = 14, - arg_R15 = 15 -#endif -}; - -static unsigned char *get_reg_w8(int no, int rex, struct pt_regs *regs) -{ - unsigned char *rv = NULL; - - switch (no) { - case arg_AL: - rv = (unsigned char *)&regs->ax; - break; - case arg_BL: - rv = (unsigned char *)&regs->bx; - break; - case arg_CL: - rv = (unsigned char *)&regs->cx; - break; - case arg_DL: - rv = (unsigned char *)&regs->dx; - break; -#ifdef __amd64__ - case arg_R8: - rv = (unsigned char *)&regs->r8; - break; - case arg_R9: - rv = (unsigned char *)&regs->r9; - break; - case arg_R10: - rv = (unsigned char *)&regs->r10; - break; - case arg_R11: - rv = (unsigned char *)&regs->r11; - break; - case arg_R12: - rv = (unsigned char *)&regs->r12; - break; - case arg_R13: - rv = (unsigned char *)&regs->r13; - break; - case arg_R14: - rv = (unsigned char *)&regs->r14; - break; - case arg_R15: - rv = (unsigned char *)&regs->r15; - break; -#endif - default: - break; - } - - if (rv) - return rv; - - if (rex) { - /* - * If REX prefix exists, access low bytes of SI etc. - * instead of AH etc. 
- */ - switch (no) { - case arg_SI: - rv = (unsigned char *)&regs->si; - break; - case arg_DI: - rv = (unsigned char *)&regs->di; - break; - case arg_BP: - rv = (unsigned char *)&regs->bp; - break; - case arg_SP: - rv = (unsigned char *)&regs->sp; - break; - default: - break; - } - } else { - switch (no) { - case arg_AH: - rv = 1 + (unsigned char *)&regs->ax; - break; - case arg_BH: - rv = 1 + (unsigned char *)&regs->bx; - break; - case arg_CH: - rv = 1 + (unsigned char *)&regs->cx; - break; - case arg_DH: - rv = 1 + (unsigned char *)&regs->dx; - break; - default: - break; - } - } - - if (!rv) - printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no); - - return rv; -} - -static unsigned long *get_reg_w32(int no, struct pt_regs *regs) -{ - unsigned long *rv = NULL; - - switch (no) { - case arg_AX: - rv = &regs->ax; - break; - case arg_BX: - rv = &regs->bx; - break; - case arg_CX: - rv = &regs->cx; - break; - case arg_DX: - rv = &regs->dx; - break; - case arg_SP: - rv = &regs->sp; - break; - case arg_BP: - rv = &regs->bp; - break; - case arg_SI: - rv = &regs->si; - break; - case arg_DI: - rv = &regs->di; - break; -#ifdef __amd64__ - case arg_R8: - rv = &regs->r8; - break; - case arg_R9: - rv = &regs->r9; - break; - case arg_R10: - rv = &regs->r10; - break; - case arg_R11: - rv = &regs->r11; - break; - case arg_R12: - rv = &regs->r12; - break; - case arg_R13: - rv = &regs->r13; - break; - case arg_R14: - rv = &regs->r14; - break; - case arg_R15: - rv = &regs->r15; - break; -#endif - default: - printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no); - } - - return rv; -} - -unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs) -{ - unsigned int opcode; - int reg; - unsigned char *p; - struct prefix_bits prf; - int i; - - p = (unsigned char *)ins_addr; - p += skip_prefix(p, &prf); - p += get_opcode(p, &opcode); - for (i = 0; i < ARRAY_SIZE(reg_rop); i++) - if (reg_rop[i] == opcode) - goto do_work; - - for (i = 0; i < ARRAY_SIZE(reg_wop); i++) - if (reg_wop[i] == opcode) - goto do_work; - - printk(KERN_ERR "mmiotrace: Not a 
register instruction, opcode " - "0x%02x\n", opcode); - goto err; - -do_work: - /* for STOS, source register is fixed */ - if (opcode == 0xAA || opcode == 0xAB) { - reg = arg_AX; - } else { - unsigned char mod_rm = *p; - reg = ((mod_rm >> 3) & 0x7) | (prf.rexr << 3); - } - switch (get_ins_reg_width(ins_addr)) { - case 1: - return *get_reg_w8(reg, prf.rex, regs); - - case 2: - return *(unsigned short *)get_reg_w32(reg, regs); - - case 4: - return *(unsigned int *)get_reg_w32(reg, regs); - -#ifdef __amd64__ - case 8: - return *(unsigned long *)get_reg_w32(reg, regs); -#endif - - default: - printk(KERN_ERR "mmiotrace: Error width# %d\n", reg); - } - -err: - return 0; -} - -unsigned long get_ins_imm_val(unsigned long ins_addr) -{ - unsigned int opcode; - unsigned char mod_rm; - unsigned char mod; - unsigned char *p; - struct prefix_bits prf; - int i; - - p = (unsigned char *)ins_addr; - p += skip_prefix(p, &prf); - p += get_opcode(p, &opcode); - for (i = 0; i < ARRAY_SIZE(imm_wop); i++) - if (imm_wop[i] == opcode) - goto do_work; - - printk(KERN_ERR "mmiotrace: Not an immediate instruction, opcode " - "0x%02x\n", opcode); - goto err; - -do_work: - mod_rm = *p; - mod = mod_rm >> 6; - p++; - switch (mod) { - case 0: - /* if r/m is 5 we have a 32 disp (IA32 Manual 3, Table 2-2) */ - /* AMD64: XXX Check for address size prefix? 
*/ - if ((mod_rm & 0x7) == 0x5) - p += 4; - break; - - case 1: - p += 1; - break; - - case 2: - p += 4; - break; - - case 3: - default: - printk(KERN_ERR "mmiotrace: not a memory access instruction " - "at 0x%lx, rm_mod=0x%02x\n", - ins_addr, mod_rm); - } - - switch (get_ins_reg_width(ins_addr)) { - case 1: - return *(unsigned char *)p; - - case 2: - return *(unsigned short *)p; - - case 4: - return *(unsigned int *)p; - -#ifdef __amd64__ - case 8: - return *(unsigned long *)p; -#endif - - default: - printk(KERN_ERR "mmiotrace: Error: width.\n"); - } - -err: - return 0; -} diff --git a/ANDROID_3.4.5/arch/x86/mm/pf_in.h b/ANDROID_3.4.5/arch/x86/mm/pf_in.h deleted file mode 100644 index e05341a5..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/pf_in.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Fault Injection Test harness (FI) - * Copyright (C) Intel Crop. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, - * USA. 
- * - */ - -#ifndef __PF_H_ -#define __PF_H_ - -enum reason_type { - NOT_ME, /* page fault is not in regions */ - NOTHING, /* access others point in regions */ - REG_READ, /* read from addr to reg */ - REG_WRITE, /* write from reg to addr */ - IMM_WRITE, /* write from imm to addr */ - OTHERS /* Other instructions can not intercept */ -}; - -enum reason_type get_ins_type(unsigned long ins_addr); -unsigned int get_ins_mem_width(unsigned long ins_addr); -unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs); -unsigned long get_ins_imm_val(unsigned long ins_addr); - -#endif /* __PF_H_ */ diff --git a/ANDROID_3.4.5/arch/x86/mm/pgtable.c b/ANDROID_3.4.5/arch/x86/mm/pgtable.c deleted file mode 100644 index 8573b83a..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/pgtable.c +++ /dev/null @@ -1,447 +0,0 @@ -#include <linux/mm.h> -#include <linux/gfp.h> -#include <asm/pgalloc.h> -#include <asm/pgtable.h> -#include <asm/tlb.h> -#include <asm/fixmap.h> - -#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO - -#ifdef CONFIG_HIGHPTE -#define PGALLOC_USER_GFP __GFP_HIGHMEM -#else -#define PGALLOC_USER_GFP 0 -#endif - -gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP; - -pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) -{ - return (pte_t *)__get_free_page(PGALLOC_GFP); -} - -pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) -{ - struct page *pte; - - pte = alloc_pages(__userpte_alloc_gfp, 0); - if (pte) - pgtable_page_ctor(pte); - return pte; -} - -static int __init setup_userpte(char *arg) -{ - if (!arg) - return -EINVAL; - - /* - * "userpte=nohigh" disables allocation of user pagetables in - * high memory. 
- */ - if (strcmp(arg, "nohigh") == 0) - __userpte_alloc_gfp &= ~__GFP_HIGHMEM; - else - return -EINVAL; - return 0; -} -early_param("userpte", setup_userpte); - -void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) -{ - pgtable_page_dtor(pte); - paravirt_release_pte(page_to_pfn(pte)); - tlb_remove_page(tlb, pte); -} - -#if PAGETABLE_LEVELS > 2 -void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) -{ - paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT); - tlb_remove_page(tlb, virt_to_page(pmd)); -} - -#if PAGETABLE_LEVELS > 3 -void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) -{ - paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); - tlb_remove_page(tlb, virt_to_page(pud)); -} -#endif /* PAGETABLE_LEVELS > 3 */ -#endif /* PAGETABLE_LEVELS > 2 */ - -static inline void pgd_list_add(pgd_t *pgd) -{ - struct page *page = virt_to_page(pgd); - - list_add(&page->lru, &pgd_list); -} - -static inline void pgd_list_del(pgd_t *pgd) -{ - struct page *page = virt_to_page(pgd); - - list_del(&page->lru); -} - -#define UNSHARED_PTRS_PER_PGD \ - (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) - - -static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm) -{ - BUILD_BUG_ON(sizeof(virt_to_page(pgd)->index) < sizeof(mm)); - virt_to_page(pgd)->index = (pgoff_t)mm; -} - -struct mm_struct *pgd_page_get_mm(struct page *page) -{ - return (struct mm_struct *)page->index; -} - -static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd) -{ - /* If the pgd points to a shared pagetable level (either the - ptes in non-PAE, or shared PMD in PAE), then just copy the - references from swapper_pg_dir. 
*/ - if (PAGETABLE_LEVELS == 2 || - (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD) || - PAGETABLE_LEVELS == 4) { - clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY, - swapper_pg_dir + KERNEL_PGD_BOUNDARY, - KERNEL_PGD_PTRS); - } - - /* list required to sync kernel mapping updates */ - if (!SHARED_KERNEL_PMD) { - pgd_set_mm(pgd, mm); - pgd_list_add(pgd); - } -} - -static void pgd_dtor(pgd_t *pgd) -{ - if (SHARED_KERNEL_PMD) - return; - - spin_lock(&pgd_lock); - pgd_list_del(pgd); - spin_unlock(&pgd_lock); -} - -/* - * List of all pgd's needed for non-PAE so it can invalidate entries - * in both cached and uncached pgd's; not needed for PAE since the - * kernel pmd is shared. If PAE were not to share the pmd a similar - * tactic would be needed. This is essentially codepath-based locking - * against pageattr.c; it is the unique case in which a valid change - * of kernel pagetables can't be lazily synchronized by vmalloc faults. - * vmalloc faults work because attached pagetables are never freed. - * -- wli - */ - -#ifdef CONFIG_X86_PAE -/* - * In PAE mode, we need to do a cr3 reload (=tlb flush) when - * updating the top-level pagetable entries to guarantee the - * processor notices the update. Since this is expensive, and - * all 4 top-level entries are used almost immediately in a - * new process's life, we just pre-populate them here. - * - * Also, if we're in a paravirt environment where the kernel pmd is - * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate - * and initialize the kernel pmds here. - */ -#define PREALLOCATED_PMDS UNSHARED_PTRS_PER_PGD - -void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) -{ - paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT); - - /* Note: almost everything apart from _PAGE_PRESENT is - reserved at the pmd (PDPT) level. 
*/ - set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT)); - - /* - * According to Intel App note "TLBs, Paging-Structure Caches, - * and Their Invalidation", April 2007, document 317080-001, - * section 8.1: in PAE mode we explicitly have to flush the - * TLB via cr3 if the top-level pgd is changed... - */ - flush_tlb_mm(mm); -} -#else /* !CONFIG_X86_PAE */ - -/* No need to prepopulate any pagetable entries in non-PAE modes. */ -#define PREALLOCATED_PMDS 0 - -#endif /* CONFIG_X86_PAE */ - -static void free_pmds(pmd_t *pmds[]) -{ - int i; - - for(i = 0; i < PREALLOCATED_PMDS; i++) - if (pmds[i]) - free_page((unsigned long)pmds[i]); -} - -static int preallocate_pmds(pmd_t *pmds[]) -{ - int i; - bool failed = false; - - for(i = 0; i < PREALLOCATED_PMDS; i++) { - pmd_t *pmd = (pmd_t *)__get_free_page(PGALLOC_GFP); - if (pmd == NULL) - failed = true; - pmds[i] = pmd; - } - - if (failed) { - free_pmds(pmds); - return -ENOMEM; - } - - return 0; -} - -/* - * Mop up any pmd pages which may still be attached to the pgd. - * Normally they will be freed by munmap/exit_mmap, but any pmd we - * preallocate which never got a corresponding vma will need to be - * freed manually. 
- */ -static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) -{ - int i; - - for(i = 0; i < PREALLOCATED_PMDS; i++) { - pgd_t pgd = pgdp[i]; - - if (pgd_val(pgd) != 0) { - pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd); - - pgdp[i] = native_make_pgd(0); - - paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT); - pmd_free(mm, pmd); - } - } -} - -static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[]) -{ - pud_t *pud; - unsigned long addr; - int i; - - if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */ - return; - - pud = pud_offset(pgd, 0); - - for (addr = i = 0; i < PREALLOCATED_PMDS; - i++, pud++, addr += PUD_SIZE) { - pmd_t *pmd = pmds[i]; - - if (i >= KERNEL_PGD_BOUNDARY) - memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]), - sizeof(pmd_t) * PTRS_PER_PMD); - - pud_populate(mm, pud, pmd); - } -} - -pgd_t *pgd_alloc(struct mm_struct *mm) -{ - pgd_t *pgd; - pmd_t *pmds[PREALLOCATED_PMDS]; - - pgd = (pgd_t *)__get_free_page(PGALLOC_GFP); - - if (pgd == NULL) - goto out; - - mm->pgd = pgd; - - if (preallocate_pmds(pmds) != 0) - goto out_free_pgd; - - if (paravirt_pgd_alloc(mm) != 0) - goto out_free_pmds; - - /* - * Make sure that pre-populating the pmds is atomic with - * respect to anything walking the pgd_list, so that they - * never see a partially populated pgd. 
- */ - spin_lock(&pgd_lock); - - pgd_ctor(mm, pgd); - pgd_prepopulate_pmd(mm, pgd, pmds); - - spin_unlock(&pgd_lock); - - return pgd; - -out_free_pmds: - free_pmds(pmds); -out_free_pgd: - free_page((unsigned long)pgd); -out: - return NULL; -} - -void pgd_free(struct mm_struct *mm, pgd_t *pgd) -{ - pgd_mop_up_pmds(mm, pgd); - pgd_dtor(pgd); - paravirt_pgd_free(mm, pgd); - free_page((unsigned long)pgd); -} - -int ptep_set_access_flags(struct vm_area_struct *vma, - unsigned long address, pte_t *ptep, - pte_t entry, int dirty) -{ - int changed = !pte_same(*ptep, entry); - - if (changed && dirty) { - *ptep = entry; - pte_update_defer(vma->vm_mm, address, ptep); - flush_tlb_page(vma, address); - } - - return changed; -} - -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -int pmdp_set_access_flags(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp, - pmd_t entry, int dirty) -{ - int changed = !pmd_same(*pmdp, entry); - - VM_BUG_ON(address & ~HPAGE_PMD_MASK); - - if (changed && dirty) { - *pmdp = entry; - pmd_update_defer(vma->vm_mm, address, pmdp); - flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); - } - - return changed; -} -#endif - -int ptep_test_and_clear_young(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep) -{ - int ret = 0; - - if (pte_young(*ptep)) - ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, - (unsigned long *) &ptep->pte); - - if (ret) - pte_update(vma->vm_mm, addr, ptep); - - return ret; -} - -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -int pmdp_test_and_clear_young(struct vm_area_struct *vma, - unsigned long addr, pmd_t *pmdp) -{ - int ret = 0; - - if (pmd_young(*pmdp)) - ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, - (unsigned long *)pmdp); - - if (ret) - pmd_update(vma->vm_mm, addr, pmdp); - - return ret; -} -#endif - -int ptep_clear_flush_young(struct vm_area_struct *vma, - unsigned long address, pte_t *ptep) -{ - int young; - - young = ptep_test_and_clear_young(vma, address, ptep); - if (young) - flush_tlb_page(vma, address); - - return 
young; -} - -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -int pmdp_clear_flush_young(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) -{ - int young; - - VM_BUG_ON(address & ~HPAGE_PMD_MASK); - - young = pmdp_test_and_clear_young(vma, address, pmdp); - if (young) - flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); - - return young; -} - -void pmdp_splitting_flush(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) -{ - int set; - VM_BUG_ON(address & ~HPAGE_PMD_MASK); - set = !test_and_set_bit(_PAGE_BIT_SPLITTING, - (unsigned long *)pmdp); - if (set) { - pmd_update(vma->vm_mm, address, pmdp); - /* need tlb flush only to serialize against gup-fast */ - flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); - } -} -#endif - -/** - * reserve_top_address - reserves a hole in the top of kernel address space - * @reserve - size of hole to reserve - * - * Can be used to relocate the fixmap area and poke a hole in the top - * of kernel address space to make room for a hypervisor. 
- */ -void __init reserve_top_address(unsigned long reserve) -{ -#ifdef CONFIG_X86_32 - BUG_ON(fixmaps_set > 0); - printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", - (int)-reserve); - __FIXADDR_TOP = -reserve - PAGE_SIZE; -#endif -} - -int fixmaps_set; - -void __native_set_fixmap(enum fixed_addresses idx, pte_t pte) -{ - unsigned long address = __fix_to_virt(idx); - - if (idx >= __end_of_fixed_addresses) { - BUG(); - return; - } - set_pte_vaddr(address, pte); - fixmaps_set++; -} - -void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, - pgprot_t flags) -{ - __native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags)); -} diff --git a/ANDROID_3.4.5/arch/x86/mm/pgtable_32.c b/ANDROID_3.4.5/arch/x86/mm/pgtable_32.c deleted file mode 100644 index a69bcb8c..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/pgtable_32.c +++ /dev/null @@ -1,133 +0,0 @@ -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/mm.h> -#include <linux/nmi.h> -#include <linux/swap.h> -#include <linux/smp.h> -#include <linux/highmem.h> -#include <linux/pagemap.h> -#include <linux/spinlock.h> -#include <linux/module.h> - -#include <asm/pgtable.h> -#include <asm/pgalloc.h> -#include <asm/fixmap.h> -#include <asm/e820.h> -#include <asm/tlb.h> -#include <asm/tlbflush.h> -#include <asm/io.h> - -unsigned int __VMALLOC_RESERVE = 128 << 20; - -/* - * Associate a virtual page frame with a given physical page frame - * and protection flags for that frame. 
- */ -void set_pte_vaddr(unsigned long vaddr, pte_t pteval) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - pgd = swapper_pg_dir + pgd_index(vaddr); - if (pgd_none(*pgd)) { - BUG(); - return; - } - pud = pud_offset(pgd, vaddr); - if (pud_none(*pud)) { - BUG(); - return; - } - pmd = pmd_offset(pud, vaddr); - if (pmd_none(*pmd)) { - BUG(); - return; - } - pte = pte_offset_kernel(pmd, vaddr); - if (pte_val(pteval)) - set_pte_at(&init_mm, vaddr, pte, pteval); - else - pte_clear(&init_mm, vaddr, pte); - - /* - * It's enough to flush this one mapping. - * (PGE mappings get flushed as well) - */ - __flush_tlb_one(vaddr); -} - -/* - * Associate a large virtual page frame with a given physical page frame - * and protection flags for that frame. pfn is for the base of the page, - * vaddr is what the page gets mapped to - both must be properly aligned. - * The pmd must already be instantiated. Assumes PAE mode. - */ -void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - - if (vaddr & (PMD_SIZE-1)) { /* vaddr is misaligned */ - printk(KERN_WARNING "set_pmd_pfn: vaddr misaligned\n"); - return; /* BUG(); */ - } - if (pfn & (PTRS_PER_PTE-1)) { /* pfn is misaligned */ - printk(KERN_WARNING "set_pmd_pfn: pfn misaligned\n"); - return; /* BUG(); */ - } - pgd = swapper_pg_dir + pgd_index(vaddr); - if (pgd_none(*pgd)) { - printk(KERN_WARNING "set_pmd_pfn: pgd_none\n"); - return; /* BUG(); */ - } - pud = pud_offset(pgd, vaddr); - pmd = pmd_offset(pud, vaddr); - set_pmd(pmd, pfn_pmd(pfn, flags)); - /* - * It's enough to flush this one mapping. - * (PGE mappings get flushed as well) - */ - __flush_tlb_one(vaddr); -} - -unsigned long __FIXADDR_TOP = 0xfffff000; -EXPORT_SYMBOL(__FIXADDR_TOP); - -/* - * vmalloc=size forces the vmalloc area to be exactly 'size' - * bytes. This can be used to increase (or decrease) the - * vmalloc area - the default is 128m. 
- */ -static int __init parse_vmalloc(char *arg) -{ - if (!arg) - return -EINVAL; - - /* Add VMALLOC_OFFSET to the parsed value due to vm area guard hole*/ - __VMALLOC_RESERVE = memparse(arg, &arg) + VMALLOC_OFFSET; - return 0; -} -early_param("vmalloc", parse_vmalloc); - -/* - * reservetop=size reserves a hole at the top of the kernel address space which - * a hypervisor can load into later. Needed for dynamically loaded hypervisors, - * so relocating the fixmap can be done before paging initialization. - */ -static int __init parse_reservetop(char *arg) -{ - unsigned long address; - - if (!arg) - return -EINVAL; - - address = memparse(arg, &arg); - reserve_top_address(address); - fixup_early_ioremap(); - return 0; -} -early_param("reservetop", parse_reservetop); diff --git a/ANDROID_3.4.5/arch/x86/mm/physaddr.c b/ANDROID_3.4.5/arch/x86/mm/physaddr.c deleted file mode 100644 index d2e27353..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/physaddr.c +++ /dev/null @@ -1,70 +0,0 @@ -#include <linux/mmdebug.h> -#include <linux/module.h> -#include <linux/mm.h> - -#include <asm/page.h> - -#include "physaddr.h" - -#ifdef CONFIG_X86_64 - -unsigned long __phys_addr(unsigned long x) -{ - if (x >= __START_KERNEL_map) { - x -= __START_KERNEL_map; - VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE); - x += phys_base; - } else { - VIRTUAL_BUG_ON(x < PAGE_OFFSET); - x -= PAGE_OFFSET; - VIRTUAL_BUG_ON(!phys_addr_valid(x)); - } - return x; -} -EXPORT_SYMBOL(__phys_addr); - -bool __virt_addr_valid(unsigned long x) -{ - if (x >= __START_KERNEL_map) { - x -= __START_KERNEL_map; - if (x >= KERNEL_IMAGE_SIZE) - return false; - x += phys_base; - } else { - if (x < PAGE_OFFSET) - return false; - x -= PAGE_OFFSET; - if (!phys_addr_valid(x)) - return false; - } - - return pfn_valid(x >> PAGE_SHIFT); -} -EXPORT_SYMBOL(__virt_addr_valid); - -#else - -#ifdef CONFIG_DEBUG_VIRTUAL -unsigned long __phys_addr(unsigned long x) -{ - /* VMALLOC_* aren't constants */ - VIRTUAL_BUG_ON(x < PAGE_OFFSET); - 
VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x)); - return x - PAGE_OFFSET; -} -EXPORT_SYMBOL(__phys_addr); -#endif - -bool __virt_addr_valid(unsigned long x) -{ - if (x < PAGE_OFFSET) - return false; - if (__vmalloc_start_set && is_vmalloc_addr((void *) x)) - return false; - if (x >= FIXADDR_START) - return false; - return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT); -} -EXPORT_SYMBOL(__virt_addr_valid); - -#endif /* CONFIG_X86_64 */ diff --git a/ANDROID_3.4.5/arch/x86/mm/physaddr.h b/ANDROID_3.4.5/arch/x86/mm/physaddr.h deleted file mode 100644 index a3cd5a0c..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/physaddr.h +++ /dev/null @@ -1,10 +0,0 @@ -#include <asm/processor.h> - -static inline int phys_addr_valid(resource_size_t addr) -{ -#ifdef CONFIG_PHYS_ADDR_T_64BIT - return !(addr >> boot_cpu_data.x86_phys_bits); -#else - return 1; -#endif -} diff --git a/ANDROID_3.4.5/arch/x86/mm/setup_nx.c b/ANDROID_3.4.5/arch/x86/mm/setup_nx.c deleted file mode 100644 index 410531d3..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/setup_nx.c +++ /dev/null @@ -1,60 +0,0 @@ -#include <linux/spinlock.h> -#include <linux/errno.h> -#include <linux/init.h> - -#include <asm/pgtable.h> -#include <asm/proto.h> - -static int disable_nx __cpuinitdata; - -/* - * noexec = on|off - * - * Control non-executable mappings for processes. 
- * - * on Enable - * off Disable - */ -static int __init noexec_setup(char *str) -{ - if (!str) - return -EINVAL; - if (!strncmp(str, "on", 2)) { - disable_nx = 0; - } else if (!strncmp(str, "off", 3)) { - disable_nx = 1; - } - x86_configure_nx(); - return 0; -} -early_param("noexec", noexec_setup); - -void __cpuinit x86_configure_nx(void) -{ - if (cpu_has_nx && !disable_nx) - __supported_pte_mask |= _PAGE_NX; - else - __supported_pte_mask &= ~_PAGE_NX; -} - -void __init x86_report_nx(void) -{ - if (!cpu_has_nx) { - printk(KERN_NOTICE "Notice: NX (Execute Disable) protection " - "missing in CPU!\n"); - } else { -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) - if (disable_nx) { - printk(KERN_INFO "NX (Execute Disable) protection: " - "disabled by kernel command line option\n"); - } else { - printk(KERN_INFO "NX (Execute Disable) protection: " - "active\n"); - } -#else - /* 32bit non-PAE kernel, NX cannot be used */ - printk(KERN_NOTICE "Notice: NX (Execute Disable) protection " - "cannot be enabled: non-PAE kernel!\n"); -#endif - } -} diff --git a/ANDROID_3.4.5/arch/x86/mm/srat.c b/ANDROID_3.4.5/arch/x86/mm/srat.c deleted file mode 100644 index efb5b4b9..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/srat.c +++ /dev/null @@ -1,193 +0,0 @@ -/* - * ACPI 3.0 based NUMA setup - * Copyright 2004 Andi Kleen, SuSE Labs. - * - * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs. - * - * Called from acpi_numa_init while reading the SRAT and SLIT tables. - * Assumes all memory regions belonging to a single proximity domain - * are in one chunk. Holes between them will be included in the node. 
- */ - -#include <linux/kernel.h> -#include <linux/acpi.h> -#include <linux/mmzone.h> -#include <linux/bitmap.h> -#include <linux/module.h> -#include <linux/topology.h> -#include <linux/bootmem.h> -#include <linux/memblock.h> -#include <linux/mm.h> -#include <asm/proto.h> -#include <asm/numa.h> -#include <asm/e820.h> -#include <asm/apic.h> -#include <asm/uv/uv.h> - -int acpi_numa __initdata; - -static __init int setup_node(int pxm) -{ - return acpi_map_pxm_to_node(pxm); -} - -static __init void bad_srat(void) -{ - printk(KERN_ERR "SRAT: SRAT not used.\n"); - acpi_numa = -1; -} - -static __init inline int srat_disabled(void) -{ - return acpi_numa < 0; -} - -/* Callback for SLIT parsing */ -void __init acpi_numa_slit_init(struct acpi_table_slit *slit) -{ - int i, j; - - for (i = 0; i < slit->locality_count; i++) - for (j = 0; j < slit->locality_count; j++) - numa_set_distance(pxm_to_node(i), pxm_to_node(j), - slit->entry[slit->locality_count * i + j]); -} - -/* Callback for Proximity Domain -> x2APIC mapping */ -void __init -acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) -{ - int pxm, node; - int apic_id; - - if (srat_disabled()) - return; - if (pa->header.length < sizeof(struct acpi_srat_x2apic_cpu_affinity)) { - bad_srat(); - return; - } - if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0) - return; - pxm = pa->proximity_domain; - apic_id = pa->apic_id; - if (!apic->apic_id_valid(apic_id)) { - printk(KERN_INFO "SRAT: PXM %u -> X2APIC 0x%04x ignored\n", - pxm, apic_id); - return; - } - node = setup_node(pxm); - if (node < 0) { - printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm); - bad_srat(); - return; - } - - if (apic_id >= MAX_LOCAL_APIC) { - printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node); - return; - } - set_apicid_to_node(apic_id, node); - node_set(node, numa_nodes_parsed); - acpi_numa = 1; - printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n", - pxm, 
apic_id, node); -} - -/* Callback for Proximity Domain -> LAPIC mapping */ -void __init -acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) -{ - int pxm, node; - int apic_id; - - if (srat_disabled()) - return; - if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) { - bad_srat(); - return; - } - if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0) - return; - pxm = pa->proximity_domain_lo; - if (acpi_srat_revision >= 2) - pxm |= *((unsigned int*)pa->proximity_domain_hi) << 8; - node = setup_node(pxm); - if (node < 0) { - printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm); - bad_srat(); - return; - } - - if (get_uv_system_type() >= UV_X2APIC) - apic_id = (pa->apic_id << 8) | pa->local_sapic_eid; - else - apic_id = pa->apic_id; - - if (apic_id >= MAX_LOCAL_APIC) { - printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node); - return; - } - - set_apicid_to_node(apic_id, node); - node_set(node, numa_nodes_parsed); - acpi_numa = 1; - printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n", - pxm, apic_id, node); -} - -#ifdef CONFIG_MEMORY_HOTPLUG -static inline int save_add_info(void) {return 1;} -#else -static inline int save_add_info(void) {return 0;} -#endif - -/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ -void __init -acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) -{ - u64 start, end; - int node, pxm; - - if (srat_disabled()) - return; - if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) { - bad_srat(); - return; - } - if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0) - return; - - if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info()) - return; - start = ma->base_address; - end = start + ma->length; - pxm = ma->proximity_domain; - if (acpi_srat_revision <= 1) - pxm &= 0xff; - node = setup_node(pxm); - if (node < 0) { - printk(KERN_ERR "SRAT: Too many proximity domains.\n"); - bad_srat(); - return; - } - - 
if (numa_add_memblk(node, start, end) < 0) { - bad_srat(); - return; - } - - printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm, - start, end); -} - -void __init acpi_numa_arch_fixup(void) {} - -int __init x86_acpi_numa_init(void) -{ - int ret; - - ret = acpi_numa_init(); - if (ret < 0) - return ret; - return srat_disabled() ? -EINVAL : 0; -} diff --git a/ANDROID_3.4.5/arch/x86/mm/testmmiotrace.c b/ANDROID_3.4.5/arch/x86/mm/testmmiotrace.c deleted file mode 100644 index 38868adf..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/testmmiotrace.c +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Written by Pekka Paalanen, 2008-2009 <pq@iki.fi> - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/module.h> -#include <linux/io.h> -#include <linux/mmiotrace.h> - -static unsigned long mmio_address; -module_param(mmio_address, ulong, 0); -MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB " - "(or 8 MB if read_far is non-zero)."); - -static unsigned long read_far = 0x400100; -module_param(read_far, ulong, 0); -MODULE_PARM_DESC(read_far, " Offset of a 32-bit read within 8 MB " - "(default: 0x400100)."); - -static unsigned v16(unsigned i) -{ - return i * 12 + 7; -} - -static unsigned v32(unsigned i) -{ - return i * 212371 + 13; -} - -static void do_write_test(void __iomem *p) -{ - unsigned int i; - pr_info("write test.\n"); - mmiotrace_printk("Write test.\n"); - - for (i = 0; i < 256; i++) - iowrite8(i, p + i); - - for (i = 1024; i < (5 * 1024); i += 2) - iowrite16(v16(i), p + i); - - for (i = (5 * 1024); i < (16 * 1024); i += 4) - iowrite32(v32(i), p + i); -} - -static void do_read_test(void __iomem *p) -{ - unsigned int i; - unsigned errs[3] = { 0 }; - pr_info("read test.\n"); - mmiotrace_printk("Read test.\n"); - - for (i = 0; i < 256; i++) - if (ioread8(p + i) != i) - ++errs[0]; - - for (i = 1024; i < (5 * 1024); i += 2) - if (ioread16(p + i) != v16(i)) - ++errs[1]; - - for (i = (5 * 1024); i < (16 * 1024); i += 4) - if (ioread32(p + i) != 
v32(i)) - ++errs[2]; - - mmiotrace_printk("Read errors: 8-bit %d, 16-bit %d, 32-bit %d.\n", - errs[0], errs[1], errs[2]); -} - -static void do_read_far_test(void __iomem *p) -{ - pr_info("read far test.\n"); - mmiotrace_printk("Read far test.\n"); - - ioread32(p + read_far); -} - -static void do_test(unsigned long size) -{ - void __iomem *p = ioremap_nocache(mmio_address, size); - if (!p) { - pr_err("could not ioremap, aborting.\n"); - return; - } - mmiotrace_printk("ioremap returned %p.\n", p); - do_write_test(p); - do_read_test(p); - if (read_far && read_far < size - 4) - do_read_far_test(p); - iounmap(p); -} - -/* - * Tests how mmiotrace behaves in face of multiple ioremap / iounmaps in - * a short time. We had a bug in deferred freeing procedure which tried - * to free this region multiple times (ioremap can reuse the same address - * for many mappings). - */ -static void do_test_bulk_ioremapping(void) -{ - void __iomem *p; - int i; - - for (i = 0; i < 10; ++i) { - p = ioremap_nocache(mmio_address, PAGE_SIZE); - if (p) - iounmap(p); - } - - /* Force freeing. If it will crash we will know why. */ - synchronize_rcu(); -} - -static int __init init(void) -{ - unsigned long size = (read_far) ? 
(8 << 20) : (16 << 10); - - if (mmio_address == 0) { - pr_err("you have to use the module argument mmio_address.\n"); - pr_err("DO NOT LOAD THIS MODULE UNLESS YOU REALLY KNOW WHAT YOU ARE DOING!\n"); - return -ENXIO; - } - - pr_warning("WARNING: mapping %lu kB @ 0x%08lx in PCI address space, " - "and writing 16 kB of rubbish in there.\n", - size >> 10, mmio_address); - do_test(size); - do_test_bulk_ioremapping(); - pr_info("All done.\n"); - return 0; -} - -static void __exit cleanup(void) -{ - pr_debug("unloaded.\n"); -} - -module_init(init); -module_exit(cleanup); -MODULE_LICENSE("GPL"); diff --git a/ANDROID_3.4.5/arch/x86/mm/tlb.c b/ANDROID_3.4.5/arch/x86/mm/tlb.c deleted file mode 100644 index d6c0418c..00000000 --- a/ANDROID_3.4.5/arch/x86/mm/tlb.c +++ /dev/null @@ -1,332 +0,0 @@ -#include <linux/init.h> - -#include <linux/mm.h> -#include <linux/spinlock.h> -#include <linux/smp.h> -#include <linux/interrupt.h> -#include <linux/module.h> -#include <linux/cpu.h> - -#include <asm/tlbflush.h> -#include <asm/mmu_context.h> -#include <asm/cache.h> -#include <asm/apic.h> -#include <asm/uv/uv.h> - -DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) - = { &init_mm, 0, }; - -/* - * Smarter SMP flushing macros. - * c/o Linus Torvalds. - * - * These mean you can really definitely utterly forget about - * writing to user space from interrupts. (Its not allowed anyway). - * - * Optimizations Manfred Spraul <manfred@colorfullife.com> - * - * More scalable flush, from Andi Kleen - * - * To avoid global state use 8 different call vectors. - * Each CPU uses a specific vector to trigger flushes on other - * CPUs. Depending on the received vector the target CPUs look into - * the right array slot for the flush data. - * - * With more than 8 CPUs they are hashed to the 8 available - * vectors. The limited global vector space forces us to this right now. 
- * In future when interrupts are split into per CPU domains this could be - * fixed, at the cost of triggering multiple IPIs in some cases. - */ - -union smp_flush_state { - struct { - struct mm_struct *flush_mm; - unsigned long flush_va; - raw_spinlock_t tlbstate_lock; - DECLARE_BITMAP(flush_cpumask, NR_CPUS); - }; - char pad[INTERNODE_CACHE_BYTES]; -} ____cacheline_internodealigned_in_smp; - -/* State is put into the per CPU data section, but padded - to a full cache line because other CPUs can access it and we don't - want false sharing in the per cpu data segment. */ -static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS]; - -static DEFINE_PER_CPU_READ_MOSTLY(int, tlb_vector_offset); - -/* - * We cannot call mmdrop() because we are in interrupt context, - * instead update mm->cpu_vm_mask. - */ -void leave_mm(int cpu) -{ - if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) - BUG(); - cpumask_clear_cpu(cpu, - mm_cpumask(percpu_read(cpu_tlbstate.active_mm))); - load_cr3(swapper_pg_dir); -} -EXPORT_SYMBOL_GPL(leave_mm); - -/* - * - * The flush IPI assumes that a thread switch happens in this order: - * [cpu0: the cpu that switches] - * 1) switch_mm() either 1a) or 1b) - * 1a) thread switch to a different mm - * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); - * Stop ipi delivery for the old mm. This is not synchronized with - * the other cpus, but smp_invalidate_interrupt ignore flush ipis - * for the wrong mm, and in the worst case we perform a superfluous - * tlb flush. - * 1a2) set cpu mmu_state to TLBSTATE_OK - * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 - * was in lazy tlb mode. - * 1a3) update cpu active_mm - * Now cpu0 accepts tlb flushes for the new mm. - * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); - * Now the other cpus will send tlb flush ipis. - * 1a4) change cr3. - * 1b) thread switch without mm change - * cpu active_mm is correct, cpu0 already handles - * flush ipis. 
- * 1b1) set cpu mmu_state to TLBSTATE_OK - * 1b2) test_and_set the cpu bit in cpu_vm_mask. - * Atomically set the bit [other cpus will start sending flush ipis], - * and test the bit. - * 1b3) if the bit was 0: leave_mm was called, flush the tlb. - * 2) switch %%esp, ie current - * - * The interrupt must handle 2 special cases: - * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm. - * - the cpu performs speculative tlb reads, i.e. even if the cpu only - * runs in kernel space, the cpu could load tlb entries for user space - * pages. - * - * The good news is that cpu mmu_state is local to each cpu, no - * write/read ordering problems. - */ - -/* - * TLB flush IPI: - * - * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. - * 2) Leave the mm if we are in the lazy tlb mode. - * - * Interrupts are disabled. - */ - -/* - * FIXME: use of asmlinkage is not consistent. On x86_64 it's noop - * but still used for documentation purpose but the usage is slightly - * inconsistent. On x86_32, asmlinkage is regparm(0) but interrupt - * entry calls in with the first parameter in %eax. Maybe define - * intrlinkage? - */ -#ifdef CONFIG_X86_64 -asmlinkage -#endif -void smp_invalidate_interrupt(struct pt_regs *regs) -{ - unsigned int cpu; - unsigned int sender; - union smp_flush_state *f; - - cpu = smp_processor_id(); - /* - * orig_rax contains the negated interrupt vector. - * Use that to determine where the sender put the data. 
- */ - sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; - f = &flush_state[sender]; - - if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask))) - goto out; - /* - * This was a BUG() but until someone can quote me the - * line from the intel manual that guarantees an IPI to - * multiple CPUs is retried _only_ on the erroring CPUs - * its staying as a return - * - * BUG(); - */ - - if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) { - if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { - if (f->flush_va == TLB_FLUSH_ALL) - local_flush_tlb(); - else - __flush_tlb_one(f->flush_va); - } else - leave_mm(cpu); - } -out: - ack_APIC_irq(); - smp_mb__before_clear_bit(); - cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask)); - smp_mb__after_clear_bit(); - inc_irq_stat(irq_tlb_count); -} - -static void flush_tlb_others_ipi(const struct cpumask *cpumask, - struct mm_struct *mm, unsigned long va) -{ - unsigned int sender; - union smp_flush_state *f; - - /* Caller has disabled preemption */ - sender = this_cpu_read(tlb_vector_offset); - f = &flush_state[sender]; - - if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS) - raw_spin_lock(&f->tlbstate_lock); - - f->flush_mm = mm; - f->flush_va = va; - if (cpumask_andnot(to_cpumask(f->flush_cpumask), cpumask, cpumask_of(smp_processor_id()))) { - /* - * We have to send the IPI only to - * CPUs affected. 
- */ - apic->send_IPI_mask(to_cpumask(f->flush_cpumask), - INVALIDATE_TLB_VECTOR_START + sender); - - while (!cpumask_empty(to_cpumask(f->flush_cpumask))) - cpu_relax(); - } - - f->flush_mm = NULL; - f->flush_va = 0; - if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS) - raw_spin_unlock(&f->tlbstate_lock); -} - -void native_flush_tlb_others(const struct cpumask *cpumask, - struct mm_struct *mm, unsigned long va) -{ - if (is_uv_system()) { - unsigned int cpu; - - cpu = smp_processor_id(); - cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu); - if (cpumask) - flush_tlb_others_ipi(cpumask, mm, va); - return; - } - flush_tlb_others_ipi(cpumask, mm, va); -} - -static void __cpuinit calculate_tlb_offset(void) -{ - int cpu, node, nr_node_vecs, idx = 0; - /* - * we are changing tlb_vector_offset for each CPU in runtime, but this - * will not cause inconsistency, as the write is atomic under X86. we - * might see more lock contentions in a short time, but after all CPU's - * tlb_vector_offset are changed, everything should go normal - * - * Note: if NUM_INVALIDATE_TLB_VECTORS % nr_online_nodes !=0, we might - * waste some vectors. 
- **/ - if (nr_online_nodes > NUM_INVALIDATE_TLB_VECTORS) - nr_node_vecs = 1; - else - nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes; - - for_each_online_node(node) { - int node_offset = (idx % NUM_INVALIDATE_TLB_VECTORS) * - nr_node_vecs; - int cpu_offset = 0; - for_each_cpu(cpu, cpumask_of_node(node)) { - per_cpu(tlb_vector_offset, cpu) = node_offset + - cpu_offset; - cpu_offset++; - cpu_offset = cpu_offset % nr_node_vecs; - } - idx++; - } -} - -static int __cpuinit tlb_cpuhp_notify(struct notifier_block *n, - unsigned long action, void *hcpu) -{ - switch (action & 0xf) { - case CPU_ONLINE: - case CPU_DEAD: - calculate_tlb_offset(); - } - return NOTIFY_OK; -} - -static int __cpuinit init_smp_flush(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(flush_state); i++) - raw_spin_lock_init(&flush_state[i].tlbstate_lock); - - calculate_tlb_offset(); - hotcpu_notifier(tlb_cpuhp_notify, 0); - return 0; -} -core_initcall(init_smp_flush); - -void flush_tlb_current_task(void) -{ - struct mm_struct *mm = current->mm; - - preempt_disable(); - - local_flush_tlb(); - if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL); - preempt_enable(); -} - -void flush_tlb_mm(struct mm_struct *mm) -{ - preempt_disable(); - - if (current->active_mm == mm) { - if (current->mm) - local_flush_tlb(); - else - leave_mm(smp_processor_id()); - } - if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL); - - preempt_enable(); -} - -void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) -{ - struct mm_struct *mm = vma->vm_mm; - - preempt_disable(); - - if (current->active_mm == mm) { - if (current->mm) - __flush_tlb_one(va); - else - leave_mm(smp_processor_id()); - } - - if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(mm_cpumask(mm), mm, va); - - preempt_enable(); -} - -static void 
do_flush_tlb_all(void *info) -{ - __flush_tlb_all(); - if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) - leave_mm(smp_processor_id()); -} - -void flush_tlb_all(void) -{ - on_each_cpu(do_flush_tlb_all, NULL, 1); -} |