Diffstat (limited to 'ANDROID_3.4.5/arch/x86/kernel')
219 files changed, 0 insertions, 84799 deletions
diff --git a/ANDROID_3.4.5/arch/x86/kernel/Makefile b/ANDROID_3.4.5/arch/x86/kernel/Makefile
deleted file mode 100644
index 532d2e09..00000000
--- a/ANDROID_3.4.5/arch/x86/kernel/Makefile
+++ /dev/null
@@ -1,115 +0,0 @@
-#
-# Makefile for the linux kernel.
-#
-
-extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinux.lds
-
-CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
-
-ifdef CONFIG_FUNCTION_TRACER
-# Do not profile debug and lowlevel utilities
-CFLAGS_REMOVE_tsc.o = -pg
-CFLAGS_REMOVE_rtc.o = -pg
-CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
-CFLAGS_REMOVE_pvclock.o = -pg
-CFLAGS_REMOVE_kvmclock.o = -pg
-CFLAGS_REMOVE_ftrace.o = -pg
-CFLAGS_REMOVE_early_printk.o = -pg
-endif
-
-obj-y := process_$(BITS).o signal.o entry_$(BITS).o
-obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
-obj-y += time.o ioport.o ldt.o dumpstack.o nmi.o
-obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o
-obj-$(CONFIG_IRQ_WORK) += irq_work.o
-obj-y += probe_roms.o
-obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
-obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
-obj-y += syscall_$(BITS).o
-obj-$(CONFIG_X86_64) += vsyscall_64.o
-obj-$(CONFIG_X86_64) += vsyscall_emu_64.o
-obj-y += bootflag.o e820.o
-obj-y += pci-dma.o quirks.o topology.o kdebugfs.o
-obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
-obj-y += tsc.o io_delay.o rtc.o
-obj-y += pci-iommu_table.o
-obj-y += resource.o
-
-obj-y += trampoline.o trampoline_$(BITS).o
-obj-y += process.o
-obj-y += i387.o xsave.o
-obj-y += ptrace.o
-obj-$(CONFIG_X86_32) += tls.o
-obj-$(CONFIG_IA32_EMULATION) += tls.o
-obj-y += step.o
-obj-$(CONFIG_INTEL_TXT) += tboot.o
-obj-$(CONFIG_ISA_DMA_API) += i8237.o
-obj-$(CONFIG_STACKTRACE) += stacktrace.o
-obj-y += cpu/
-obj-y += acpi/
-obj-y += reboot.o
-obj-$(CONFIG_X86_32) += reboot_32.o
-obj-$(CONFIG_MCA) += mca_32.o
-obj-$(CONFIG_X86_MSR) += msr.o
-obj-$(CONFIG_X86_CPUID) += cpuid.o
-obj-$(CONFIG_PCI) += early-quirks.o
-apm-y := apm_32.o
-obj-$(CONFIG_APM) += apm.o
-obj-$(CONFIG_SMP) += smp.o
-obj-$(CONFIG_SMP) += smpboot.o
-obj-$(CONFIG_SMP) += tsc_sync.o
-obj-$(CONFIG_SMP) += setup_percpu.o
-obj-$(CONFIG_X86_MPPARSE) += mpparse.o
-obj-y += apic/
-obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
-obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
-obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
-obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
-obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
-obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
-obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
-obj-$(CONFIG_KPROBES) += kprobes.o
-obj-$(CONFIG_OPTPROBES) += kprobes-opt.o
-obj-$(CONFIG_MODULES) += module.o
-obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
-obj-$(CONFIG_KGDB) += kgdb.o
-obj-$(CONFIG_VM86) += vm86_32.o
-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
-
-obj-$(CONFIG_HPET_TIMER) += hpet.o
-obj-$(CONFIG_APB_TIMER) += apb_timer.o
-
-obj-$(CONFIG_AMD_NB) += amd_nb.o
-obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
-obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
-obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
-
-obj-$(CONFIG_KVM_GUEST) += kvm.o
-obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
-obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
-obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
-obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
-
-obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
-
-microcode-y := microcode_core.o
-microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o
-microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o
-obj-$(CONFIG_MICROCODE) += microcode.o
-
-obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
-
-obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
-obj-$(CONFIG_OF) += devicetree.o
-
-###
-# 64 bit specific files
-ifeq ($(CONFIG_X86_64),y)
- obj-$(CONFIG_AUDIT) += audit_64.o
-
- obj-$(CONFIG_GART_IOMMU) += amd_gart_64.o aperture_64.o
- obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o
-
- obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o
- obj-y += vsmp_64.o
-endif
diff --git a/ANDROID_3.4.5/arch/x86/kernel/acpi/Makefile b/ANDROID_3.4.5/arch/x86/kernel/acpi/Makefile
deleted file mode 100644
index 6f35260b..00000000
--- a/ANDROID_3.4.5/arch/x86/kernel/acpi/Makefile
+++ /dev/null
@@ -1,14 +0,0 @@
-subdir- := realmode
-
-obj-$(CONFIG_ACPI) += boot.o
-obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_rm.o wakeup_$(BITS).o
-
-ifneq ($(CONFIG_ACPI_PROCESSOR),)
-obj-y += cstate.o
-endif
-
-$(obj)/wakeup_rm.o: $(obj)/realmode/wakeup.bin
-
-$(obj)/realmode/wakeup.bin: FORCE
- $(Q)$(MAKE) $(build)=$(obj)/realmode
-
diff --git a/ANDROID_3.4.5/arch/x86/kernel/acpi/boot.c b/ANDROID_3.4.5/arch/x86/kernel/acpi/boot.c
deleted file mode 100644
index bbdffc2d..00000000
--- a/ANDROID_3.4.5/arch/x86/kernel/acpi/boot.c
+++ /dev/null
@@ -1,1702 +0,0 @@
-/*
- * boot.c - Architecture-Specific Low-Level ACPI Boot Support
- *
- * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
- * Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - */ - -#include <linux/init.h> -#include <linux/acpi.h> -#include <linux/acpi_pmtmr.h> -#include <linux/efi.h> -#include <linux/cpumask.h> -#include <linux/module.h> -#include <linux/dmi.h> -#include <linux/irq.h> -#include <linux/slab.h> -#include <linux/bootmem.h> -#include <linux/ioport.h> -#include <linux/pci.h> - -#include <asm/pci_x86.h> -#include <asm/pgtable.h> -#include <asm/io_apic.h> -#include <asm/apic.h> -#include <asm/io.h> -#include <asm/mpspec.h> -#include <asm/smp.h> - -static int __initdata acpi_force = 0; -u32 acpi_rsdt_forced; -int acpi_disabled; -EXPORT_SYMBOL(acpi_disabled); - -#ifdef CONFIG_X86_64 -# include <asm/proto.h> -# include <asm/numa_64.h> -#endif /* X86 */ - -#define BAD_MADT_ENTRY(entry, end) ( \ - (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ - ((struct acpi_subtable_header *)entry)->length < sizeof(*entry)) - -#define PREFIX "ACPI: " - -int acpi_noirq; /* skip ACPI IRQ initialization */ -int acpi_pci_disabled; /* skip ACPI PCI scan and IRQ initialization */ -EXPORT_SYMBOL(acpi_pci_disabled); - -int acpi_lapic; -int acpi_ioapic; -int acpi_strict; - -u8 acpi_sci_flags __initdata; -int acpi_sci_override_gsi __initdata; -int acpi_skip_timer_override __initdata; -int acpi_use_timer_override __initdata; -int acpi_fix_pin2_polarity __initdata; - -#ifdef CONFIG_X86_LOCAL_APIC -static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; -#endif - -#ifndef __HAVE_ARCH_CMPXCHG -#warning ACPI uses CMPXCHG, i486 and later hardware -#endif - -/* -------------------------------------------------------------------------- - Boot-time Configuration - -------------------------------------------------------------------------- */ - -/* - * The default interrupt routing model is PIC (8259). This gets - * overridden if IOAPICs are enumerated (below). - */ -enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC; - - -/* - * ISA irqs by default are the first 16 gsis but can be - * any gsi as specified by an interrupt source override. - */ -static u32 isa_irq_to_gsi[NR_IRQS_LEGACY] __read_mostly = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 -}; - -static unsigned int gsi_to_irq(unsigned int gsi) -{ - unsigned int irq = gsi + NR_IRQS_LEGACY; - unsigned int i; - - for (i = 0; i < NR_IRQS_LEGACY; i++) { - if (isa_irq_to_gsi[i] == gsi) { - return i; - } - } - - /* Provide an identity mapping of gsi == irq - * except on truly weird platforms that have - * non isa irqs in the first 16 gsis. - */ - if (gsi >= NR_IRQS_LEGACY) - irq = gsi; - else - irq = gsi_top + gsi; - - return irq; -} - -static u32 irq_to_gsi(int irq) -{ - unsigned int gsi; - - if (irq < NR_IRQS_LEGACY) - gsi = isa_irq_to_gsi[irq]; - else if (irq < gsi_top) - gsi = irq; - else if (irq < (gsi_top + NR_IRQS_LEGACY)) - gsi = irq - gsi_top; - else - gsi = 0xffffffff; - - return gsi; -} - -/* - * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END, - * to map the target physical address. The problem is that set_fixmap() - * provides a single page, and it is possible that the page is not - * sufficient. - * By using this area, we can map up to MAX_IO_APICS pages temporarily, - * i.e. until the next __va_range() call. 
- * - * Important Safety Note: The fixed I/O APIC page numbers are *subtracted* - * from the fixed base. That's why we start at FIX_IO_APIC_BASE_END and - * count idx down while incrementing the phys address. - */ -char *__init __acpi_map_table(unsigned long phys, unsigned long size) -{ - - if (!phys || !size) - return NULL; - - return early_ioremap(phys, size); -} -void __init __acpi_unmap_table(char *map, unsigned long size) -{ - if (!map || !size) - return; - - early_iounmap(map, size); -} - -#ifdef CONFIG_X86_LOCAL_APIC -static int __init acpi_parse_madt(struct acpi_table_header *table) -{ - struct acpi_table_madt *madt = NULL; - - if (!cpu_has_apic) - return -EINVAL; - - madt = (struct acpi_table_madt *)table; - if (!madt) { - printk(KERN_WARNING PREFIX "Unable to map MADT\n"); - return -ENODEV; - } - - if (madt->address) { - acpi_lapic_addr = (u64) madt->address; - - printk(KERN_DEBUG PREFIX "Local APIC address 0x%08x\n", - madt->address); - } - - default_acpi_madt_oem_check(madt->header.oem_id, - madt->header.oem_table_id); - - return 0; -} - -static void __cpuinit acpi_register_lapic(int id, u8 enabled) -{ - unsigned int ver = 0; - - if (id >= (MAX_LOCAL_APIC-1)) { - printk(KERN_INFO PREFIX "skipped apicid that is too big\n"); - return; - } - - if (!enabled) { - ++disabled_cpus; - return; - } - - if (boot_cpu_physical_apicid != -1U) - ver = apic_version[boot_cpu_physical_apicid]; - - generic_processor_info(id, ver); -} - -static int __init -acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end) -{ - struct acpi_madt_local_x2apic *processor = NULL; - int apic_id; - u8 enabled; - - processor = (struct acpi_madt_local_x2apic *)header; - - if (BAD_MADT_ENTRY(processor, end)) - return -EINVAL; - - acpi_table_print_madt_entry(header); - - apic_id = processor->local_apic_id; - enabled = processor->lapic_flags & ACPI_MADT_ENABLED; -#ifdef CONFIG_X86_X2APIC - /* - * We need to register disabled CPU as well to permit - * counting disabled CPUs. This allows us to size - * cpus_possible_map more accurately, to permit - * to not preallocating memory for all NR_CPUS - * when we use CPU hotplug. - */ - if (!apic->apic_id_valid(apic_id) && enabled) - printk(KERN_WARNING PREFIX "x2apic entry ignored\n"); - else - acpi_register_lapic(apic_id, enabled); -#else - printk(KERN_WARNING PREFIX "x2apic entry ignored\n"); -#endif - - return 0; -} - -static int __init -acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end) -{ - struct acpi_madt_local_apic *processor = NULL; - - processor = (struct acpi_madt_local_apic *)header; - - if (BAD_MADT_ENTRY(processor, end)) - return -EINVAL; - - acpi_table_print_madt_entry(header); - - /* - * We need to register disabled CPU as well to permit - * counting disabled CPUs. This allows us to size - * cpus_possible_map more accurately, to permit - * to not preallocating memory for all NR_CPUS - * when we use CPU hotplug. 
- */ - acpi_register_lapic(processor->id, /* APIC ID */ - processor->lapic_flags & ACPI_MADT_ENABLED); - - return 0; -} - -static int __init -acpi_parse_sapic(struct acpi_subtable_header *header, const unsigned long end) -{ - struct acpi_madt_local_sapic *processor = NULL; - - processor = (struct acpi_madt_local_sapic *)header; - - if (BAD_MADT_ENTRY(processor, end)) - return -EINVAL; - - acpi_table_print_madt_entry(header); - - acpi_register_lapic((processor->id << 8) | processor->eid,/* APIC ID */ - processor->lapic_flags & ACPI_MADT_ENABLED); - - return 0; -} - -static int __init -acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header, - const unsigned long end) -{ - struct acpi_madt_local_apic_override *lapic_addr_ovr = NULL; - - lapic_addr_ovr = (struct acpi_madt_local_apic_override *)header; - - if (BAD_MADT_ENTRY(lapic_addr_ovr, end)) - return -EINVAL; - - acpi_lapic_addr = lapic_addr_ovr->address; - - return 0; -} - -static int __init -acpi_parse_x2apic_nmi(struct acpi_subtable_header *header, - const unsigned long end) -{ - struct acpi_madt_local_x2apic_nmi *x2apic_nmi = NULL; - - x2apic_nmi = (struct acpi_madt_local_x2apic_nmi *)header; - - if (BAD_MADT_ENTRY(x2apic_nmi, end)) - return -EINVAL; - - acpi_table_print_madt_entry(header); - - if (x2apic_nmi->lint != 1) - printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n"); - - return 0; -} - -static int __init -acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long end) -{ - struct acpi_madt_local_apic_nmi *lapic_nmi = NULL; - - lapic_nmi = (struct acpi_madt_local_apic_nmi *)header; - - if (BAD_MADT_ENTRY(lapic_nmi, end)) - return -EINVAL; - - acpi_table_print_madt_entry(header); - - if (lapic_nmi->lint != 1) - printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n"); - - return 0; -} - -#endif /*CONFIG_X86_LOCAL_APIC */ - -#ifdef CONFIG_X86_IO_APIC - -static int __init -acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) -{ - struct acpi_madt_io_apic *ioapic = NULL; - - ioapic = (struct acpi_madt_io_apic *)header; - - if (BAD_MADT_ENTRY(ioapic, end)) - return -EINVAL; - - acpi_table_print_madt_entry(header); - - mp_register_ioapic(ioapic->id, - ioapic->address, ioapic->global_irq_base); - - return 0; -} - -/* - * Parse Interrupt Source Override for the ACPI SCI - */ -static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger, u32 gsi) -{ - if (trigger == 0) /* compatible SCI trigger is level */ - trigger = 3; - - if (polarity == 0) /* compatible SCI polarity is low */ - polarity = 3; - - /* Command-line over-ride via acpi_sci= */ - if (acpi_sci_flags & ACPI_MADT_TRIGGER_MASK) - trigger = (acpi_sci_flags & ACPI_MADT_TRIGGER_MASK) >> 2; - - if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK) - polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK; - - /* - * mp_config_acpi_legacy_irqs() already setup IRQs < 16 - * If GSI is < 16, this will update its flags, - * else it will create a new mp_irqs[] entry. 
- */ - mp_override_legacy_irq(bus_irq, polarity, trigger, gsi); - - /* - * stash over-ride to indicate we've been here - * and for later update of acpi_gbl_FADT - */ - acpi_sci_override_gsi = gsi; - return; -} - -static int __init -acpi_parse_int_src_ovr(struct acpi_subtable_header * header, - const unsigned long end) -{ - struct acpi_madt_interrupt_override *intsrc = NULL; - - intsrc = (struct acpi_madt_interrupt_override *)header; - - if (BAD_MADT_ENTRY(intsrc, end)) - return -EINVAL; - - acpi_table_print_madt_entry(header); - - if (intsrc->source_irq == acpi_gbl_FADT.sci_interrupt) { - acpi_sci_ioapic_setup(intsrc->source_irq, - intsrc->inti_flags & ACPI_MADT_POLARITY_MASK, - (intsrc->inti_flags & ACPI_MADT_TRIGGER_MASK) >> 2, - intsrc->global_irq); - return 0; - } - - if (intsrc->source_irq == 0) { - if (acpi_skip_timer_override) { - printk(PREFIX "BIOS IRQ0 override ignored.\n"); - return 0; - } - - if ((intsrc->global_irq == 2) && acpi_fix_pin2_polarity - && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) { - intsrc->inti_flags &= ~ACPI_MADT_POLARITY_MASK; - printk(PREFIX "BIOS IRQ0 pin2 override: forcing polarity to high active.\n"); - } - } - - mp_override_legacy_irq(intsrc->source_irq, - intsrc->inti_flags & ACPI_MADT_POLARITY_MASK, - (intsrc->inti_flags & ACPI_MADT_TRIGGER_MASK) >> 2, - intsrc->global_irq); - - return 0; -} - -static int __init -acpi_parse_nmi_src(struct acpi_subtable_header * header, const unsigned long end) -{ - struct acpi_madt_nmi_source *nmi_src = NULL; - - nmi_src = (struct acpi_madt_nmi_source *)header; - - if (BAD_MADT_ENTRY(nmi_src, end)) - return -EINVAL; - - acpi_table_print_madt_entry(header); - - /* TBD: Support nimsrc entries? */ - - return 0; -} - -#endif /* CONFIG_X86_IO_APIC */ - -/* - * acpi_pic_sci_set_trigger() - * - * use ELCR to set PIC-mode trigger type for SCI - * - * If a PIC-mode SCI is not recognized or gives spurious IRQ7's - * it may require Edge Trigger -- use "acpi_sci=edge" - * - * Port 0x4d0-4d1 are ECLR1 and ECLR2, the Edge/Level Control Registers - * for the 8259 PIC. bit[n] = 1 means irq[n] is Level, otherwise Edge. - * ECLR1 is IRQs 0-7 (IRQ 0, 1, 2 must be 0) - * ECLR2 is IRQs 8-15 (IRQ 8, 13 must be 0) - */ - -void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) -{ - unsigned int mask = 1 << irq; - unsigned int old, new; - - /* Real old ELCR mask */ - old = inb(0x4d0) | (inb(0x4d1) << 8); - - /* - * If we use ACPI to set PCI IRQs, then we should clear ELCR - * since we will set it correctly as we enable the PCI irq - * routing. - */ - new = acpi_noirq ? old : 0; - - /* - * Update SCI information in the ELCR, it isn't in the PCI - * routing tables.. - */ - switch (trigger) { - case 1: /* Edge - clear */ - new &= ~mask; - break; - case 3: /* Level - set */ - new |= mask; - break; - } - - if (old == new) - return; - - printk(PREFIX "setting ELCR to %04x (from %04x)\n", new, old); - outb(new, 0x4d0); - outb(new >> 8, 0x4d1); -} - -int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) -{ - *irq = gsi_to_irq(gsi); - -#ifdef CONFIG_X86_IO_APIC - if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) - setup_IO_APIC_irq_extra(gsi); -#endif - - return 0; -} -EXPORT_SYMBOL_GPL(acpi_gsi_to_irq); - -int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi) -{ - if (isa_irq >= 16) - return -1; - *gsi = irq_to_gsi(isa_irq); - return 0; -} - -static int acpi_register_gsi_pic(struct device *dev, u32 gsi, - int trigger, int polarity) -{ -#ifdef CONFIG_PCI - /* - * Make sure all (legacy) PCI IRQs are set as level-triggered. 
- */ - if (trigger == ACPI_LEVEL_SENSITIVE) - eisa_set_level_irq(gsi); -#endif - - return gsi; -} - -static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi, - int trigger, int polarity) -{ -#ifdef CONFIG_X86_IO_APIC - gsi = mp_register_gsi(dev, gsi, trigger, polarity); -#endif - - return gsi; -} - -int (*__acpi_register_gsi)(struct device *dev, u32 gsi, - int trigger, int polarity) = acpi_register_gsi_pic; - -/* - * success: return IRQ number (>=0) - * failure: return < 0 - */ -int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) -{ - unsigned int irq; - unsigned int plat_gsi = gsi; - - plat_gsi = (*__acpi_register_gsi)(dev, gsi, trigger, polarity); - irq = gsi_to_irq(plat_gsi); - - return irq; -} - -void __init acpi_set_irq_model_pic(void) -{ - acpi_irq_model = ACPI_IRQ_MODEL_PIC; - __acpi_register_gsi = acpi_register_gsi_pic; - acpi_ioapic = 0; -} - -void __init acpi_set_irq_model_ioapic(void) -{ - acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; - __acpi_register_gsi = acpi_register_gsi_ioapic; - acpi_ioapic = 1; -} - -/* - * ACPI based hotplug support for CPU - */ -#ifdef CONFIG_ACPI_HOTPLUG_CPU -#include <acpi/processor.h> - -static void __cpuinit acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) -{ -#ifdef CONFIG_ACPI_NUMA - int nid; - - nid = acpi_get_node(handle); - if (nid == -1 || !node_online(nid)) - return; - set_apicid_to_node(physid, nid); - numa_set_node(cpu, nid); -#endif -} - -static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu) -{ - struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; - union acpi_object *obj; - struct acpi_madt_local_apic *lapic; - cpumask_var_t tmp_map, new_map; - u8 physid; - int cpu; - int retval = -ENOMEM; - - if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) - return -EINVAL; - - if (!buffer.length || !buffer.pointer) - return -EINVAL; - - obj = buffer.pointer; - if (obj->type != ACPI_TYPE_BUFFER || - obj->buffer.length < sizeof(*lapic)) { - kfree(buffer.pointer); - return -EINVAL; - } - - lapic = (struct acpi_madt_local_apic *)obj->buffer.pointer; - - if (lapic->header.type != ACPI_MADT_TYPE_LOCAL_APIC || - !(lapic->lapic_flags & ACPI_MADT_ENABLED)) { - kfree(buffer.pointer); - return -EINVAL; - } - - physid = lapic->id; - - kfree(buffer.pointer); - buffer.length = ACPI_ALLOCATE_BUFFER; - buffer.pointer = NULL; - lapic = NULL; - - if (!alloc_cpumask_var(&tmp_map, GFP_KERNEL)) - goto out; - - if (!alloc_cpumask_var(&new_map, GFP_KERNEL)) - goto free_tmp_map; - - cpumask_copy(tmp_map, cpu_present_mask); - acpi_register_lapic(physid, ACPI_MADT_ENABLED); - - /* - * If mp_register_lapic successfully generates a new logical cpu - * number, then the following will get us exactly what was mapped - */ - cpumask_andnot(new_map, cpu_present_mask, tmp_map); - if (cpumask_empty(new_map)) { - printk ("Unable to map lapic to logical cpu number\n"); - retval = -EINVAL; - goto free_new_map; - } - - acpi_processor_set_pdc(handle); - - cpu = cpumask_first(new_map); - acpi_map_cpu2node(handle, cpu, physid); - - *pcpu = cpu; - retval = 0; - -free_new_map: - free_cpumask_var(new_map); -free_tmp_map: - free_cpumask_var(tmp_map); -out: - return retval; -} - -/* wrapper to silence section mismatch warning */ -int __ref acpi_map_lsapic(acpi_handle handle, int *pcpu) -{ - return _acpi_map_lsapic(handle, pcpu); -} -EXPORT_SYMBOL(acpi_map_lsapic); - -int acpi_unmap_lsapic(int cpu) -{ - per_cpu(x86_cpu_to_apicid, cpu) = -1; - set_cpu_present(cpu, false); - num_processors--; - - return (0); -} - 
-EXPORT_SYMBOL(acpi_unmap_lsapic); -#endif /* CONFIG_ACPI_HOTPLUG_CPU */ - -int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base) -{ - /* TBD */ - return -EINVAL; -} - -EXPORT_SYMBOL(acpi_register_ioapic); - -int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base) -{ - /* TBD */ - return -EINVAL; -} - -EXPORT_SYMBOL(acpi_unregister_ioapic); - -static int __init acpi_parse_sbf(struct acpi_table_header *table) -{ - struct acpi_table_boot *sb; - - sb = (struct acpi_table_boot *)table; - if (!sb) { - printk(KERN_WARNING PREFIX "Unable to map SBF\n"); - return -ENODEV; - } - - sbf_port = sb->cmos_index; /* Save CMOS port */ - - return 0; -} - -#ifdef CONFIG_HPET_TIMER -#include <asm/hpet.h> - -static struct __initdata resource *hpet_res; - -static int __init acpi_parse_hpet(struct acpi_table_header *table) -{ - struct acpi_table_hpet *hpet_tbl; - - hpet_tbl = (struct acpi_table_hpet *)table; - if (!hpet_tbl) { - printk(KERN_WARNING PREFIX "Unable to map HPET\n"); - return -ENODEV; - } - - if (hpet_tbl->address.space_id != ACPI_SPACE_MEM) { - printk(KERN_WARNING PREFIX "HPET timers must be located in " - "memory.\n"); - return -1; - } - - hpet_address = hpet_tbl->address.address; - hpet_blockid = hpet_tbl->sequence; - - /* - * Some broken BIOSes advertise HPET at 0x0. We really do not - * want to allocate a resource there. - */ - if (!hpet_address) { - printk(KERN_WARNING PREFIX - "HPET id: %#x base: %#lx is invalid\n", - hpet_tbl->id, hpet_address); - return 0; - } -#ifdef CONFIG_X86_64 - /* - * Some even more broken BIOSes advertise HPET at - * 0xfed0000000000000 instead of 0xfed00000. Fix it up and add - * some noise: - */ - if (hpet_address == 0xfed0000000000000UL) { - if (!hpet_force_user) { - printk(KERN_WARNING PREFIX "HPET id: %#x " - "base: 0xfed0000000000000 is bogus\n " - "try hpet=force on the kernel command line to " - "fix it up to 0xfed00000.\n", hpet_tbl->id); - hpet_address = 0; - return 0; - } - printk(KERN_WARNING PREFIX - "HPET id: %#x base: 0xfed0000000000000 fixed up " - "to 0xfed00000.\n", hpet_tbl->id); - hpet_address >>= 32; - } -#endif - printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", - hpet_tbl->id, hpet_address); - - /* - * Allocate and initialize the HPET firmware resource for adding into - * the resource tree during the lateinit timeframe. - */ -#define HPET_RESOURCE_NAME_SIZE 9 - hpet_res = alloc_bootmem(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE); - - hpet_res->name = (void *)&hpet_res[1]; - hpet_res->flags = IORESOURCE_MEM; - snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE, "HPET %u", - hpet_tbl->sequence); - - hpet_res->start = hpet_address; - hpet_res->end = hpet_address + (1 * 1024) - 1; - - return 0; -} - -/* - * hpet_insert_resource inserts the HPET resources used into the resource - * tree. - */ -static __init int hpet_insert_resource(void) -{ - if (!hpet_res) - return 1; - - return insert_resource(&iomem_resource, hpet_res); -} - -late_initcall(hpet_insert_resource); - -#else -#define acpi_parse_hpet NULL -#endif - -static int __init acpi_parse_fadt(struct acpi_table_header *table) -{ - -#ifdef CONFIG_X86_PM_TIMER - /* detect the location of the ACPI PM Timer */ - if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) { - /* FADT rev. 
2 */ - if (acpi_gbl_FADT.xpm_timer_block.space_id != - ACPI_ADR_SPACE_SYSTEM_IO) - return 0; - - pmtmr_ioport = acpi_gbl_FADT.xpm_timer_block.address; - /* - * "X" fields are optional extensions to the original V1.0 - * fields, so we must selectively expand V1.0 fields if the - * corresponding X field is zero. - */ - if (!pmtmr_ioport) - pmtmr_ioport = acpi_gbl_FADT.pm_timer_block; - } else { - /* FADT rev. 1 */ - pmtmr_ioport = acpi_gbl_FADT.pm_timer_block; - } - if (pmtmr_ioport) - printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", - pmtmr_ioport); -#endif - return 0; -} - -#ifdef CONFIG_X86_LOCAL_APIC -/* - * Parse LAPIC entries in MADT - * returns 0 on success, < 0 on error - */ - -static int __init early_acpi_parse_madt_lapic_addr_ovr(void) -{ - int count; - - if (!cpu_has_apic) - return -ENODEV; - - /* - * Note that the LAPIC address is obtained from the MADT (32-bit value) - * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value). - */ - - count = - acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE, - acpi_parse_lapic_addr_ovr, 0); - if (count < 0) { - printk(KERN_ERR PREFIX - "Error parsing LAPIC address override entry\n"); - return count; - } - - register_lapic_address(acpi_lapic_addr); - - return count; -} - -static int __init acpi_parse_madt_lapic_entries(void) -{ - int count; - int x2count = 0; - - if (!cpu_has_apic) - return -ENODEV; - - /* - * Note that the LAPIC address is obtained from the MADT (32-bit value) - * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value). - */ - - count = - acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE, - acpi_parse_lapic_addr_ovr, 0); - if (count < 0) { - printk(KERN_ERR PREFIX - "Error parsing LAPIC address override entry\n"); - return count; - } - - register_lapic_address(acpi_lapic_addr); - - count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC, - acpi_parse_sapic, MAX_LOCAL_APIC); - - if (!count) { - x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC, - acpi_parse_x2apic, MAX_LOCAL_APIC); - count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, - acpi_parse_lapic, MAX_LOCAL_APIC); - } - if (!count && !x2count) { - printk(KERN_ERR PREFIX "No LAPIC entries present\n"); - /* TBD: Cleanup to allow fallback to MPS */ - return -ENODEV; - } else if (count < 0 || x2count < 0) { - printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n"); - /* TBD: Cleanup to allow fallback to MPS */ - return count; - } - - x2count = - acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC_NMI, - acpi_parse_x2apic_nmi, 0); - count = - acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI, acpi_parse_lapic_nmi, 0); - if (count < 0 || x2count < 0) { - printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n"); - /* TBD: Cleanup to allow fallback to MPS */ - return count; - } - return 0; -} -#endif /* CONFIG_X86_LOCAL_APIC */ - -#ifdef CONFIG_X86_IO_APIC -#define MP_ISA_BUS 0 - -#ifdef CONFIG_X86_ES7000 -extern int es7000_plat; -#endif - -void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) -{ - int ioapic; - int pin; - struct mpc_intsrc mp_irq; - - /* - * Convert 'gsi' to 'ioapic.pin'. - */ - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) - return; - pin = mp_find_ioapic_pin(ioapic, gsi); - - /* - * TBD: This check is for faulty timer entries, where the override - * erroneously sets the trigger to level, resulting in a HUGE - * increase of timer interrupts! 
- */ - if ((bus_irq == 0) && (trigger == 3)) - trigger = 1; - - mp_irq.type = MP_INTSRC; - mp_irq.irqtype = mp_INT; - mp_irq.irqflag = (trigger << 2) | polarity; - mp_irq.srcbus = MP_ISA_BUS; - mp_irq.srcbusirq = bus_irq; /* IRQ */ - mp_irq.dstapic = mpc_ioapic_id(ioapic); /* APIC ID */ - mp_irq.dstirq = pin; /* INTIN# */ - - mp_save_irq(&mp_irq); - - isa_irq_to_gsi[bus_irq] = gsi; -} - -void __init mp_config_acpi_legacy_irqs(void) -{ - int i; - struct mpc_intsrc mp_irq; - -#if defined (CONFIG_MCA) || defined (CONFIG_EISA) - /* - * Fabricate the legacy ISA bus (bus #31). - */ - mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; -#endif - set_bit(MP_ISA_BUS, mp_bus_not_pci); - pr_debug("Bus #%d is ISA\n", MP_ISA_BUS); - -#ifdef CONFIG_X86_ES7000 - /* - * Older generations of ES7000 have no legacy identity mappings - */ - if (es7000_plat == 1) - return; -#endif - - /* - * Use the default configuration for the IRQs 0-15. Unless - * overridden by (MADT) interrupt source override entries. - */ - for (i = 0; i < 16; i++) { - int ioapic, pin; - unsigned int dstapic; - int idx; - u32 gsi; - - /* Locate the gsi that irq i maps to. */ - if (acpi_isa_irq_to_gsi(i, &gsi)) - continue; - - /* - * Locate the IOAPIC that manages the ISA IRQ. - */ - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) - continue; - pin = mp_find_ioapic_pin(ioapic, gsi); - dstapic = mpc_ioapic_id(ioapic); - - for (idx = 0; idx < mp_irq_entries; idx++) { - struct mpc_intsrc *irq = mp_irqs + idx; - - /* Do we already have a mapping for this ISA IRQ? */ - if (irq->srcbus == MP_ISA_BUS && irq->srcbusirq == i) - break; - - /* Do we already have a mapping for this IOAPIC pin */ - if (irq->dstapic == dstapic && irq->dstirq == pin) - break; - } - - if (idx != mp_irq_entries) { - printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i); - continue; /* IRQ already used */ - } - - mp_irq.type = MP_INTSRC; - mp_irq.irqflag = 0; /* Conforming */ - mp_irq.srcbus = MP_ISA_BUS; - mp_irq.dstapic = dstapic; - mp_irq.irqtype = mp_INT; - mp_irq.srcbusirq = i; /* Identity mapped */ - mp_irq.dstirq = pin; - - mp_save_irq(&mp_irq); - } -} - -static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger, - int polarity) -{ -#ifdef CONFIG_X86_MPPARSE - struct mpc_intsrc mp_irq; - struct pci_dev *pdev; - unsigned char number; - unsigned int devfn; - int ioapic; - u8 pin; - - if (!acpi_ioapic) - return 0; - if (!dev) - return 0; - if (dev->bus != &pci_bus_type) - return 0; - - pdev = to_pci_dev(dev); - number = pdev->bus->number; - devfn = pdev->devfn; - pin = pdev->pin; - /* print the entry should happen on mptable identically */ - mp_irq.type = MP_INTSRC; - mp_irq.irqtype = mp_INT; - mp_irq.irqflag = (trigger == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | - (polarity == ACPI_ACTIVE_HIGH ? 
1 : 3); - mp_irq.srcbus = number; - mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); - ioapic = mp_find_ioapic(gsi); - mp_irq.dstapic = mpc_ioapic_id(ioapic); - mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi); - - mp_save_irq(&mp_irq); -#endif - return 0; -} - -int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) -{ - int ioapic; - int ioapic_pin; - struct io_apic_irq_attr irq_attr; - - if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) - return gsi; - - /* Don't set up the ACPI SCI because it's already set up */ - if (acpi_gbl_FADT.sci_interrupt == gsi) - return gsi; - - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) { - printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi); - return gsi; - } - - ioapic_pin = mp_find_ioapic_pin(ioapic, gsi); - - if (ioapic_pin > MP_MAX_IOAPIC_PIN) { - printk(KERN_ERR "Invalid reference to IOAPIC pin " - "%d-%d\n", mpc_ioapic_id(ioapic), - ioapic_pin); - return gsi; - } - - if (enable_update_mptable) - mp_config_acpi_gsi(dev, gsi, trigger, polarity); - - set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin, - trigger == ACPI_EDGE_SENSITIVE ? 0 : 1, - polarity == ACPI_ACTIVE_HIGH ? 0 : 1); - io_apic_set_pci_routing(dev, gsi_to_irq(gsi), &irq_attr); - - return gsi; -} - -/* - * Parse IOAPIC related entries in MADT - * returns 0 on success, < 0 on error - */ -static int __init acpi_parse_madt_ioapic_entries(void) -{ - int count; - - /* - * ACPI interpreter is required to complete interrupt setup, - * so if it is off, don't enumerate the io-apics with ACPI. - * If MPS is present, it will handle them, - * otherwise the system will stay in PIC mode - */ - if (acpi_disabled || acpi_noirq) - return -ENODEV; - - if (!cpu_has_apic) - return -ENODEV; - - /* - * if "noapic" boot option, don't look for IO-APICs - */ - if (skip_ioapic_setup) { - printk(KERN_INFO PREFIX "Skipping IOAPIC probe " - "due to 'noapic' option.\n"); - return -ENODEV; - } - - count = - acpi_table_parse_madt(ACPI_MADT_TYPE_IO_APIC, acpi_parse_ioapic, - MAX_IO_APICS); - if (!count) { - printk(KERN_ERR PREFIX "No IOAPIC entries present\n"); - return -ENODEV; - } else if (count < 0) { - printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n"); - return count; - } - - count = - acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr, - nr_irqs); - if (count < 0) { - printk(KERN_ERR PREFIX - "Error parsing interrupt source overrides entry\n"); - /* TBD: Cleanup to allow fallback to MPS */ - return count; - } - - /* - * If BIOS did not supply an INT_SRC_OVR for the SCI - * pretend we got one so we can set the SCI flags. 
- */ - if (!acpi_sci_override_gsi) - acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0, - acpi_gbl_FADT.sci_interrupt); - - /* Fill in identity legacy mappings where no override */ - mp_config_acpi_legacy_irqs(); - - count = - acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src, - nr_irqs); - if (count < 0) { - printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); - /* TBD: Cleanup to allow fallback to MPS */ - return count; - } - - return 0; -} -#else -static inline int acpi_parse_madt_ioapic_entries(void) -{ - return -1; -} -#endif /* !CONFIG_X86_IO_APIC */ - -static void __init early_acpi_process_madt(void) -{ -#ifdef CONFIG_X86_LOCAL_APIC - int error; - - if (!acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt)) { - - /* - * Parse MADT LAPIC entries - */ - error = early_acpi_parse_madt_lapic_addr_ovr(); - if (!error) { - acpi_lapic = 1; - smp_found_config = 1; - } - if (error == -EINVAL) { - /* - * Dell Precision Workstation 410, 610 come here. - */ - printk(KERN_ERR PREFIX - "Invalid BIOS MADT, disabling ACPI\n"); - disable_acpi(); - } - } -#endif -} - -static void __init acpi_process_madt(void) -{ -#ifdef CONFIG_X86_LOCAL_APIC - int error; - - if (!acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt)) { - - /* - * Parse MADT LAPIC entries - */ - error = acpi_parse_madt_lapic_entries(); - if (!error) { - acpi_lapic = 1; - - /* - * Parse MADT IO-APIC entries - */ - error = acpi_parse_madt_ioapic_entries(); - if (!error) { - acpi_set_irq_model_ioapic(); - - smp_found_config = 1; - } - } - if (error == -EINVAL) { - /* - * Dell Precision Workstation 410, 610 come here. - */ - printk(KERN_ERR PREFIX - "Invalid BIOS MADT, disabling ACPI\n"); - disable_acpi(); - } - } else { - /* - * ACPI found no MADT, and so ACPI wants UP PIC mode. - * In the event an MPS table was found, forget it. - * Boot with "acpi=off" to use MPS on such a system. - */ - if (smp_found_config) { - printk(KERN_WARNING PREFIX - "No APIC-table, disabling MPS\n"); - smp_found_config = 0; - } - } - - /* - * ACPI supports both logical (e.g. Hyper-Threading) and physical - * processors, where MPS only supports physical. 
- */ - if (acpi_lapic && acpi_ioapic) - printk(KERN_INFO "Using ACPI (MADT) for SMP configuration " - "information\n"); - else if (acpi_lapic) - printk(KERN_INFO "Using ACPI for processor (LAPIC) " - "configuration information\n"); -#endif - return; -} - -static int __init disable_acpi_irq(const struct dmi_system_id *d) -{ - if (!acpi_force) { - printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n", - d->ident); - acpi_noirq_set(); - } - return 0; -} - -static int __init disable_acpi_pci(const struct dmi_system_id *d) -{ - if (!acpi_force) { - printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n", - d->ident); - acpi_disable_pci(); - } - return 0; -} - -static int __init dmi_disable_acpi(const struct dmi_system_id *d) -{ - if (!acpi_force) { - printk(KERN_NOTICE "%s detected: acpi off\n", d->ident); - disable_acpi(); - } else { - printk(KERN_NOTICE - "Warning: DMI blacklist says broken, but acpi forced\n"); - } - return 0; -} - -/* - * Force ignoring BIOS IRQ0 override - */ -static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d) -{ - if (!acpi_skip_timer_override) { - pr_notice("%s detected: Ignoring BIOS IRQ0 override\n", - d->ident); - acpi_skip_timer_override = 1; - } - return 0; -} - -/* - * If your system is blacklisted here, but you find that acpi=force - * works for you, please contact linux-acpi@vger.kernel.org - */ -static struct dmi_system_id __initdata acpi_dmi_table[] = { - /* - * Boxes that need ACPI disabled - */ - { - .callback = dmi_disable_acpi, - .ident = "IBM Thinkpad", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), - DMI_MATCH(DMI_BOARD_NAME, "2629H1G"), - }, - }, - - /* - * Boxes that need ACPI PCI IRQ routing disabled - */ - { - .callback = disable_acpi_irq, - .ident = "ASUS A7V", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"), - DMI_MATCH(DMI_BOARD_NAME, "<A7V>"), - /* newer BIOS, Revision 1011, does work */ - DMI_MATCH(DMI_BIOS_VERSION, - "ASUS A7V ACPI BIOS Revision 1007"), - }, - }, - { - /* - * Latest BIOS for IBM 600E (1.16) has bad pcinum - * for LPC bridge, which is needed for the PCI - * interrupt links to work. DSDT fix is in bug 5966. - * 2645, 2646 model numbers are shared with 600/600E/600X - */ - .callback = disable_acpi_irq, - .ident = "IBM Thinkpad 600 Series 2645", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), - DMI_MATCH(DMI_BOARD_NAME, "2645"), - }, - }, - { - .callback = disable_acpi_irq, - .ident = "IBM Thinkpad 600 Series 2646", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), - DMI_MATCH(DMI_BOARD_NAME, "2646"), - }, - }, - /* - * Boxes that need ACPI PCI IRQ routing and PCI scan disabled - */ - { /* _BBN 0 bug */ - .callback = disable_acpi_pci, - .ident = "ASUS PR-DLS", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), - DMI_MATCH(DMI_BOARD_NAME, "PR-DLS"), - DMI_MATCH(DMI_BIOS_VERSION, - "ASUS PR-DLS ACPI BIOS Revision 1010"), - DMI_MATCH(DMI_BIOS_DATE, "03/21/2003") - }, - }, - { - .callback = disable_acpi_pci, - .ident = "Acer TravelMate 36x Laptop", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Acer"), - DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), - }, - }, - {} -}; - -/* second table for DMI checks that should run after early-quirks */ -static struct dmi_system_id __initdata acpi_dmi_table_late[] = { - /* - * HP laptops which use a DSDT reporting as HP/SB400/10000, - * which includes some code which overrides all temperature - * trip points to 16C if the INTIN2 input of the I/O APIC - * is enabled. 
This input is incorrectly designated the - * ISA IRQ 0 via an interrupt source override even though - * it is wired to the output of the master 8259A and INTIN0 - * is not connected at all. Force ignoring BIOS IRQ0 - * override in that cases. - */ - { - .callback = dmi_ignore_irq0_timer_override, - .ident = "HP nx6115 laptop", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6115"), - }, - }, - { - .callback = dmi_ignore_irq0_timer_override, - .ident = "HP NX6125 laptop", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6125"), - }, - }, - { - .callback = dmi_ignore_irq0_timer_override, - .ident = "HP NX6325 laptop", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6325"), - }, - }, - { - .callback = dmi_ignore_irq0_timer_override, - .ident = "HP 6715b laptop", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq 6715b"), - }, - }, - { - .callback = dmi_ignore_irq0_timer_override, - .ident = "FUJITSU SIEMENS", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), - DMI_MATCH(DMI_PRODUCT_NAME, "AMILO PRO V2030"), - }, - }, - {} -}; - -/* - * acpi_boot_table_init() and acpi_boot_init() - * called from setup_arch(), always. - * 1. checksums all tables - * 2. enumerates lapics - * 3. enumerates io-apics - * - * acpi_table_init() is separate to allow reading SRAT without - * other side effects. - * - * side effects of acpi_boot_init: - * acpi_lapic = 1 if LAPIC found - * acpi_ioapic = 1 if IOAPIC found - * if (acpi_lapic && acpi_ioapic) smp_found_config = 1; - * if acpi_blacklisted() acpi_disabled = 1; - * acpi_irq_model=... - * ... - */ - -void __init acpi_boot_table_init(void) -{ - dmi_check_system(acpi_dmi_table); - - /* - * If acpi_disabled, bail out - */ - if (acpi_disabled) - return; - - /* - * Initialize the ACPI boot-time table parser. 
- */ - if (acpi_table_init()) { - disable_acpi(); - return; - } - - acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf); - - /* - * blacklist may disable ACPI entirely - */ - if (acpi_blacklisted()) { - if (acpi_force) { - printk(KERN_WARNING PREFIX "acpi=force override\n"); - } else { - printk(KERN_WARNING PREFIX "Disabling ACPI support\n"); - disable_acpi(); - return; - } - } -} - -int __init early_acpi_boot_init(void) -{ - /* - * If acpi_disabled, bail out - */ - if (acpi_disabled) - return 1; - - /* - * Process the Multiple APIC Description Table (MADT), if present - */ - early_acpi_process_madt(); - - return 0; -} - -int __init acpi_boot_init(void) -{ - /* those are executed after early-quirks are executed */ - dmi_check_system(acpi_dmi_table_late); - - /* - * If acpi_disabled, bail out - */ - if (acpi_disabled) - return 1; - - acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf); - - /* - * set sci_int and PM timer address - */ - acpi_table_parse(ACPI_SIG_FADT, acpi_parse_fadt); - - /* - * Process the Multiple APIC Description Table (MADT), if present - */ - acpi_process_madt(); - - acpi_table_parse(ACPI_SIG_HPET, acpi_parse_hpet); - - if (!acpi_noirq) - x86_init.pci.init = pci_acpi_init; - - return 0; -} - -static int __init parse_acpi(char *arg) -{ - if (!arg) - return -EINVAL; - - /* "acpi=off" disables both ACPI table parsing and interpreter */ - if (strcmp(arg, "off") == 0) { - disable_acpi(); - } - /* acpi=force to over-ride black-list */ - else if (strcmp(arg, "force") == 0) { - acpi_force = 1; - acpi_disabled = 0; - } - /* acpi=strict disables out-of-spec workarounds */ - else if (strcmp(arg, "strict") == 0) { - acpi_strict = 1; - } - /* acpi=rsdt use RSDT instead of XSDT */ - else if (strcmp(arg, "rsdt") == 0) { - acpi_rsdt_forced = 1; - } - /* "acpi=noirq" disables ACPI interrupt routing */ - else if (strcmp(arg, "noirq") == 0) { - acpi_noirq_set(); - } - /* "acpi=copy_dsdt" copys DSDT */ - else if (strcmp(arg, "copy_dsdt") == 0) { - acpi_gbl_copy_dsdt_locally = 1; - } else { - /* Core will printk when we return error. */ - return -EINVAL; - } - return 0; -} -early_param("acpi", parse_acpi); - -/* FIXME: Using pci= for an ACPI parameter is a travesty. 
*/ -static int __init parse_pci(char *arg) -{ - if (arg && strcmp(arg, "noacpi") == 0) - acpi_disable_pci(); - return 0; -} -early_param("pci", parse_pci); - -int __init acpi_mps_check(void) -{ -#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_X86_MPPARSE) -/* mptable code is not built-in*/ - if (acpi_disabled || acpi_noirq) { - printk(KERN_WARNING "MPS support code is not built-in.\n" - "Using acpi=off or acpi=noirq or pci=noacpi " - "may have problem\n"); - return 1; - } -#endif - return 0; -} - -#ifdef CONFIG_X86_IO_APIC -static int __init parse_acpi_skip_timer_override(char *arg) -{ - acpi_skip_timer_override = 1; - return 0; -} -early_param("acpi_skip_timer_override", parse_acpi_skip_timer_override); - -static int __init parse_acpi_use_timer_override(char *arg) -{ - acpi_use_timer_override = 1; - return 0; -} -early_param("acpi_use_timer_override", parse_acpi_use_timer_override); -#endif /* CONFIG_X86_IO_APIC */ - -static int __init setup_acpi_sci(char *s) -{ - if (!s) - return -EINVAL; - if (!strcmp(s, "edge")) - acpi_sci_flags = ACPI_MADT_TRIGGER_EDGE | - (acpi_sci_flags & ~ACPI_MADT_TRIGGER_MASK); - else if (!strcmp(s, "level")) - acpi_sci_flags = ACPI_MADT_TRIGGER_LEVEL | - (acpi_sci_flags & ~ACPI_MADT_TRIGGER_MASK); - else if (!strcmp(s, "high")) - acpi_sci_flags = ACPI_MADT_POLARITY_ACTIVE_HIGH | - (acpi_sci_flags & ~ACPI_MADT_POLARITY_MASK); - else if (!strcmp(s, "low")) - acpi_sci_flags = ACPI_MADT_POLARITY_ACTIVE_LOW | - (acpi_sci_flags & ~ACPI_MADT_POLARITY_MASK); - else - return -EINVAL; - return 0; -} -early_param("acpi_sci", setup_acpi_sci); - -int __acpi_acquire_global_lock(unsigned int *lock) -{ - unsigned int old, new, val; - do { - old = *lock; - new = (((old & ~0x3) + 2) + ((old >> 1) & 0x1)); - val = cmpxchg(lock, old, new); - } while (unlikely (val != old)); - return (new < 3) ? -1 : 0; -} - -int __acpi_release_global_lock(unsigned int *lock) -{ - unsigned int old, new, val; - do { - old = *lock; - new = old & ~0x3; - val = cmpxchg(lock, old, new); - } while (unlikely (val != old)); - return old & 0x1; -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/acpi/cstate.c b/ANDROID_3.4.5/arch/x86/kernel/acpi/cstate.c deleted file mode 100644 index d2b7f277..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/acpi/cstate.c +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Copyright (C) 2005 Intel Corporation - * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> - * - Added _PDC for SMP C-states on Intel CPUs - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/acpi.h> -#include <linux/cpu.h> -#include <linux/sched.h> - -#include <acpi/processor.h> -#include <asm/acpi.h> -#include <asm/mwait.h> -#include <asm/special_insns.h> - -/* - * Initialize bm_flags based on the CPU cache properties - * On SMP it depends on cache configuration - * - When cache is not shared among all CPUs, we flush cache - * before entering C3. - * - When cache is shared among all CPUs, we use bm_check - * mechanism as in UP case - * - * This routine is called only after all the CPUs are online - */ -void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, - unsigned int cpu) -{ - struct cpuinfo_x86 *c = &cpu_data(cpu); - - flags->bm_check = 0; - if (num_online_cpus() == 1) - flags->bm_check = 1; - else if (c->x86_vendor == X86_VENDOR_INTEL) { - /* - * Today all MP CPUs that support C3 share cache. - * And caches should not be flushed by software while - * entering C3 type state. 
- */ - flags->bm_check = 1; - } - - /* - * On all recent Intel platforms, ARB_DISABLE is a nop. - * So, set bm_control to zero to indicate that ARB_DISABLE - * is not required while entering C3 type state on - * P4, Core and beyond CPUs - */ - if (c->x86_vendor == X86_VENDOR_INTEL && - (c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 0x0f))) - flags->bm_control = 0; -} -EXPORT_SYMBOL(acpi_processor_power_init_bm_check); - -/* The code below handles cstate entry with monitor-mwait pair on Intel*/ - -struct cstate_entry { - struct { - unsigned int eax; - unsigned int ecx; - } states[ACPI_PROCESSOR_MAX_POWER]; -}; -static struct cstate_entry __percpu *cpu_cstate_entry; /* per CPU ptr */ - -static short mwait_supported[ACPI_PROCESSOR_MAX_POWER]; - -#define NATIVE_CSTATE_BEYOND_HALT (2) - -static long acpi_processor_ffh_cstate_probe_cpu(void *_cx) -{ - struct acpi_processor_cx *cx = _cx; - long retval; - unsigned int eax, ebx, ecx, edx; - unsigned int edx_part; - unsigned int cstate_type; /* C-state type and not ACPI C-state type */ - unsigned int num_cstate_subtype; - - cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx); - - /* Check whether this particular cx_type (in CST) is supported or not */ - cstate_type = ((cx->address >> MWAIT_SUBSTATE_SIZE) & - MWAIT_CSTATE_MASK) + 1; - edx_part = edx >> (cstate_type * MWAIT_SUBSTATE_SIZE); - num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK; - - retval = 0; - if (num_cstate_subtype < (cx->address & MWAIT_SUBSTATE_MASK)) { - retval = -1; - goto out; - } - - /* mwait ecx extensions INTERRUPT_BREAK should be supported for C2/C3 */ - if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || - !(ecx & CPUID5_ECX_INTERRUPT_BREAK)) { - retval = -1; - goto out; - } - - if (!mwait_supported[cstate_type]) { - mwait_supported[cstate_type] = 1; - printk(KERN_DEBUG - "Monitor-Mwait will be used to enter C-%d " - "state\n", cx->type); - } - snprintf(cx->desc, - ACPI_CX_DESC_LEN, "ACPI FFH INTEL MWAIT 0x%x", - cx->address); -out: - return retval; -} - -int acpi_processor_ffh_cstate_probe(unsigned int cpu, - struct acpi_processor_cx *cx, struct acpi_power_register *reg) -{ - struct cstate_entry *percpu_entry; - struct cpuinfo_x86 *c = &cpu_data(cpu); - long retval; - - if (!cpu_cstate_entry || c->cpuid_level < CPUID_MWAIT_LEAF) - return -1; - - if (reg->bit_offset != NATIVE_CSTATE_BEYOND_HALT) - return -1; - - percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu); - percpu_entry->states[cx->index].eax = 0; - percpu_entry->states[cx->index].ecx = 0; - - /* Make sure we are running on right CPU */ - - retval = work_on_cpu(cpu, acpi_processor_ffh_cstate_probe_cpu, cx); - if (retval == 0) { - /* Use the hint in CST */ - percpu_entry->states[cx->index].eax = cx->address; - percpu_entry->states[cx->index].ecx = MWAIT_ECX_INTERRUPT_BREAK; - } - - /* - * For _CST FFH on Intel, if GAS.access_size bit 1 is cleared, - * then we should skip checking BM_STS for this C-state. - * ref: "Intel Processor Vendor-Specific ACPI Interface Specification" - */ - if ((c->x86_vendor == X86_VENDOR_INTEL) && !(reg->access_size & 0x2)) - cx->bm_sts_skip = 1; - - return retval; -} -EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe); - -/* - * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, - * which can obviate IPI to trigger checking of need_resched. - * We execute MONITOR against need_resched and enter optimized wait state - * through MWAIT. Whenever someone changes need_resched, we would be woken - * up from MWAIT (without an IPI). 
- * - * New with Core Duo processors, MWAIT can take some hints based on CPU - * capability. - */ -void mwait_idle_with_hints(unsigned long ax, unsigned long cx) -{ - if (!need_resched()) { - if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) - clflush((void *)¤t_thread_info()->flags); - - __monitor((void *)¤t_thread_info()->flags, 0, 0); - smp_mb(); - if (!need_resched()) - __mwait(ax, cx); - } -} - -void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx) -{ - unsigned int cpu = smp_processor_id(); - struct cstate_entry *percpu_entry; - - percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu); - mwait_idle_with_hints(percpu_entry->states[cx->index].eax, - percpu_entry->states[cx->index].ecx); -} -EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_enter); - -static int __init ffh_cstate_init(void) -{ - struct cpuinfo_x86 *c = &boot_cpu_data; - if (c->x86_vendor != X86_VENDOR_INTEL) - return -1; - - cpu_cstate_entry = alloc_percpu(struct cstate_entry); - return 0; -} - -static void __exit ffh_cstate_exit(void) -{ - free_percpu(cpu_cstate_entry); - cpu_cstate_entry = NULL; -} - -arch_initcall(ffh_cstate_init); -__exitcall(ffh_cstate_exit); diff --git a/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/Makefile b/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/Makefile deleted file mode 100644 index 6a564ac6..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/Makefile +++ /dev/null @@ -1,59 +0,0 @@ -# -# arch/x86/kernel/acpi/realmode/Makefile -# -# This file is subject to the terms and conditions of the GNU General Public -# License. See the file "COPYING" in the main directory of this archive -# for more details. -# - -always := wakeup.bin -targets := wakeup.elf wakeup.lds - -wakeup-y += wakeup.o wakemain.o video-mode.o copy.o bioscall.o regs.o - -# The link order of the video-*.o modules can matter. In particular, -# video-vga.o *must* be listed first, followed by video-vesa.o. -# Hardware-specific drivers should follow in the order they should be -# probed, and video-bios.o should typically be last. -wakeup-y += video-vga.o -wakeup-y += video-vesa.o -wakeup-y += video-bios.o - -targets += $(wakeup-y) - -bootsrc := $(src)/../../../boot - -# --------------------------------------------------------------------------- - -# How to compile the 16-bit code. Note we always compile for -march=i386, -# that way we can complain to the user if the CPU is insufficient. -# Compile with _SETUP since this is similar to the boot-time setup code. 
-KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D_WAKEUP -D__KERNEL__ \ - -I$(srctree)/$(bootsrc) \ - $(cflags-y) \ - -Wall -Wstrict-prototypes \ - -march=i386 -mregparm=3 \ - -include $(srctree)/$(bootsrc)/code16gcc.h \ - -fno-strict-aliasing -fomit-frame-pointer \ - $(call cc-option, -ffreestanding) \ - $(call cc-option, -fno-toplevel-reorder,\ - $(call cc-option, -fno-unit-at-a-time)) \ - $(call cc-option, -fno-stack-protector) \ - $(call cc-option, -mpreferred-stack-boundary=2) -KBUILD_CFLAGS += $(call cc-option, -m32) -KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ -GCOV_PROFILE := n - -WAKEUP_OBJS = $(addprefix $(obj)/,$(wakeup-y)) - -LDFLAGS_wakeup.elf := -T - -CPPFLAGS_wakeup.lds += -P -C - -$(obj)/wakeup.elf: $(obj)/wakeup.lds $(WAKEUP_OBJS) FORCE - $(call if_changed,ld) - -OBJCOPYFLAGS_wakeup.bin := -O binary - -$(obj)/wakeup.bin: $(obj)/wakeup.elf FORCE - $(call if_changed,objcopy) diff --git a/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/bioscall.S b/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/bioscall.S deleted file mode 100644 index f51eb0bb..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/bioscall.S +++ /dev/null @@ -1 +0,0 @@ -#include "../../../boot/bioscall.S" diff --git a/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/copy.S b/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/copy.S deleted file mode 100644 index dc59ebee..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/copy.S +++ /dev/null @@ -1 +0,0 @@ -#include "../../../boot/copy.S" diff --git a/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/regs.c b/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/regs.c deleted file mode 100644 index 6206033b..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/regs.c +++ /dev/null @@ -1 +0,0 @@ -#include "../../../boot/regs.c" diff --git a/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/video-bios.c b/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/video-bios.c deleted file mode 100644 index 7deabc14..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/video-bios.c +++ /dev/null @@ -1 +0,0 @@ -#include "../../../boot/video-bios.c" diff --git a/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/video-mode.c b/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/video-mode.c deleted file mode 100644 index 328ad209..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/video-mode.c +++ /dev/null @@ -1 +0,0 @@ -#include "../../../boot/video-mode.c" diff --git a/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/video-vesa.c b/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/video-vesa.c deleted file mode 100644 index 9dbb9672..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/video-vesa.c +++ /dev/null @@ -1 +0,0 @@ -#include "../../../boot/video-vesa.c" diff --git a/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/video-vga.c b/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/video-vga.c deleted file mode 100644 index bcc81255..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/video-vga.c +++ /dev/null @@ -1 +0,0 @@ -#include "../../../boot/video-vga.c" diff --git a/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/wakemain.c b/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/wakemain.c deleted file mode 100644 index 883962d9..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/wakemain.c +++ /dev/null @@ -1,81 +0,0 @@ -#include "wakeup.h" -#include "boot.h" - -static void udelay(int loops) -{ - while (loops--) - io_delay(); /* Approximately 1 us */ -} - -static void beep(unsigned int hz) -{ - u8 enable; - - if (!hz) { - enable = 0x00; /* Turn off speaker */ - } else { - u16 div = 
1193181/hz; - - outb(0xb6, 0x43); /* Ctr 2, squarewave, load, binary */ - io_delay(); - outb(div, 0x42); /* LSB of counter */ - io_delay(); - outb(div >> 8, 0x42); /* MSB of counter */ - io_delay(); - - enable = 0x03; /* Turn on speaker */ - } - inb(0x61); /* Dummy read of System Control Port B */ - io_delay(); - outb(enable, 0x61); /* Enable timer 2 output to speaker */ - io_delay(); -} - -#define DOT_HZ 880 -#define DASH_HZ 587 -#define US_PER_DOT 125000 - -/* Okay, this is totally silly, but it's kind of fun. */ -static void send_morse(const char *pattern) -{ - char s; - - while ((s = *pattern++)) { - switch (s) { - case '.': - beep(DOT_HZ); - udelay(US_PER_DOT); - beep(0); - udelay(US_PER_DOT); - break; - case '-': - beep(DASH_HZ); - udelay(US_PER_DOT * 3); - beep(0); - udelay(US_PER_DOT); - break; - default: /* Assume it's a space */ - udelay(US_PER_DOT * 3); - break; - } - } -} - -void main(void) -{ - /* Kill machine if structures are wrong */ - if (wakeup_header.real_magic != 0x12345678) - while (1); - - if (wakeup_header.realmode_flags & 4) - send_morse("...-"); - - if (wakeup_header.realmode_flags & 1) - asm volatile("lcallw $0xc000,$3"); - - if (wakeup_header.realmode_flags & 2) { - /* Need to call BIOS */ - probe_cards(0); - set_mode(wakeup_header.video_mode); - } -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/wakeup.S b/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/wakeup.S deleted file mode 100644 index b4fd836e..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/wakeup.S +++ /dev/null @@ -1,170 +0,0 @@ -/* - * ACPI wakeup real mode startup stub - */ -#include <asm/segment.h> -#include <asm/msr-index.h> -#include <asm/page_types.h> -#include <asm/pgtable_types.h> -#include <asm/processor-flags.h> -#include "wakeup.h" - - .code16 - .section ".jump", "ax" - .globl _start -_start: - cli - jmp wakeup_code - -/* This should match the structure in wakeup.h */ - .section ".header", "a" - .globl wakeup_header -wakeup_header: -video_mode: .short 0 /* Video mode number */ -pmode_return: .byte 0x66, 0xea /* ljmpl */ - .long 0 /* offset goes here */ - .short __KERNEL_CS -pmode_cr0: .long 0 /* Saved %cr0 */ -pmode_cr3: .long 0 /* Saved %cr3 */ -pmode_cr4: .long 0 /* Saved %cr4 */ -pmode_efer: .quad 0 /* Saved EFER */ -pmode_gdt: .quad 0 -pmode_misc_en: .quad 0 /* Saved MISC_ENABLE MSR */ -pmode_behavior: .long 0 /* Wakeup behavior flags */ -realmode_flags: .long 0 -real_magic: .long 0 -trampoline_segment: .word 0 -_pad1: .byte 0 -wakeup_jmp: .byte 0xea /* ljmpw */ -wakeup_jmp_off: .word 3f -wakeup_jmp_seg: .word 0 -wakeup_gdt: .quad 0, 0, 0 -signature: .long WAKEUP_HEADER_SIGNATURE - - .text - .code16 -wakeup_code: - cld - - /* Apparently some dimwit BIOS programmers don't know how to - program a PM to RM transition, and we might end up here with - junk in the data segment descriptor registers. The only way - to repair that is to go into PM and fix it ourselves... */ - movw $16, %cx - lgdtl %cs:wakeup_gdt - movl %cr0, %eax - orb $X86_CR0_PE, %al - movl %eax, %cr0 - jmp 1f -1: ljmpw $8, $2f -2: - movw %cx, %ds - movw %cx, %es - movw %cx, %ss - movw %cx, %fs - movw %cx, %gs - - andb $~X86_CR0_PE, %al - movl %eax, %cr0 - jmp wakeup_jmp -3: - /* Set up segments */ - movw %cs, %ax - movw %ax, %ds - movw %ax, %es - movw %ax, %ss - lidtl wakeup_idt - - movl $wakeup_stack_end, %esp - - /* Clear the EFLAGS */ - pushl $0 - popfl - - /* Check header signature... 
*/ - movl signature, %eax - cmpl $WAKEUP_HEADER_SIGNATURE, %eax - jne bogus_real_magic - - /* Check we really have everything... */ - movl end_signature, %eax - cmpl $WAKEUP_END_SIGNATURE, %eax - jne bogus_real_magic - - /* Call the C code */ - calll main - - /* Restore MISC_ENABLE before entering protected mode, in case - BIOS decided to clear XD_DISABLE during S3. */ - movl pmode_behavior, %eax - btl $WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE, %eax - jnc 1f - - movl pmode_misc_en, %eax - movl pmode_misc_en + 4, %edx - movl $MSR_IA32_MISC_ENABLE, %ecx - wrmsr -1: - - /* Do any other stuff... */ - -#ifndef CONFIG_64BIT - /* This could also be done in C code... */ - movl pmode_cr3, %eax - movl %eax, %cr3 - - movl pmode_cr4, %ecx - jecxz 1f - movl %ecx, %cr4 -1: - movl pmode_efer, %eax - movl pmode_efer + 4, %edx - movl %eax, %ecx - orl %edx, %ecx - jz 1f - movl $MSR_EFER, %ecx - wrmsr -1: - - lgdtl pmode_gdt - - /* This really couldn't... */ - movl pmode_cr0, %eax - movl %eax, %cr0 - jmp pmode_return -#else - pushw $0 - pushw trampoline_segment - pushw $0 - lret -#endif - -bogus_real_magic: -1: - hlt - jmp 1b - - .data - .balign 8 - - /* This is the standard real-mode IDT */ -wakeup_idt: - .word 0xffff /* limit */ - .long 0 /* address */ - .word 0 - - .globl HEAP, heap_end -HEAP: - .long wakeup_heap -heap_end: - .long wakeup_stack - - .bss -wakeup_heap: - .space 2048 -wakeup_stack: - .space 2048 -wakeup_stack_end: - - .section ".signature","a" -end_signature: - .long WAKEUP_END_SIGNATURE diff --git a/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/wakeup.h b/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/wakeup.h deleted file mode 100644 index 97a29e14..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/wakeup.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Definitions for the wakeup data structure at the head of the - * wakeup code. 
- */ - -#ifndef ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H -#define ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H - -#ifndef __ASSEMBLY__ -#include <linux/types.h> - -/* This must match data at wakeup.S */ -struct wakeup_header { - u16 video_mode; /* Video mode number */ - u16 _jmp1; /* ljmpl opcode, 32-bit only */ - u32 pmode_entry; /* Protected mode resume point, 32-bit only */ - u16 _jmp2; /* CS value, 32-bit only */ - u32 pmode_cr0; /* Protected mode cr0 */ - u32 pmode_cr3; /* Protected mode cr3 */ - u32 pmode_cr4; /* Protected mode cr4 */ - u32 pmode_efer_low; /* Protected mode EFER */ - u32 pmode_efer_high; - u64 pmode_gdt; - u32 pmode_misc_en_low; /* Protected mode MISC_ENABLE */ - u32 pmode_misc_en_high; - u32 pmode_behavior; /* Wakeup routine behavior flags */ - u32 realmode_flags; - u32 real_magic; - u16 trampoline_segment; /* segment with trampoline code, 64-bit only */ - u8 _pad1; - u8 wakeup_jmp; - u16 wakeup_jmp_off; - u16 wakeup_jmp_seg; - u64 wakeup_gdt[3]; - u32 signature; /* To check we have correct structure */ -} __attribute__((__packed__)); - -extern struct wakeup_header wakeup_header; -#endif - -#define WAKEUP_HEADER_OFFSET 8 -#define WAKEUP_HEADER_SIGNATURE 0x51ee1111 -#define WAKEUP_END_SIGNATURE 0x65a22c82 - -/* Wakeup behavior bits */ -#define WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE 0 - -#endif /* ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H */ diff --git a/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/wakeup.lds.S deleted file mode 100644 index d4f8010a..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/acpi/realmode/wakeup.lds.S +++ /dev/null @@ -1,62 +0,0 @@ -/* - * wakeup.ld - * - * Linker script for the real-mode wakeup code - */ -#undef i386 -#include "wakeup.h" - -OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") -OUTPUT_ARCH(i386) -ENTRY(_start) - -SECTIONS -{ - . = 0; - .jump : { - *(.jump) - } = 0x90909090 - - . = WAKEUP_HEADER_OFFSET; - .header : { - *(.header) - } - - . = ALIGN(16); - .text : { - *(.text*) - } = 0x90909090 - - . = ALIGN(16); - .rodata : { - *(.rodata*) - } - - .videocards : { - video_cards = .; - *(.videocards) - video_cards_end = .; - } - - . = ALIGN(16); - .data : { - *(.data*) - } - - . = ALIGN(16); - .bss : { - __bss_start = .; - *(.bss) - __bss_end = .; - } - - .signature : { - *(.signature) - } - - _end = .; - - /DISCARD/ : { - *(.note*) - } -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/acpi/sleep.c b/ANDROID_3.4.5/arch/x86/kernel/acpi/sleep.c deleted file mode 100644 index 146a49c7..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/acpi/sleep.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - * sleep.c - x86-specific ACPI sleep support. - * - * Copyright (C) 2001-2003 Patrick Mochel - * Copyright (C) 2001-2003 Pavel Machek <pavel@ucw.cz> - */ - -#include <linux/acpi.h> -#include <linux/bootmem.h> -#include <linux/memblock.h> -#include <linux/dmi.h> -#include <linux/cpumask.h> -#include <asm/segment.h> -#include <asm/desc.h> -#include <asm/pgtable.h> -#include <asm/cacheflush.h> - -#include "realmode/wakeup.h" -#include "sleep.h" - -unsigned long acpi_realmode_flags; - -#if defined(CONFIG_SMP) && defined(CONFIG_64BIT) -static char temp_stack[4096]; -#endif - -asmlinkage void acpi_enter_s3(void) -{ - acpi_enter_sleep_state(3, wake_sleep_flags); -} -/** - * acpi_suspend_lowlevel - save kernel state - * - * Create an identity mapped page table and copy the wakeup routine to - * low memory. - */ -int acpi_suspend_lowlevel(void) -{ - struct wakeup_header *header; - /* address in low memory of the wakeup routine. 
*/ - char *acpi_realmode; - - acpi_realmode = TRAMPOLINE_SYM(acpi_wakeup_code); - - header = (struct wakeup_header *)(acpi_realmode + WAKEUP_HEADER_OFFSET); - if (header->signature != WAKEUP_HEADER_SIGNATURE) { - printk(KERN_ERR "wakeup header does not match\n"); - return -EINVAL; - } - - header->video_mode = saved_video_mode; - - header->wakeup_jmp_seg = acpi_wakeup_address >> 4; - - /* - * Set up the wakeup GDT. We set these up as Big Real Mode, - * that is, with limits set to 4 GB. At least the Lenovo - * Thinkpad X61 is known to need this for the video BIOS - * initialization quirk to work; this is likely to also - * be the case for other laptops or integrated video devices. - */ - - /* GDT[0]: GDT self-pointer */ - header->wakeup_gdt[0] = - (u64)(sizeof(header->wakeup_gdt) - 1) + - ((u64)__pa(&header->wakeup_gdt) << 16); - /* GDT[1]: big real mode-like code segment */ - header->wakeup_gdt[1] = - GDT_ENTRY(0x809b, acpi_wakeup_address, 0xfffff); - /* GDT[2]: big real mode-like data segment */ - header->wakeup_gdt[2] = - GDT_ENTRY(0x8093, acpi_wakeup_address, 0xfffff); - -#ifndef CONFIG_64BIT - store_gdt((struct desc_ptr *)&header->pmode_gdt); - - if (rdmsr_safe(MSR_EFER, &header->pmode_efer_low, - &header->pmode_efer_high)) - header->pmode_efer_low = header->pmode_efer_high = 0; -#endif /* !CONFIG_64BIT */ - - header->pmode_cr0 = read_cr0(); - header->pmode_cr4 = read_cr4_safe(); - header->pmode_behavior = 0; - if (!rdmsr_safe(MSR_IA32_MISC_ENABLE, - &header->pmode_misc_en_low, - &header->pmode_misc_en_high)) - header->pmode_behavior |= - (1 << WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE); - header->realmode_flags = acpi_realmode_flags; - header->real_magic = 0x12345678; - -#ifndef CONFIG_64BIT - header->pmode_entry = (u32)&wakeup_pmode_return; - header->pmode_cr3 = (u32)__pa(&initial_page_table); - saved_magic = 0x12345678; -#else /* CONFIG_64BIT */ - header->trampoline_segment = trampoline_address() >> 4; -#ifdef CONFIG_SMP - stack_start = (unsigned long)temp_stack + sizeof(temp_stack); - early_gdt_descr.address = - (unsigned long)get_cpu_gdt_table(smp_processor_id()); - initial_gs = per_cpu_offset(smp_processor_id()); -#endif - initial_code = (unsigned long)wakeup_long64; - saved_magic = 0x123456789abcdef0L; -#endif /* CONFIG_64BIT */ - - do_suspend_lowlevel(); - return 0; -} - -static int __init acpi_sleep_setup(char *str) -{ - while ((str != NULL) && (*str != '\0')) { - if (strncmp(str, "s3_bios", 7) == 0) - acpi_realmode_flags |= 1; - if (strncmp(str, "s3_mode", 7) == 0) - acpi_realmode_flags |= 2; - if (strncmp(str, "s3_beep", 7) == 0) - acpi_realmode_flags |= 4; -#ifdef CONFIG_HIBERNATION - if (strncmp(str, "s4_nohwsig", 10) == 0) - acpi_no_s4_hw_signature(); -#endif - if (strncmp(str, "nonvs", 5) == 0) - acpi_nvs_nosave(); - if (strncmp(str, "old_ordering", 12) == 0) - acpi_old_suspend_ordering(); - str = strchr(str, ','); - if (str != NULL) - str += strspn(str, ", \t"); - } - return 1; -} - -__setup("acpi_sleep=", acpi_sleep_setup); diff --git a/ANDROID_3.4.5/arch/x86/kernel/acpi/sleep.h b/ANDROID_3.4.5/arch/x86/kernel/acpi/sleep.h deleted file mode 100644 index d68677a2..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/acpi/sleep.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Variables and functions used by the code in sleep.c - */ - -#include <asm/trampoline.h> -#include <linux/linkage.h> - -extern unsigned long saved_video_mode; -extern long saved_magic; - -extern int wakeup_pmode_return; - -extern u8 wake_sleep_flags; -extern asmlinkage void acpi_enter_s3(void); - -extern unsigned long 
acpi_copy_wakeup_routine(unsigned long); -extern void wakeup_long64(void); - -extern void do_suspend_lowlevel(void); diff --git a/ANDROID_3.4.5/arch/x86/kernel/acpi/wakeup_32.S b/ANDROID_3.4.5/arch/x86/kernel/acpi/wakeup_32.S deleted file mode 100644 index 72610839..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/acpi/wakeup_32.S +++ /dev/null @@ -1,98 +0,0 @@ - .section .text..page_aligned -#include <linux/linkage.h> -#include <asm/segment.h> -#include <asm/page_types.h> - -# Copyright 2003, 2008 Pavel Machek <pavel@suse.cz>, distribute under GPLv2 - - .code32 - ALIGN - -ENTRY(wakeup_pmode_return) -wakeup_pmode_return: - movw $__KERNEL_DS, %ax - movw %ax, %ss - movw %ax, %ds - movw %ax, %es - movw %ax, %fs - movw %ax, %gs - - # reload the gdt, as we need the full 32 bit address - lgdt saved_gdt - lidt saved_idt - lldt saved_ldt - ljmp $(__KERNEL_CS), $1f -1: - movl %cr3, %eax - movl %eax, %cr3 - wbinvd - - # and restore the stack ... but you need gdt for this to work - movl saved_context_esp, %esp - - movl %cs:saved_magic, %eax - cmpl $0x12345678, %eax - jne bogus_magic - - # jump to place where we left off - movl saved_eip, %eax - jmp *%eax - -bogus_magic: - jmp bogus_magic - - - -save_registers: - sgdt saved_gdt - sidt saved_idt - sldt saved_ldt - str saved_tss - - leal 4(%esp), %eax - movl %eax, saved_context_esp - movl %ebx, saved_context_ebx - movl %ebp, saved_context_ebp - movl %esi, saved_context_esi - movl %edi, saved_context_edi - pushfl - popl saved_context_eflags - - movl $ret_point, saved_eip - ret - - -restore_registers: - movl saved_context_ebp, %ebp - movl saved_context_ebx, %ebx - movl saved_context_esi, %esi - movl saved_context_edi, %edi - pushl saved_context_eflags - popfl - ret - -ENTRY(do_suspend_lowlevel) - call save_processor_state - call save_registers - call acpi_enter_s3 - -# In case of S3 failure, we'll emerge here. 
Jump -# to ret_point to recover - jmp ret_point - .p2align 4,,7 -ret_point: - call restore_registers - call restore_processor_state - ret - -.data -ALIGN -ENTRY(saved_magic) .long 0 -ENTRY(saved_eip) .long 0 - -# saved registers -saved_gdt: .long 0,0 -saved_idt: .long 0,0 -saved_ldt: .long 0 -saved_tss: .long 0 - diff --git a/ANDROID_3.4.5/arch/x86/kernel/acpi/wakeup_64.S b/ANDROID_3.4.5/arch/x86/kernel/acpi/wakeup_64.S deleted file mode 100644 index 014d1d28..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/acpi/wakeup_64.S +++ /dev/null @@ -1,122 +0,0 @@ -.text -#include <linux/linkage.h> -#include <asm/segment.h> -#include <asm/pgtable_types.h> -#include <asm/page_types.h> -#include <asm/msr.h> -#include <asm/asm-offsets.h> - -# Copyright 2003 Pavel Machek <pavel@suse.cz>, distribute under GPLv2 - -.code64 - /* - * Hooray, we are in Long 64-bit mode (but still running in low memory) - */ -ENTRY(wakeup_long64) - movq saved_magic, %rax - movq $0x123456789abcdef0, %rdx - cmpq %rdx, %rax - jne bogus_64_magic - - movw $__KERNEL_DS, %ax - movw %ax, %ss - movw %ax, %ds - movw %ax, %es - movw %ax, %fs - movw %ax, %gs - movq saved_rsp, %rsp - - movq saved_rbx, %rbx - movq saved_rdi, %rdi - movq saved_rsi, %rsi - movq saved_rbp, %rbp - - movq saved_rip, %rax - jmp *%rax -ENDPROC(wakeup_long64) - -bogus_64_magic: - jmp bogus_64_magic - -ENTRY(do_suspend_lowlevel) - subq $8, %rsp - xorl %eax, %eax - call save_processor_state - - movq $saved_context, %rax - movq %rsp, pt_regs_sp(%rax) - movq %rbp, pt_regs_bp(%rax) - movq %rsi, pt_regs_si(%rax) - movq %rdi, pt_regs_di(%rax) - movq %rbx, pt_regs_bx(%rax) - movq %rcx, pt_regs_cx(%rax) - movq %rdx, pt_regs_dx(%rax) - movq %r8, pt_regs_r8(%rax) - movq %r9, pt_regs_r9(%rax) - movq %r10, pt_regs_r10(%rax) - movq %r11, pt_regs_r11(%rax) - movq %r12, pt_regs_r12(%rax) - movq %r13, pt_regs_r13(%rax) - movq %r14, pt_regs_r14(%rax) - movq %r15, pt_regs_r15(%rax) - pushfq - popq pt_regs_flags(%rax) - - movq $resume_point, saved_rip(%rip) - - movq %rsp, saved_rsp - movq %rbp, saved_rbp - movq %rbx, saved_rbx - movq %rdi, saved_rdi - movq %rsi, saved_rsi - - addq $8, %rsp - call acpi_enter_s3 - /* in case something went wrong, restore the machine status and go on */ - jmp resume_point - - .align 4 -resume_point: - /* We don't restore %rax, it must be 0 anyway */ - movq $saved_context, %rax - movq saved_context_cr4(%rax), %rbx - movq %rbx, %cr4 - movq saved_context_cr3(%rax), %rbx - movq %rbx, %cr3 - movq saved_context_cr2(%rax), %rbx - movq %rbx, %cr2 - movq saved_context_cr0(%rax), %rbx - movq %rbx, %cr0 - pushq pt_regs_flags(%rax) - popfq - movq pt_regs_sp(%rax), %rsp - movq pt_regs_bp(%rax), %rbp - movq pt_regs_si(%rax), %rsi - movq pt_regs_di(%rax), %rdi - movq pt_regs_bx(%rax), %rbx - movq pt_regs_cx(%rax), %rcx - movq pt_regs_dx(%rax), %rdx - movq pt_regs_r8(%rax), %r8 - movq pt_regs_r9(%rax), %r9 - movq pt_regs_r10(%rax), %r10 - movq pt_regs_r11(%rax), %r11 - movq pt_regs_r12(%rax), %r12 - movq pt_regs_r13(%rax), %r13 - movq pt_regs_r14(%rax), %r14 - movq pt_regs_r15(%rax), %r15 - - xorl %eax, %eax - addq $8, %rsp - jmp restore_processor_state -ENDPROC(do_suspend_lowlevel) - -.data -ENTRY(saved_rbp) .quad 0 -ENTRY(saved_rsi) .quad 0 -ENTRY(saved_rdi) .quad 0 -ENTRY(saved_rbx) .quad 0 - -ENTRY(saved_rip) .quad 0 -ENTRY(saved_rsp) .quad 0 - -ENTRY(saved_magic) .quad 0 diff --git a/ANDROID_3.4.5/arch/x86/kernel/acpi/wakeup_rm.S b/ANDROID_3.4.5/arch/x86/kernel/acpi/wakeup_rm.S deleted file mode 100644 index 63b8ab52..00000000 --- 
a/ANDROID_3.4.5/arch/x86/kernel/acpi/wakeup_rm.S +++ /dev/null @@ -1,12 +0,0 @@ -/* - * Wrapper script for the realmode binary as a transport object - * before copying to low memory. - */ -#include <asm/page_types.h> - - .section ".x86_trampoline","a" - .balign PAGE_SIZE - .globl acpi_wakeup_code -acpi_wakeup_code: - .incbin "arch/x86/kernel/acpi/realmode/wakeup.bin" - .size acpi_wakeup_code, .-acpi_wakeup_code diff --git a/ANDROID_3.4.5/arch/x86/kernel/alternative.c b/ANDROID_3.4.5/arch/x86/kernel/alternative.c deleted file mode 100644 index 1f84794f..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/alternative.c +++ /dev/null @@ -1,742 +0,0 @@ -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/mutex.h> -#include <linux/list.h> -#include <linux/stringify.h> -#include <linux/kprobes.h> -#include <linux/mm.h> -#include <linux/vmalloc.h> -#include <linux/memory.h> -#include <linux/stop_machine.h> -#include <linux/slab.h> -#include <asm/alternative.h> -#include <asm/sections.h> -#include <asm/pgtable.h> -#include <asm/mce.h> -#include <asm/nmi.h> -#include <asm/cacheflush.h> -#include <asm/tlbflush.h> -#include <asm/io.h> -#include <asm/fixmap.h> - -#define MAX_PATCH_LEN (255-1) - -#ifdef CONFIG_HOTPLUG_CPU -static int smp_alt_once; - -static int __init bootonly(char *str) -{ - smp_alt_once = 1; - return 1; -} -__setup("smp-alt-boot", bootonly); -#else -#define smp_alt_once 1 -#endif - -static int __initdata_or_module debug_alternative; - -static int __init debug_alt(char *str) -{ - debug_alternative = 1; - return 1; -} -__setup("debug-alternative", debug_alt); - -static int noreplace_smp; - -static int __init setup_noreplace_smp(char *str) -{ - noreplace_smp = 1; - return 1; -} -__setup("noreplace-smp", setup_noreplace_smp); - -#ifdef CONFIG_PARAVIRT -static int __initdata_or_module noreplace_paravirt = 0; - -static int __init setup_noreplace_paravirt(char *str) -{ - noreplace_paravirt = 1; - return 1; -} -__setup("noreplace-paravirt", setup_noreplace_paravirt); -#endif - -#define DPRINTK(fmt, args...) if (debug_alternative) \ - printk(KERN_DEBUG fmt, args) - -/* - * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes - * that correspond to that nop. Getting from one nop to the next, we - * add to the array the offset that is equal to the sum of all sizes of - * nops preceding the one we are after. - * - * Note: The GENERIC_NOP5_ATOMIC is at the end, as it breaks the - * nice symmetry of sizes of the previous nops. 
- */ -#if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64) -static const unsigned char intelnops[] = -{ - GENERIC_NOP1, - GENERIC_NOP2, - GENERIC_NOP3, - GENERIC_NOP4, - GENERIC_NOP5, - GENERIC_NOP6, - GENERIC_NOP7, - GENERIC_NOP8, - GENERIC_NOP5_ATOMIC -}; -static const unsigned char * const intel_nops[ASM_NOP_MAX+2] = -{ - NULL, - intelnops, - intelnops + 1, - intelnops + 1 + 2, - intelnops + 1 + 2 + 3, - intelnops + 1 + 2 + 3 + 4, - intelnops + 1 + 2 + 3 + 4 + 5, - intelnops + 1 + 2 + 3 + 4 + 5 + 6, - intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, - intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8, -}; -#endif - -#ifdef K8_NOP1 -static const unsigned char k8nops[] = -{ - K8_NOP1, - K8_NOP2, - K8_NOP3, - K8_NOP4, - K8_NOP5, - K8_NOP6, - K8_NOP7, - K8_NOP8, - K8_NOP5_ATOMIC -}; -static const unsigned char * const k8_nops[ASM_NOP_MAX+2] = -{ - NULL, - k8nops, - k8nops + 1, - k8nops + 1 + 2, - k8nops + 1 + 2 + 3, - k8nops + 1 + 2 + 3 + 4, - k8nops + 1 + 2 + 3 + 4 + 5, - k8nops + 1 + 2 + 3 + 4 + 5 + 6, - k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, - k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8, -}; -#endif - -#if defined(K7_NOP1) && !defined(CONFIG_X86_64) -static const unsigned char k7nops[] = -{ - K7_NOP1, - K7_NOP2, - K7_NOP3, - K7_NOP4, - K7_NOP5, - K7_NOP6, - K7_NOP7, - K7_NOP8, - K7_NOP5_ATOMIC -}; -static const unsigned char * const k7_nops[ASM_NOP_MAX+2] = -{ - NULL, - k7nops, - k7nops + 1, - k7nops + 1 + 2, - k7nops + 1 + 2 + 3, - k7nops + 1 + 2 + 3 + 4, - k7nops + 1 + 2 + 3 + 4 + 5, - k7nops + 1 + 2 + 3 + 4 + 5 + 6, - k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, - k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8, -}; -#endif - -#ifdef P6_NOP1 -static const unsigned char __initconst_or_module p6nops[] = -{ - P6_NOP1, - P6_NOP2, - P6_NOP3, - P6_NOP4, - P6_NOP5, - P6_NOP6, - P6_NOP7, - P6_NOP8, - P6_NOP5_ATOMIC -}; -static const unsigned char * const p6_nops[ASM_NOP_MAX+2] = -{ - NULL, - p6nops, - p6nops + 1, - p6nops + 1 + 2, - p6nops + 1 + 2 + 3, - p6nops + 1 + 2 + 3 + 4, - p6nops + 1 + 2 + 3 + 4 + 5, - p6nops + 1 + 2 + 3 + 4 + 5 + 6, - p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, - p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8, -}; -#endif - -/* Initialize these to a safe default */ -#ifdef CONFIG_X86_64 -const unsigned char * const *ideal_nops = p6_nops; -#else -const unsigned char * const *ideal_nops = intel_nops; -#endif - -void __init arch_init_ideal_nops(void) -{ - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - /* - * Due to a decoder implementation quirk, some - * specific Intel CPUs actually perform better with - * the "k8_nops" than with the SDM-recommended NOPs. - */ - if (boot_cpu_data.x86 == 6 && - boot_cpu_data.x86_model >= 0x0f && - boot_cpu_data.x86_model != 0x1c && - boot_cpu_data.x86_model != 0x26 && - boot_cpu_data.x86_model != 0x27 && - boot_cpu_data.x86_model < 0x30) { - ideal_nops = k8_nops; - } else if (boot_cpu_has(X86_FEATURE_NOPL)) { - ideal_nops = p6_nops; - } else { -#ifdef CONFIG_X86_64 - ideal_nops = k8_nops; -#else - ideal_nops = intel_nops; -#endif - } - - default: -#ifdef CONFIG_X86_64 - ideal_nops = k8_nops; -#else - if (boot_cpu_has(X86_FEATURE_K8)) - ideal_nops = k8_nops; - else if (boot_cpu_has(X86_FEATURE_K7)) - ideal_nops = k7_nops; - else - ideal_nops = intel_nops; -#endif - } -} - -/* Use this to add nops to a buffer, then text_poke the whole buffer. 
*/ -static void __init_or_module add_nops(void *insns, unsigned int len) -{ - while (len > 0) { - unsigned int noplen = len; - if (noplen > ASM_NOP_MAX) - noplen = ASM_NOP_MAX; - memcpy(insns, ideal_nops[noplen], noplen); - insns += noplen; - len -= noplen; - } -} - -extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; -extern s32 __smp_locks[], __smp_locks_end[]; -void *text_poke_early(void *addr, const void *opcode, size_t len); - -/* Replace instructions with better alternatives for this CPU type. - This runs before SMP is initialized to avoid SMP problems with - self modifying code. This implies that asymmetric systems where - APs have less capabilities than the boot processor are not handled. - Tough. Make sure you disable such features by hand. */ - -void __init_or_module apply_alternatives(struct alt_instr *start, - struct alt_instr *end) -{ - struct alt_instr *a; - u8 *instr, *replacement; - u8 insnbuf[MAX_PATCH_LEN]; - - DPRINTK("%s: alt table %p -> %p\n", __func__, start, end); - /* - * The scan order should be from start to end. A later scanned - * alternative code can overwrite a previous scanned alternative code. - * Some kernel functions (e.g. memcpy, memset, etc) use this order to - * patch code. - * - * So be careful if you want to change the scan order to any other - * order. - */ - for (a = start; a < end; a++) { - instr = (u8 *)&a->instr_offset + a->instr_offset; - replacement = (u8 *)&a->repl_offset + a->repl_offset; - BUG_ON(a->replacementlen > a->instrlen); - BUG_ON(a->instrlen > sizeof(insnbuf)); - BUG_ON(a->cpuid >= NCAPINTS*32); - if (!boot_cpu_has(a->cpuid)) - continue; - - memcpy(insnbuf, replacement, a->replacementlen); - - /* 0xe8 is a relative jump; fix the offset. */ - if (*insnbuf == 0xe8 && a->replacementlen == 5) - *(s32 *)(insnbuf + 1) += replacement - instr; - - add_nops(insnbuf + a->replacementlen, - a->instrlen - a->replacementlen); - - text_poke_early(instr, insnbuf, a->instrlen); - } -} - -#ifdef CONFIG_SMP - -static void alternatives_smp_lock(const s32 *start, const s32 *end, - u8 *text, u8 *text_end) -{ - const s32 *poff; - - mutex_lock(&text_mutex); - for (poff = start; poff < end; poff++) { - u8 *ptr = (u8 *)poff + *poff; - - if (!*poff || ptr < text || ptr >= text_end) - continue; - /* turn DS segment override prefix into lock prefix */ - if (*ptr == 0x3e) - text_poke(ptr, ((unsigned char []){0xf0}), 1); - }; - mutex_unlock(&text_mutex); -} - -static void alternatives_smp_unlock(const s32 *start, const s32 *end, - u8 *text, u8 *text_end) -{ - const s32 *poff; - - if (noreplace_smp) - return; - - mutex_lock(&text_mutex); - for (poff = start; poff < end; poff++) { - u8 *ptr = (u8 *)poff + *poff; - - if (!*poff || ptr < text || ptr >= text_end) - continue; - /* turn lock prefix into DS segment override prefix */ - if (*ptr == 0xf0) - text_poke(ptr, ((unsigned char []){0x3E}), 1); - }; - mutex_unlock(&text_mutex); -} - -struct smp_alt_module { - /* what is this ??? 
*/ - struct module *mod; - char *name; - - /* ptrs to lock prefixes */ - const s32 *locks; - const s32 *locks_end; - - /* .text segment, needed to avoid patching init code ;) */ - u8 *text; - u8 *text_end; - - struct list_head next; -}; -static LIST_HEAD(smp_alt_modules); -static DEFINE_MUTEX(smp_alt); -static int smp_mode = 1; /* protected by smp_alt */ - -void __init_or_module alternatives_smp_module_add(struct module *mod, - char *name, - void *locks, void *locks_end, - void *text, void *text_end) -{ - struct smp_alt_module *smp; - - if (noreplace_smp) - return; - - if (smp_alt_once) { - if (boot_cpu_has(X86_FEATURE_UP)) - alternatives_smp_unlock(locks, locks_end, - text, text_end); - return; - } - - smp = kzalloc(sizeof(*smp), GFP_KERNEL); - if (NULL == smp) - return; /* we'll run the (safe but slow) SMP code then ... */ - - smp->mod = mod; - smp->name = name; - smp->locks = locks; - smp->locks_end = locks_end; - smp->text = text; - smp->text_end = text_end; - DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n", - __func__, smp->locks, smp->locks_end, - smp->text, smp->text_end, smp->name); - - mutex_lock(&smp_alt); - list_add_tail(&smp->next, &smp_alt_modules); - if (boot_cpu_has(X86_FEATURE_UP)) - alternatives_smp_unlock(smp->locks, smp->locks_end, - smp->text, smp->text_end); - mutex_unlock(&smp_alt); -} - -void __init_or_module alternatives_smp_module_del(struct module *mod) -{ - struct smp_alt_module *item; - - if (smp_alt_once || noreplace_smp) - return; - - mutex_lock(&smp_alt); - list_for_each_entry(item, &smp_alt_modules, next) { - if (mod != item->mod) - continue; - list_del(&item->next); - mutex_unlock(&smp_alt); - DPRINTK("%s: %s\n", __func__, item->name); - kfree(item); - return; - } - mutex_unlock(&smp_alt); -} - -bool skip_smp_alternatives; -void alternatives_smp_switch(int smp) -{ - struct smp_alt_module *mod; - -#ifdef CONFIG_LOCKDEP - /* - * Older binutils section handling bug prevented - * alternatives-replacement from working reliably. - * - * If this still occurs then you should see a hang - * or crash shortly after this line: - */ - printk("lockdep: fixing up alternatives.\n"); -#endif - - if (noreplace_smp || smp_alt_once || skip_smp_alternatives) - return; - BUG_ON(!smp && (num_online_cpus() > 1)); - - mutex_lock(&smp_alt); - - /* - * Avoid unnecessary switches because it forces JIT based VMs to - * throw away all cached translations, which can be quite costly. 
- */ - if (smp == smp_mode) { - /* nothing */ - } else if (smp) { - printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); - clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); - clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP); - list_for_each_entry(mod, &smp_alt_modules, next) - alternatives_smp_lock(mod->locks, mod->locks_end, - mod->text, mod->text_end); - } else { - printk(KERN_INFO "SMP alternatives: switching to UP code\n"); - set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); - set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); - list_for_each_entry(mod, &smp_alt_modules, next) - alternatives_smp_unlock(mod->locks, mod->locks_end, - mod->text, mod->text_end); - } - smp_mode = smp; - mutex_unlock(&smp_alt); -} - -/* Return 1 if the address range is reserved for smp-alternatives */ -int alternatives_text_reserved(void *start, void *end) -{ - struct smp_alt_module *mod; - const s32 *poff; - u8 *text_start = start; - u8 *text_end = end; - - list_for_each_entry(mod, &smp_alt_modules, next) { - if (mod->text > text_end || mod->text_end < text_start) - continue; - for (poff = mod->locks; poff < mod->locks_end; poff++) { - const u8 *ptr = (const u8 *)poff + *poff; - - if (text_start <= ptr && text_end > ptr) - return 1; - } - } - - return 0; -} -#endif - -#ifdef CONFIG_PARAVIRT -void __init_or_module apply_paravirt(struct paravirt_patch_site *start, - struct paravirt_patch_site *end) -{ - struct paravirt_patch_site *p; - char insnbuf[MAX_PATCH_LEN]; - - if (noreplace_paravirt) - return; - - for (p = start; p < end; p++) { - unsigned int used; - - BUG_ON(p->len > MAX_PATCH_LEN); - /* prep the buffer with the original instructions */ - memcpy(insnbuf, p->instr, p->len); - used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf, - (unsigned long)p->instr, p->len); - - BUG_ON(used > p->len); - - /* Pad the rest with nops */ - add_nops(insnbuf + used, p->len - used); - text_poke_early(p->instr, insnbuf, p->len); - } -} -extern struct paravirt_patch_site __start_parainstructions[], - __stop_parainstructions[]; -#endif /* CONFIG_PARAVIRT */ - -void __init alternative_instructions(void) -{ - /* The patching is not fully atomic, so try to avoid local interruptions - that might execute the to be patched code. - Other CPUs are not running. */ - stop_nmi(); - - /* - * Don't stop machine check exceptions while patching. - * MCEs only happen when something got corrupted and in this - * case we must do something about the corruption. - * Ignoring it is worse than a unlikely patching race. - * Also machine checks tend to be broadcast and if one CPU - * goes into machine check the others follow quickly, so we don't - * expect a machine check to cause undue problems during to code - * patching. 
- */ - - apply_alternatives(__alt_instructions, __alt_instructions_end); - - /* switch to patch-once-at-boottime-only mode and free the - * tables in case we know the number of CPUs will never ever - * change */ -#ifdef CONFIG_HOTPLUG_CPU - if (num_possible_cpus() < 2) - smp_alt_once = 1; -#endif - -#ifdef CONFIG_SMP - if (smp_alt_once) { - if (1 == num_possible_cpus()) { - printk(KERN_INFO "SMP alternatives: switching to UP code\n"); - set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); - set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); - - alternatives_smp_unlock(__smp_locks, __smp_locks_end, - _text, _etext); - } - } else { - alternatives_smp_module_add(NULL, "core kernel", - __smp_locks, __smp_locks_end, - _text, _etext); - - /* Only switch to UP mode if we don't immediately boot others */ - if (num_present_cpus() == 1 || setup_max_cpus <= 1) - alternatives_smp_switch(0); - } -#endif - apply_paravirt(__parainstructions, __parainstructions_end); - - if (smp_alt_once) - free_init_pages("SMP alternatives", - (unsigned long)__smp_locks, - (unsigned long)__smp_locks_end); - - restart_nmi(); -} - -/** - * text_poke_early - Update instructions on a live kernel at boot time - * @addr: address to modify - * @opcode: source of the copy - * @len: length to copy - * - * When you use this code to patch more than one byte of an instruction - * you need to make sure that other CPUs cannot execute this code in parallel. - * Also no thread must be currently preempted in the middle of these - * instructions. And on the local CPU you need to be protected again NMI or MCE - * handlers seeing an inconsistent instruction while you patch. - */ -void *__init_or_module text_poke_early(void *addr, const void *opcode, - size_t len) -{ - unsigned long flags; - local_irq_save(flags); - memcpy(addr, opcode, len); - sync_core(); - local_irq_restore(flags); - /* Could also do a CLFLUSH here to speed up CPU recovery; but - that causes hangs on some VIA CPUs. */ - return addr; -} - -/** - * text_poke - Update instructions on a live kernel - * @addr: address to modify - * @opcode: source of the copy - * @len: length to copy - * - * Only atomic text poke/set should be allowed when not doing early patching. - * It means the size must be writable atomically and the address must be aligned - * in a way that permits an atomic write. It also makes sure we fit on a single - * page. - * - * Note: Must be called under text_mutex. - */ -void *__kprobes text_poke(void *addr, const void *opcode, size_t len) -{ - unsigned long flags; - char *vaddr; - struct page *pages[2]; - int i; - - if (!core_kernel_text((unsigned long)addr)) { - pages[0] = vmalloc_to_page(addr); - pages[1] = vmalloc_to_page(addr + PAGE_SIZE); - } else { - pages[0] = virt_to_page(addr); - WARN_ON(!PageReserved(pages[0])); - pages[1] = virt_to_page(addr + PAGE_SIZE); - } - BUG_ON(!pages[0]); - local_irq_save(flags); - set_fixmap(FIX_TEXT_POKE0, page_to_phys(pages[0])); - if (pages[1]) - set_fixmap(FIX_TEXT_POKE1, page_to_phys(pages[1])); - vaddr = (char *)fix_to_virt(FIX_TEXT_POKE0); - memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len); - clear_fixmap(FIX_TEXT_POKE0); - if (pages[1]) - clear_fixmap(FIX_TEXT_POKE1); - local_flush_tlb(); - sync_core(); - /* Could also do a CLFLUSH here to speed up CPU recovery; but - that causes hangs on some VIA CPUs. */ - for (i = 0; i < len; i++) - BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]); - local_irq_restore(flags); - return addr; -} - -/* - * Cross-modifying kernel text with stop_machine(). 
- * This code originally comes from immediate value. - */ -static atomic_t stop_machine_first; -static int wrote_text; - -struct text_poke_params { - struct text_poke_param *params; - int nparams; -}; - -static int __kprobes stop_machine_text_poke(void *data) -{ - struct text_poke_params *tpp = data; - struct text_poke_param *p; - int i; - - if (atomic_dec_and_test(&stop_machine_first)) { - for (i = 0; i < tpp->nparams; i++) { - p = &tpp->params[i]; - text_poke(p->addr, p->opcode, p->len); - } - smp_wmb(); /* Make sure other cpus see that this has run */ - wrote_text = 1; - } else { - while (!wrote_text) - cpu_relax(); - smp_mb(); /* Load wrote_text before following execution */ - } - - for (i = 0; i < tpp->nparams; i++) { - p = &tpp->params[i]; - flush_icache_range((unsigned long)p->addr, - (unsigned long)p->addr + p->len); - } - /* - * Intel Archiecture Software Developer's Manual section 7.1.3 specifies - * that a core serializing instruction such as "cpuid" should be - * executed on _each_ core before the new instruction is made visible. - */ - sync_core(); - return 0; -} - -/** - * text_poke_smp - Update instructions on a live kernel on SMP - * @addr: address to modify - * @opcode: source of the copy - * @len: length to copy - * - * Modify multi-byte instruction by using stop_machine() on SMP. This allows - * user to poke/set multi-byte text on SMP. Only non-NMI/MCE code modifying - * should be allowed, since stop_machine() does _not_ protect code against - * NMI and MCE. - * - * Note: Must be called under get_online_cpus() and text_mutex. - */ -void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len) -{ - struct text_poke_params tpp; - struct text_poke_param p; - - p.addr = addr; - p.opcode = opcode; - p.len = len; - tpp.params = &p; - tpp.nparams = 1; - atomic_set(&stop_machine_first, 1); - wrote_text = 0; - /* Use __stop_machine() because the caller already got online_cpus. */ - __stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask); - return addr; -} - -/** - * text_poke_smp_batch - Update instructions on a live kernel on SMP - * @params: an array of text_poke parameters - * @n: the number of elements in params. - * - * Modify multi-byte instruction by using stop_machine() on SMP. Since the - * stop_machine() is heavy task, it is better to aggregate text_poke requests - * and do it once if possible. - * - * Note: Must be called under get_online_cpus() and text_mutex. - */ -void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n) -{ - struct text_poke_params tpp = {.params = params, .nparams = n}; - - atomic_set(&stop_machine_first, 1); - wrote_text = 0; - __stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask); -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/amd_gart_64.c b/ANDROID_3.4.5/arch/x86/kernel/amd_gart_64.c deleted file mode 100644 index e6631120..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/amd_gart_64.c +++ /dev/null @@ -1,899 +0,0 @@ -/* - * Dynamic DMA mapping support for AMD Hammer. - * - * Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI. - * This allows to use PCI devices that only support 32bit addresses on systems - * with more than 4GB. - * - * See Documentation/DMA-API-HOWTO.txt for the interface specification. - * - * Copyright 2002 Andi Kleen, SuSE Labs. - * Subject to the GNU General Public License v2 only. 
- */ - -#include <linux/types.h> -#include <linux/ctype.h> -#include <linux/agp_backend.h> -#include <linux/init.h> -#include <linux/mm.h> -#include <linux/sched.h> -#include <linux/string.h> -#include <linux/spinlock.h> -#include <linux/pci.h> -#include <linux/module.h> -#include <linux/topology.h> -#include <linux/interrupt.h> -#include <linux/bitmap.h> -#include <linux/kdebug.h> -#include <linux/scatterlist.h> -#include <linux/iommu-helper.h> -#include <linux/syscore_ops.h> -#include <linux/io.h> -#include <linux/gfp.h> -#include <linux/atomic.h> -#include <asm/mtrr.h> -#include <asm/pgtable.h> -#include <asm/proto.h> -#include <asm/iommu.h> -#include <asm/gart.h> -#include <asm/cacheflush.h> -#include <asm/swiotlb.h> -#include <asm/dma.h> -#include <asm/amd_nb.h> -#include <asm/x86_init.h> -#include <asm/iommu_table.h> - -static unsigned long iommu_bus_base; /* GART remapping area (physical) */ -static unsigned long iommu_size; /* size of remapping area bytes */ -static unsigned long iommu_pages; /* .. and in pages */ - -static u32 *iommu_gatt_base; /* Remapping table */ - -static dma_addr_t bad_dma_addr; - -/* - * If this is disabled the IOMMU will use an optimized flushing strategy - * of only flushing when an mapping is reused. With it true the GART is - * flushed for every mapping. Problem is that doing the lazy flush seems - * to trigger bugs with some popular PCI cards, in particular 3ware (but - * has been also also seen with Qlogic at least). - */ -static int iommu_fullflush = 1; - -/* Allocation bitmap for the remapping area: */ -static DEFINE_SPINLOCK(iommu_bitmap_lock); -/* Guarded by iommu_bitmap_lock: */ -static unsigned long *iommu_gart_bitmap; - -static u32 gart_unmapped_entry; - -#define GPTE_VALID 1 -#define GPTE_COHERENT 2 -#define GPTE_ENCODE(x) \ - (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT) -#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28)) - -#define EMERGENCY_PAGES 32 /* = 128KB */ - -#ifdef CONFIG_AGP -#define AGPEXTERN extern -#else -#define AGPEXTERN -#endif - -/* GART can only remap to physical addresses < 1TB */ -#define GART_MAX_PHYS_ADDR (1ULL << 40) - -/* backdoor interface to AGP driver */ -AGPEXTERN int agp_memory_reserved; -AGPEXTERN __u32 *agp_gatt_table; - -static unsigned long next_bit; /* protected by iommu_bitmap_lock */ -static bool need_flush; /* global flush state. 
set for each gart wrap */ - -static unsigned long alloc_iommu(struct device *dev, int size, - unsigned long align_mask) -{ - unsigned long offset, flags; - unsigned long boundary_size; - unsigned long base_index; - - base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), - PAGE_SIZE) >> PAGE_SHIFT; - boundary_size = ALIGN((u64)dma_get_seg_boundary(dev) + 1, - PAGE_SIZE) >> PAGE_SHIFT; - - spin_lock_irqsave(&iommu_bitmap_lock, flags); - offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, - size, base_index, boundary_size, align_mask); - if (offset == -1) { - need_flush = true; - offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, - size, base_index, boundary_size, - align_mask); - } - if (offset != -1) { - next_bit = offset+size; - if (next_bit >= iommu_pages) { - next_bit = 0; - need_flush = true; - } - } - if (iommu_fullflush) - need_flush = true; - spin_unlock_irqrestore(&iommu_bitmap_lock, flags); - - return offset; -} - -static void free_iommu(unsigned long offset, int size) -{ - unsigned long flags; - - spin_lock_irqsave(&iommu_bitmap_lock, flags); - bitmap_clear(iommu_gart_bitmap, offset, size); - if (offset >= next_bit) - next_bit = offset + size; - spin_unlock_irqrestore(&iommu_bitmap_lock, flags); -} - -/* - * Use global flush state to avoid races with multiple flushers. - */ -static void flush_gart(void) -{ - unsigned long flags; - - spin_lock_irqsave(&iommu_bitmap_lock, flags); - if (need_flush) { - amd_flush_garts(); - need_flush = false; - } - spin_unlock_irqrestore(&iommu_bitmap_lock, flags); -} - -#ifdef CONFIG_IOMMU_LEAK -/* Debugging aid for drivers that don't free their IOMMU tables */ -static int leak_trace; -static int iommu_leak_pages = 20; - -static void dump_leak(void) -{ - static int dump; - - if (dump) - return; - dump = 1; - - show_stack(NULL, NULL); - debug_dma_dump_mappings(NULL); -} -#endif - -static void iommu_full(struct device *dev, size_t size, int dir) -{ - /* - * Ran out of IOMMU space for this operation. This is very bad. - * Unfortunately the drivers cannot handle this operation properly. - * Return some non mapped prereserved space in the aperture and - * let the Northbridge deal with it. This will result in garbage - * in the IO operation. When the size exceeds the prereserved space - * memory corruption will occur or random memory will be DMAed - * out. Hopefully no network devices use single mappings that big. - */ - - dev_err(dev, "PCI-DMA: Out of IOMMU space for %lu bytes\n", size); - - if (size > PAGE_SIZE*EMERGENCY_PAGES) { - if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) - panic("PCI-DMA: Memory would be corrupted\n"); - if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL) - panic(KERN_ERR - "PCI-DMA: Random memory would be DMAed\n"); - } -#ifdef CONFIG_IOMMU_LEAK - dump_leak(); -#endif -} - -static inline int -need_iommu(struct device *dev, unsigned long addr, size_t size) -{ - return force_iommu || !dma_capable(dev, addr, size); -} - -static inline int -nonforced_iommu(struct device *dev, unsigned long addr, size_t size) -{ - return !dma_capable(dev, addr, size); -} - -/* Map a single continuous physical area into the IOMMU. - * Caller needs to check if the iommu is needed and flush. 
- */ -static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, - size_t size, int dir, unsigned long align_mask) -{ - unsigned long npages = iommu_num_pages(phys_mem, size, PAGE_SIZE); - unsigned long iommu_page; - int i; - - if (unlikely(phys_mem + size > GART_MAX_PHYS_ADDR)) - return bad_dma_addr; - - iommu_page = alloc_iommu(dev, npages, align_mask); - if (iommu_page == -1) { - if (!nonforced_iommu(dev, phys_mem, size)) - return phys_mem; - if (panic_on_overflow) - panic("dma_map_area overflow %lu bytes\n", size); - iommu_full(dev, size, dir); - return bad_dma_addr; - } - - for (i = 0; i < npages; i++) { - iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem); - phys_mem += PAGE_SIZE; - } - return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); -} - -/* Map a single area into the IOMMU */ -static dma_addr_t gart_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - struct dma_attrs *attrs) -{ - unsigned long bus; - phys_addr_t paddr = page_to_phys(page) + offset; - - if (!dev) - dev = &x86_dma_fallback_dev; - - if (!need_iommu(dev, paddr, size)) - return paddr; - - bus = dma_map_area(dev, paddr, size, dir, 0); - flush_gart(); - - return bus; -} - -/* - * Free a DMA mapping. - */ -static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr, - size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) -{ - unsigned long iommu_page; - int npages; - int i; - - if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE || - dma_addr >= iommu_bus_base + iommu_size) - return; - - iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; - npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); - for (i = 0; i < npages; i++) { - iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; - } - free_iommu(iommu_page, npages); -} - -/* - * Wrapper for pci_unmap_single working with scatterlists. - */ -static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, struct dma_attrs *attrs) -{ - struct scatterlist *s; - int i; - - for_each_sg(sg, s, nents, i) { - if (!s->dma_length || !s->length) - break; - gart_unmap_page(dev, s->dma_address, s->dma_length, dir, NULL); - } -} - -/* Fallback for dma_map_sg in case of overflow */ -static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, - int nents, int dir) -{ - struct scatterlist *s; - int i; - -#ifdef CONFIG_IOMMU_DEBUG - pr_debug("dma_map_sg overflow\n"); -#endif - - for_each_sg(sg, s, nents, i) { - unsigned long addr = sg_phys(s); - - if (nonforced_iommu(dev, addr, s->length)) { - addr = dma_map_area(dev, addr, s->length, dir, 0); - if (addr == bad_dma_addr) { - if (i > 0) - gart_unmap_sg(dev, sg, i, dir, NULL); - nents = 0; - sg[0].dma_length = 0; - break; - } - } - s->dma_address = addr; - s->dma_length = s->length; - } - flush_gart(); - - return nents; -} - -/* Map multiple scatterlist entries continuous into the first. 
*/ -static int __dma_map_cont(struct device *dev, struct scatterlist *start, - int nelems, struct scatterlist *sout, - unsigned long pages) -{ - unsigned long iommu_start = alloc_iommu(dev, pages, 0); - unsigned long iommu_page = iommu_start; - struct scatterlist *s; - int i; - - if (iommu_start == -1) - return -1; - - for_each_sg(start, s, nelems, i) { - unsigned long pages, addr; - unsigned long phys_addr = s->dma_address; - - BUG_ON(s != start && s->offset); - if (s == start) { - sout->dma_address = iommu_bus_base; - sout->dma_address += iommu_page*PAGE_SIZE + s->offset; - sout->dma_length = s->length; - } else { - sout->dma_length += s->length; - } - - addr = phys_addr; - pages = iommu_num_pages(s->offset, s->length, PAGE_SIZE); - while (pages--) { - iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); - addr += PAGE_SIZE; - iommu_page++; - } - } - BUG_ON(iommu_page - iommu_start != pages); - - return 0; -} - -static inline int -dma_map_cont(struct device *dev, struct scatterlist *start, int nelems, - struct scatterlist *sout, unsigned long pages, int need) -{ - if (!need) { - BUG_ON(nelems != 1); - sout->dma_address = start->dma_address; - sout->dma_length = start->length; - return 0; - } - return __dma_map_cont(dev, start, nelems, sout, pages); -} - -/* - * DMA map all entries in a scatterlist. - * Merge chunks that have page aligned sizes into a continuous mapping. - */ -static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, struct dma_attrs *attrs) -{ - struct scatterlist *s, *ps, *start_sg, *sgmap; - int need = 0, nextneed, i, out, start; - unsigned long pages = 0; - unsigned int seg_size; - unsigned int max_seg_size; - - if (nents == 0) - return 0; - - if (!dev) - dev = &x86_dma_fallback_dev; - - out = 0; - start = 0; - start_sg = sg; - sgmap = sg; - seg_size = 0; - max_seg_size = dma_get_max_seg_size(dev); - ps = NULL; /* shut up gcc */ - - for_each_sg(sg, s, nents, i) { - dma_addr_t addr = sg_phys(s); - - s->dma_address = addr; - BUG_ON(s->length == 0); - - nextneed = need_iommu(dev, addr, s->length); - - /* Handle the previous not yet processed entries */ - if (i > start) { - /* - * Can only merge when the last chunk ends on a - * page boundary and the new one doesn't have an - * offset. 
- */ - if (!iommu_merge || !nextneed || !need || s->offset || - (s->length + seg_size > max_seg_size) || - (ps->offset + ps->length) % PAGE_SIZE) { - if (dma_map_cont(dev, start_sg, i - start, - sgmap, pages, need) < 0) - goto error; - out++; - - seg_size = 0; - sgmap = sg_next(sgmap); - pages = 0; - start = i; - start_sg = s; - } - } - - seg_size += s->length; - need = nextneed; - pages += iommu_num_pages(s->offset, s->length, PAGE_SIZE); - ps = s; - } - if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0) - goto error; - out++; - flush_gart(); - if (out < nents) { - sgmap = sg_next(sgmap); - sgmap->dma_length = 0; - } - return out; - -error: - flush_gart(); - gart_unmap_sg(dev, sg, out, dir, NULL); - - /* When it was forced or merged try again in a dumb way */ - if (force_iommu || iommu_merge) { - out = dma_map_sg_nonforce(dev, sg, nents, dir); - if (out > 0) - return out; - } - if (panic_on_overflow) - panic("dma_map_sg: overflow on %lu pages\n", pages); - - iommu_full(dev, pages << PAGE_SHIFT, dir); - for_each_sg(sg, s, nents, i) - s->dma_address = bad_dma_addr; - return 0; -} - -/* allocate and map a coherent mapping */ -static void * -gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, - gfp_t flag, struct dma_attrs *attrs) -{ - dma_addr_t paddr; - unsigned long align_mask; - struct page *page; - - if (force_iommu && !(flag & GFP_DMA)) { - flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); - page = alloc_pages(flag | __GFP_ZERO, get_order(size)); - if (!page) - return NULL; - - align_mask = (1UL << get_order(size)) - 1; - paddr = dma_map_area(dev, page_to_phys(page), size, - DMA_BIDIRECTIONAL, align_mask); - - flush_gart(); - if (paddr != bad_dma_addr) { - *dma_addr = paddr; - return page_address(page); - } - __free_pages(page, get_order(size)); - } else - return dma_generic_alloc_coherent(dev, size, dma_addr, flag, - attrs); - - return NULL; -} - -/* free a coherent mapping */ -static void -gart_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_addr, struct dma_attrs *attrs) -{ - gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL); - free_pages((unsigned long)vaddr, get_order(size)); -} - -static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return (dma_addr == bad_dma_addr); -} - -static int no_agp; - -static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) -{ - unsigned long a; - - if (!iommu_size) { - iommu_size = aper_size; - if (!no_agp) - iommu_size /= 2; - } - - a = aper + iommu_size; - iommu_size -= round_up(a, PMD_PAGE_SIZE) - a; - - if (iommu_size < 64*1024*1024) { - pr_warning( - "PCI-DMA: Warning: Small IOMMU %luMB." 
- " Consider increasing the AGP aperture in BIOS\n", - iommu_size >> 20); - } - - return iommu_size; -} - -static __init unsigned read_aperture(struct pci_dev *dev, u32 *size) -{ - unsigned aper_size = 0, aper_base_32, aper_order; - u64 aper_base; - - pci_read_config_dword(dev, AMD64_GARTAPERTUREBASE, &aper_base_32); - pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &aper_order); - aper_order = (aper_order >> 1) & 7; - - aper_base = aper_base_32 & 0x7fff; - aper_base <<= 25; - - aper_size = (32 * 1024 * 1024) << aper_order; - if (aper_base + aper_size > 0x100000000UL || !aper_size) - aper_base = 0; - - *size = aper_size; - return aper_base; -} - -static void enable_gart_translations(void) -{ - int i; - - if (!amd_nb_has_feature(AMD_NB_GART)) - return; - - for (i = 0; i < amd_nb_num(); i++) { - struct pci_dev *dev = node_to_amd_nb(i)->misc; - - enable_gart_translation(dev, __pa(agp_gatt_table)); - } - - /* Flush the GART-TLB to remove stale entries */ - amd_flush_garts(); -} - -/* - * If fix_up_north_bridges is set, the north bridges have to be fixed up on - * resume in the same way as they are handled in gart_iommu_hole_init(). - */ -static bool fix_up_north_bridges; -static u32 aperture_order; -static u32 aperture_alloc; - -void set_up_gart_resume(u32 aper_order, u32 aper_alloc) -{ - fix_up_north_bridges = true; - aperture_order = aper_order; - aperture_alloc = aper_alloc; -} - -static void gart_fixup_northbridges(void) -{ - int i; - - if (!fix_up_north_bridges) - return; - - if (!amd_nb_has_feature(AMD_NB_GART)) - return; - - pr_info("PCI-DMA: Restoring GART aperture settings\n"); - - for (i = 0; i < amd_nb_num(); i++) { - struct pci_dev *dev = node_to_amd_nb(i)->misc; - - /* - * Don't enable translations just yet. That is the next - * step. Restore the pre-suspend aperture settings. - */ - gart_set_size_and_enable(dev, aperture_order); - pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25); - } -} - -static void gart_resume(void) -{ - pr_info("PCI-DMA: Resuming GART IOMMU\n"); - - gart_fixup_northbridges(); - - enable_gart_translations(); -} - -static struct syscore_ops gart_syscore_ops = { - .resume = gart_resume, - -}; - -/* - * Private Northbridge GATT initialization in case we cannot use the - * AGP driver for some reason. 
- */ -static __init int init_amd_gatt(struct agp_kern_info *info) -{ - unsigned aper_size, gatt_size, new_aper_size; - unsigned aper_base, new_aper_base; - struct pci_dev *dev; - void *gatt; - int i; - - pr_info("PCI-DMA: Disabling AGP.\n"); - - aper_size = aper_base = info->aper_size = 0; - dev = NULL; - for (i = 0; i < amd_nb_num(); i++) { - dev = node_to_amd_nb(i)->misc; - new_aper_base = read_aperture(dev, &new_aper_size); - if (!new_aper_base) - goto nommu; - - if (!aper_base) { - aper_size = new_aper_size; - aper_base = new_aper_base; - } - if (aper_size != new_aper_size || aper_base != new_aper_base) - goto nommu; - } - if (!aper_base) - goto nommu; - - info->aper_base = aper_base; - info->aper_size = aper_size >> 20; - - gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32); - gatt = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(gatt_size)); - if (!gatt) - panic("Cannot allocate GATT table"); - if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT)) - panic("Could not set GART PTEs to uncacheable pages"); - - agp_gatt_table = gatt; - - register_syscore_ops(&gart_syscore_ops); - - flush_gart(); - - pr_info("PCI-DMA: aperture base @ %x size %u KB\n", - aper_base, aper_size>>10); - - return 0; - - nommu: - /* Should not happen anymore */ - pr_warning("PCI-DMA: More than 4GB of RAM and no IOMMU\n" - "falling back to iommu=soft.\n"); - return -1; -} - -static struct dma_map_ops gart_dma_ops = { - .map_sg = gart_map_sg, - .unmap_sg = gart_unmap_sg, - .map_page = gart_map_page, - .unmap_page = gart_unmap_page, - .alloc = gart_alloc_coherent, - .free = gart_free_coherent, - .mapping_error = gart_mapping_error, -}; - -static void gart_iommu_shutdown(void) -{ - struct pci_dev *dev; - int i; - - /* don't shutdown it if there is AGP installed */ - if (!no_agp) - return; - - if (!amd_nb_has_feature(AMD_NB_GART)) - return; - - for (i = 0; i < amd_nb_num(); i++) { - u32 ctl; - - dev = node_to_amd_nb(i)->misc; - pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); - - ctl &= ~GARTEN; - - pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl); - } -} - -int __init gart_iommu_init(void) -{ - struct agp_kern_info info; - unsigned long iommu_start; - unsigned long aper_base, aper_size; - unsigned long start_pfn, end_pfn; - unsigned long scratch; - long i; - - if (!amd_nb_has_feature(AMD_NB_GART)) - return 0; - -#ifndef CONFIG_AGP_AMD64 - no_agp = 1; -#else - /* Makefile puts PCI initialization via subsys_initcall first. 
*/ - /* Add other AMD AGP bridge drivers here */ - no_agp = no_agp || - (agp_amd64_init() < 0) || - (agp_copy_info(agp_bridge, &info) < 0); -#endif - - if (no_iommu || - (!force_iommu && max_pfn <= MAX_DMA32_PFN) || - !gart_iommu_aperture || - (no_agp && init_amd_gatt(&info) < 0)) { - if (max_pfn > MAX_DMA32_PFN) { - pr_warning("More than 4GB of memory but GART IOMMU not available.\n"); - pr_warning("falling back to iommu=soft.\n"); - } - return 0; - } - - /* need to map that range */ - aper_size = info.aper_size << 20; - aper_base = info.aper_base; - end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); - - if (end_pfn > max_low_pfn_mapped) { - start_pfn = (aper_base>>PAGE_SHIFT); - init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); - } - - pr_info("PCI-DMA: using GART IOMMU.\n"); - iommu_size = check_iommu_size(info.aper_base, aper_size); - iommu_pages = iommu_size >> PAGE_SHIFT; - - iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(iommu_pages/8)); - if (!iommu_gart_bitmap) - panic("Cannot allocate iommu bitmap\n"); - -#ifdef CONFIG_IOMMU_LEAK - if (leak_trace) { - int ret; - - ret = dma_debug_resize_entries(iommu_pages); - if (ret) - pr_debug("PCI-DMA: Cannot trace all the entries\n"); - } -#endif - - /* - * Out of IOMMU space handling. - * Reserve some invalid pages at the beginning of the GART. - */ - bitmap_set(iommu_gart_bitmap, 0, EMERGENCY_PAGES); - - pr_info("PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n", - iommu_size >> 20); - - agp_memory_reserved = iommu_size; - iommu_start = aper_size - iommu_size; - iommu_bus_base = info.aper_base + iommu_start; - bad_dma_addr = iommu_bus_base; - iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); - - /* - * Unmap the IOMMU part of the GART. The alias of the page is - * always mapped with cache enabled and there is no full cache - * coherency across the GART remapping. The unmapping avoids - * automatic prefetches from the CPU allocating cache lines in - * there. All CPU accesses are done via the direct mapping to - * the backing memory. The GART address is only used by PCI - * devices. - */ - set_memory_np((unsigned long)__va(iommu_bus_base), - iommu_size >> PAGE_SHIFT); - /* - * Tricky. The GART table remaps the physical memory range, - * so the CPU wont notice potential aliases and if the memory - * is remapped to UC later on, we might surprise the PCI devices - * with a stray writeout of a cacheline. So play it sure and - * do an explicit, full-scale wbinvd() _after_ having marked all - * the pages as Not-Present: - */ - wbinvd(); - - /* - * Now all caches are flushed and we can safely enable - * GART hardware. Doing it early leaves the possibility - * of stale cache entries that can lead to GART PTE - * errors. - */ - enable_gart_translations(); - - /* - * Try to workaround a bug (thanks to BenH): - * Set unmapped entries to a scratch page instead of 0. - * Any prefetches that hit unmapped entries won't get an bus abort - * then. (P2P bridge may be prefetching on DMA reads). 
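[Editor's note] A small worked example of the address layout gart_iommu_init() above sets up: the IOMMU remapping area is carved out of the top of the AGP aperture, so iommu_start is an offset into the aperture and iommu_bus_base is the bus address devices see. All numbers here are hypothetical.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical figures: a 256 MiB aperture at 2 GiB, with the top
	 * 64 MiB reserved for IOMMU remapping, as in gart_iommu_init(). */
	uint64_t aper_base  = 0x80000000ULL;   /* 2 GiB   */
	uint64_t aper_size  = 256ULL << 20;    /* 256 MiB */
	uint64_t iommu_size = 64ULL << 20;     /* 64 MiB  */

	uint64_t iommu_start    = aper_size - iommu_size;   /* offset into aperture */
	uint64_t iommu_bus_base = aper_base + iommu_start;  /* bus address of IOMMU */

	printf("AGP keeps  [%#llx, %#llx)\n",
	       (unsigned long long)aper_base,
	       (unsigned long long)iommu_bus_base);
	printf("IOMMU gets [%#llx, %#llx)\n",
	       (unsigned long long)iommu_bus_base,
	       (unsigned long long)(aper_base + aper_size));
	return 0;
}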
- */ - scratch = get_zeroed_page(GFP_KERNEL); - if (!scratch) - panic("Cannot allocate iommu scratch page"); - gart_unmapped_entry = GPTE_ENCODE(__pa(scratch)); - for (i = EMERGENCY_PAGES; i < iommu_pages; i++) - iommu_gatt_base[i] = gart_unmapped_entry; - - flush_gart(); - dma_ops = &gart_dma_ops; - x86_platform.iommu_shutdown = gart_iommu_shutdown; - swiotlb = 0; - - return 0; -} - -void __init gart_parse_options(char *p) -{ - int arg; - -#ifdef CONFIG_IOMMU_LEAK - if (!strncmp(p, "leak", 4)) { - leak_trace = 1; - p += 4; - if (*p == '=') - ++p; - if (isdigit(*p) && get_option(&p, &arg)) - iommu_leak_pages = arg; - } -#endif - if (isdigit(*p) && get_option(&p, &arg)) - iommu_size = arg; - if (!strncmp(p, "fullflush", 9)) - iommu_fullflush = 1; - if (!strncmp(p, "nofullflush", 11)) - iommu_fullflush = 0; - if (!strncmp(p, "noagp", 5)) - no_agp = 1; - if (!strncmp(p, "noaperture", 10)) - fix_aperture = 0; - /* duplicated from pci-dma.c */ - if (!strncmp(p, "force", 5)) - gart_iommu_aperture_allowed = 1; - if (!strncmp(p, "allowed", 7)) - gart_iommu_aperture_allowed = 1; - if (!strncmp(p, "memaper", 7)) { - fallback_aper_force = 1; - p += 7; - if (*p == '=') { - ++p; - if (get_option(&p, &arg)) - fallback_aper_order = arg; - } - } -} -IOMMU_INIT_POST(gart_iommu_hole_init); diff --git a/ANDROID_3.4.5/arch/x86/kernel/amd_nb.c b/ANDROID_3.4.5/arch/x86/kernel/amd_nb.c deleted file mode 100644 index be168545..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/amd_nb.c +++ /dev/null @@ -1,282 +0,0 @@ -/* - * Shared support code for AMD K8 northbridges and derivates. - * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2. - */ -#include <linux/types.h> -#include <linux/slab.h> -#include <linux/init.h> -#include <linux/errno.h> -#include <linux/module.h> -#include <linux/spinlock.h> -#include <asm/amd_nb.h> - -static u32 *flush_words; - -const struct pci_device_id amd_nb_misc_ids[] = { - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) }, - {} -}; -EXPORT_SYMBOL(amd_nb_misc_ids); - -static struct pci_device_id amd_nb_link_ids[] = { - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) }, - {} -}; - -const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[] __initconst = { - { 0x00, 0x18, 0x20 }, - { 0xff, 0x00, 0x20 }, - { 0xfe, 0x00, 0x20 }, - { } -}; - -struct amd_northbridge_info amd_northbridges; -EXPORT_SYMBOL(amd_northbridges); - -static struct pci_dev *next_northbridge(struct pci_dev *dev, - const struct pci_device_id *ids) -{ - do { - dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); - if (!dev) - break; - } while (!pci_match_id(ids, dev)); - return dev; -} - -int amd_cache_northbridges(void) -{ - u16 i = 0; - struct amd_northbridge *nb; - struct pci_dev *misc, *link; - - if (amd_nb_num()) - return 0; - - misc = NULL; - while ((misc = next_northbridge(misc, amd_nb_misc_ids)) != NULL) - i++; - - if (i == 0) - return 0; - - nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL); - if (!nb) - return -ENOMEM; - - amd_northbridges.nb = nb; - amd_northbridges.num = i; - - link = misc = NULL; - for (i = 0; i != amd_nb_num(); i++) { - node_to_amd_nb(i)->misc = misc = - next_northbridge(misc, amd_nb_misc_ids); - node_to_amd_nb(i)->link = link = - next_northbridge(link, amd_nb_link_ids); - } - - /* some CPU families (e.g. 
family 0x11) do not support GART */ - if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 || - boot_cpu_data.x86 == 0x15) - amd_northbridges.flags |= AMD_NB_GART; - - /* - * Some CPU families support L3 Cache Index Disable. There are some - * limitations because of E382 and E388 on family 0x10. - */ - if (boot_cpu_data.x86 == 0x10 && - boot_cpu_data.x86_model >= 0x8 && - (boot_cpu_data.x86_model > 0x9 || - boot_cpu_data.x86_mask >= 0x1)) - amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE; - - if (boot_cpu_data.x86 == 0x15) - amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE; - - /* L3 cache partitioning is supported on family 0x15 */ - if (boot_cpu_data.x86 == 0x15) - amd_northbridges.flags |= AMD_NB_L3_PARTITIONING; - - return 0; -} -EXPORT_SYMBOL_GPL(amd_cache_northbridges); - -/* - * Ignores subdevice/subvendor but as far as I can figure out - * they're useless anyways - */ -bool __init early_is_amd_nb(u32 device) -{ - const struct pci_device_id *id; - u32 vendor = device & 0xffff; - - device >>= 16; - for (id = amd_nb_misc_ids; id->vendor; id++) - if (vendor == id->vendor && device == id->device) - return true; - return false; -} - -struct resource *amd_get_mmconfig_range(struct resource *res) -{ - u32 address; - u64 base, msr; - unsigned segn_busn_bits; - - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) - return NULL; - - /* assume all cpus from fam10h have mmconfig */ - if (boot_cpu_data.x86 < 0x10) - return NULL; - - address = MSR_FAM10H_MMIO_CONF_BASE; - rdmsrl(address, msr); - - /* mmconfig is not enabled */ - if (!(msr & FAM10H_MMIO_CONF_ENABLE)) - return NULL; - - base = msr & (FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT); - - segn_busn_bits = (msr >> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) & - FAM10H_MMIO_CONF_BUSRANGE_MASK; - - res->flags = IORESOURCE_MEM; - res->start = base; - res->end = base + (1ULL<<(segn_busn_bits + 20)) - 1; - return res; -} - -int amd_get_subcaches(int cpu) -{ - struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link; - unsigned int mask; - int cuid; - - if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) - return 0; - - pci_read_config_dword(link, 0x1d4, &mask); - - cuid = cpu_data(cpu).compute_unit_id; - return (mask >> (4 * cuid)) & 0xf; -} - -int amd_set_subcaches(int cpu, int mask) -{ - static unsigned int reset, ban; - struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu)); - unsigned int reg; - int cuid; - - if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING) || mask > 0xf) - return -EINVAL; - - /* if necessary, collect reset state of L3 partitioning and BAN mode */ - if (reset == 0) { - pci_read_config_dword(nb->link, 0x1d4, &reset); - pci_read_config_dword(nb->misc, 0x1b8, &ban); - ban &= 0x180000; - } - - /* deactivate BAN mode if any subcaches are to be disabled */ - if (mask != 0xf) { - pci_read_config_dword(nb->misc, 0x1b8, ®); - pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000); - } - - cuid = cpu_data(cpu).compute_unit_id; - mask <<= 4 * cuid; - mask |= (0xf ^ (1 << cuid)) << 26; - - pci_write_config_dword(nb->link, 0x1d4, mask); - - /* reset BAN mode if L3 partitioning returned to reset state */ - pci_read_config_dword(nb->link, 0x1d4, ®); - if (reg == reset) { - pci_read_config_dword(nb->misc, 0x1b8, ®); - reg &= ~0x180000; - pci_write_config_dword(nb->misc, 0x1b8, reg | ban); - } - - return 0; -} - -static int amd_cache_gart(void) -{ - u16 i; - - if (!amd_nb_has_feature(AMD_NB_GART)) - return 0; - - flush_words = kmalloc(amd_nb_num() * sizeof(u32), GFP_KERNEL); - if (!flush_words) { - 
amd_northbridges.flags &= ~AMD_NB_GART; - return -ENOMEM; - } - - for (i = 0; i != amd_nb_num(); i++) - pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c, - &flush_words[i]); - - return 0; -} - -void amd_flush_garts(void) -{ - int flushed, i; - unsigned long flags; - static DEFINE_SPINLOCK(gart_lock); - - if (!amd_nb_has_feature(AMD_NB_GART)) - return; - - /* Avoid races between AGP and IOMMU. In theory it's not needed - but I'm not sure if the hardware won't lose flush requests - when another is pending. This whole thing is so expensive anyways - that it doesn't matter to serialize more. -AK */ - spin_lock_irqsave(&gart_lock, flags); - flushed = 0; - for (i = 0; i < amd_nb_num(); i++) { - pci_write_config_dword(node_to_amd_nb(i)->misc, 0x9c, - flush_words[i] | 1); - flushed++; - } - for (i = 0; i < amd_nb_num(); i++) { - u32 w; - /* Make sure the hardware actually executed the flush*/ - for (;;) { - pci_read_config_dword(node_to_amd_nb(i)->misc, - 0x9c, &w); - if (!(w & 1)) - break; - cpu_relax(); - } - } - spin_unlock_irqrestore(&gart_lock, flags); - if (!flushed) - printk("nothing to flush?\n"); -} -EXPORT_SYMBOL_GPL(amd_flush_garts); - -static __init int init_amd_nbs(void) -{ - int err = 0; - - err = amd_cache_northbridges(); - - if (err < 0) - printk(KERN_NOTICE "AMD NB: Cannot enumerate AMD northbridges.\n"); - - if (amd_cache_gart() < 0) - printk(KERN_NOTICE "AMD NB: Cannot initialize GART flush words, " - "GART support disabled.\n"); - - return err; -} - -/* This has to go after the PCI subsystem */ -fs_initcall(init_amd_nbs); diff --git a/ANDROID_3.4.5/arch/x86/kernel/apb_timer.c b/ANDROID_3.4.5/arch/x86/kernel/apb_timer.c deleted file mode 100644 index afdc3f75..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/apb_timer.c +++ /dev/null @@ -1,431 +0,0 @@ -/* - * apb_timer.c: Driver for Langwell APB timers - * - * (C) Copyright 2009 Intel Corporation - * Author: Jacob Pan (jacob.jun.pan@intel.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - * - * Note: - * Langwell is the south complex of Intel Moorestown MID platform. There are - * eight external timers in total that can be used by the operating system. - * The timer information, such as frequency and addresses, is provided to the - * OS via SFI tables. - * Timer interrupts are routed via FW/HW emulated IOAPIC independently via - * individual redirection table entries (RTE). - * Unlike HPET, there is no master counter, therefore one of the timers are - * used as clocksource. The overall allocation looks like: - * - timer 0 - NR_CPUs for per cpu timer - * - one timer for clocksource - * - one timer for watchdog driver. - * It is also worth notice that APB timer does not support true one-shot mode, - * free-running mode will be used here to emulate one-shot mode. - * APB timer can also be used as broadcast timer along with per cpu local APIC - * timer, but by default APB timer has higher rating than local APIC timers. 
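[Editor's note] The header comment above describes how the eight Langwell APB timers are divided up. Below is a toy sketch of the same allocation decision that apbt_time_init() makes further down (per-CPU clock event timers only if SFI reports enough of them); the counts are made up.

#include <stdio.h>

int main(void)
{
	/* Made-up counts, mirroring the policy in apbt_time_init() below. */
	int possible_cpus = 2;   /* CPUs that may ever come online */
	int sfi_timers    = 8;   /* APB timers reported by SFI     */

	int percpu_timer    = possible_cpus <= sfi_timers;
	int apb_timers_used = percpu_timer ? possible_cpus : 1;

	printf("%s, %d APB timer(s) used for clock events\n",
	       percpu_timer ? "per-cpu APB timers" : "one shared APB timer",
	       apb_timers_used);
	return 0;
}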
- */ - -#include <linux/delay.h> -#include <linux/dw_apb_timer.h> -#include <linux/errno.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/pm.h> -#include <linux/sfi.h> -#include <linux/interrupt.h> -#include <linux/cpu.h> -#include <linux/irq.h> - -#include <asm/fixmap.h> -#include <asm/apb_timer.h> -#include <asm/mrst.h> -#include <asm/time.h> - -#define APBT_CLOCKEVENT_RATING 110 -#define APBT_CLOCKSOURCE_RATING 250 - -#define APBT_CLOCKEVENT0_NUM (0) -#define APBT_CLOCKSOURCE_NUM (2) - -static phys_addr_t apbt_address; -static int apb_timer_block_enabled; -static void __iomem *apbt_virt_address; - -/* - * Common DW APB timer info - */ -static unsigned long apbt_freq; - -struct apbt_dev { - struct dw_apb_clock_event_device *timer; - unsigned int num; - int cpu; - unsigned int irq; - char name[10]; -}; - -static struct dw_apb_clocksource *clocksource_apbt; - -static inline void __iomem *adev_virt_addr(struct apbt_dev *adev) -{ - return apbt_virt_address + adev->num * APBTMRS_REG_SIZE; -} - -static DEFINE_PER_CPU(struct apbt_dev, cpu_apbt_dev); - -#ifdef CONFIG_SMP -static unsigned int apbt_num_timers_used; -#endif - -static inline void apbt_set_mapping(void) -{ - struct sfi_timer_table_entry *mtmr; - int phy_cs_timer_id = 0; - - if (apbt_virt_address) { - pr_debug("APBT base already mapped\n"); - return; - } - mtmr = sfi_get_mtmr(APBT_CLOCKEVENT0_NUM); - if (mtmr == NULL) { - printk(KERN_ERR "Failed to get MTMR %d from SFI\n", - APBT_CLOCKEVENT0_NUM); - return; - } - apbt_address = (phys_addr_t)mtmr->phys_addr; - if (!apbt_address) { - printk(KERN_WARNING "No timer base from SFI, use default\n"); - apbt_address = APBT_DEFAULT_BASE; - } - apbt_virt_address = ioremap_nocache(apbt_address, APBT_MMAP_SIZE); - if (!apbt_virt_address) { - pr_debug("Failed mapping APBT phy address at %lu\n",\ - (unsigned long)apbt_address); - goto panic_noapbt; - } - apbt_freq = mtmr->freq_hz; - sfi_free_mtmr(mtmr); - - /* Now figure out the physical timer id for clocksource device */ - mtmr = sfi_get_mtmr(APBT_CLOCKSOURCE_NUM); - if (mtmr == NULL) - goto panic_noapbt; - - /* Now figure out the physical timer id */ - pr_debug("Use timer %d for clocksource\n", - (int)(mtmr->phys_addr & 0xff) / APBTMRS_REG_SIZE); - phy_cs_timer_id = (unsigned int)(mtmr->phys_addr & 0xff) / - APBTMRS_REG_SIZE; - - clocksource_apbt = dw_apb_clocksource_init(APBT_CLOCKSOURCE_RATING, - "apbt0", apbt_virt_address + phy_cs_timer_id * - APBTMRS_REG_SIZE, apbt_freq); - return; - -panic_noapbt: - panic("Failed to setup APB system timer\n"); - -} - -static inline void apbt_clear_mapping(void) -{ - iounmap(apbt_virt_address); - apbt_virt_address = NULL; -} - -/* - * APBT timer interrupt enable / disable - */ -static inline int is_apbt_capable(void) -{ - return apbt_virt_address ? 1 : 0; -} - -static int __init apbt_clockevent_register(void) -{ - struct sfi_timer_table_entry *mtmr; - struct apbt_dev *adev = &__get_cpu_var(cpu_apbt_dev); - - mtmr = sfi_get_mtmr(APBT_CLOCKEVENT0_NUM); - if (mtmr == NULL) { - printk(KERN_ERR "Failed to get MTMR %d from SFI\n", - APBT_CLOCKEVENT0_NUM); - return -ENODEV; - } - - adev->num = smp_processor_id(); - adev->timer = dw_apb_clockevent_init(smp_processor_id(), "apbt0", - mrst_timer_options == MRST_TIMER_LAPIC_APBT ? - APBT_CLOCKEVENT_RATING - 100 : APBT_CLOCKEVENT_RATING, - adev_virt_addr(adev), 0, apbt_freq); - /* Firmware does EOI handling for us. 
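[Editor's note] The per-timer register blocks are laid out back to back, so adev_virt_addr() above simply scales the timer number by the block size. A standalone sketch of that address arithmetic follows; the base address and the 0x14 block size are assumptions for illustration, not values taken from this diff.

#include <stdint.h>
#include <stdio.h>

#define APBTMRS_REG_SIZE 0x14   /* assumed size of one timer's register block */

int main(void)
{
	uintptr_t apbt_base = 0xff000000u;   /* hypothetical mapped base address */
	unsigned n;

	/* Same arithmetic as adev_virt_addr(): timer n lives at
	 * base + n * APBTMRS_REG_SIZE. */
	for (n = 0; n < 4; n++)
		printf("timer %u registers at %#lx\n",
		       n, (unsigned long)(apbt_base + n * APBTMRS_REG_SIZE));
	return 0;
}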
*/ - adev->timer->eoi = NULL; - - if (mrst_timer_options == MRST_TIMER_LAPIC_APBT) { - global_clock_event = &adev->timer->ced; - printk(KERN_DEBUG "%s clockevent registered as global\n", - global_clock_event->name); - } - - dw_apb_clockevent_register(adev->timer); - - sfi_free_mtmr(mtmr); - return 0; -} - -#ifdef CONFIG_SMP - -static void apbt_setup_irq(struct apbt_dev *adev) -{ - /* timer0 irq has been setup early */ - if (adev->irq == 0) - return; - - irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT); - irq_set_affinity(adev->irq, cpumask_of(adev->cpu)); - /* APB timer irqs are set up as mp_irqs, timer is edge type */ - __irq_set_handler(adev->irq, handle_edge_irq, 0, "edge"); -} - -/* Should be called with per cpu */ -void apbt_setup_secondary_clock(void) -{ - struct apbt_dev *adev; - int cpu; - - /* Don't register boot CPU clockevent */ - cpu = smp_processor_id(); - if (!cpu) - return; - - adev = &__get_cpu_var(cpu_apbt_dev); - if (!adev->timer) { - adev->timer = dw_apb_clockevent_init(cpu, adev->name, - APBT_CLOCKEVENT_RATING, adev_virt_addr(adev), - adev->irq, apbt_freq); - adev->timer->eoi = NULL; - } else { - dw_apb_clockevent_resume(adev->timer); - } - - printk(KERN_INFO "Registering CPU %d clockevent device %s, cpu %08x\n", - cpu, adev->name, adev->cpu); - - apbt_setup_irq(adev); - dw_apb_clockevent_register(adev->timer); - - return; -} - -/* - * this notify handler process CPU hotplug events. in case of S0i3, nonboot - * cpus are disabled/enabled frequently, for performance reasons, we keep the - * per cpu timer irq registered so that we do need to do free_irq/request_irq. - * - * TODO: it might be more reliable to directly disable percpu clockevent device - * without the notifier chain. currently, cpu 0 may get interrupts from other - * cpu timers during the offline process due to the ordering of notification. - * the extra interrupt is harmless. - */ -static int apbt_cpuhp_notify(struct notifier_block *n, - unsigned long action, void *hcpu) -{ - unsigned long cpu = (unsigned long)hcpu; - struct apbt_dev *adev = &per_cpu(cpu_apbt_dev, cpu); - - switch (action & 0xf) { - case CPU_DEAD: - dw_apb_clockevent_pause(adev->timer); - if (system_state == SYSTEM_RUNNING) { - pr_debug("skipping APBT CPU %lu offline\n", cpu); - } else if (adev) { - pr_debug("APBT clockevent for cpu %lu offline\n", cpu); - dw_apb_clockevent_stop(adev->timer); - } - break; - default: - pr_debug("APBT notified %lu, no action\n", action); - } - return NOTIFY_OK; -} - -static __init int apbt_late_init(void) -{ - if (mrst_timer_options == MRST_TIMER_LAPIC_APBT || - !apb_timer_block_enabled) - return 0; - /* This notifier should be called after workqueue is ready */ - hotcpu_notifier(apbt_cpuhp_notify, -20); - return 0; -} -fs_initcall(apbt_late_init); -#else - -void apbt_setup_secondary_clock(void) {} - -#endif /* CONFIG_SMP */ - -static int apbt_clocksource_register(void) -{ - u64 start, now; - cycle_t t1; - - /* Start the counter, use timer 2 as source, timer 0/1 for event */ - dw_apb_clocksource_start(clocksource_apbt); - - /* Verify whether apbt counter works */ - t1 = dw_apb_clocksource_read(clocksource_apbt); - rdtscll(start); - - /* - * We don't know the TSC frequency yet, but waiting for - * 200000 TSC cycles is safe: - * 4 GHz == 50us - * 1 GHz == 200us - */ - do { - rep_nop(); - rdtscll(now); - } while ((now - start) < 200000UL); - - /* APBT is the only always on clocksource, it has to work! */ - if (t1 == dw_apb_clocksource_read(clocksource_apbt)) - panic("APBT counter not counting. 
APBT disabled\n"); - - dw_apb_clocksource_register(clocksource_apbt); - - return 0; -} - -/* - * Early setup the APBT timer, only use timer 0 for booting then switch to - * per CPU timer if possible. - * returns 1 if per cpu apbt is setup - * returns 0 if no per cpu apbt is chosen - * panic if set up failed, this is the only platform timer on Moorestown. - */ -void __init apbt_time_init(void) -{ -#ifdef CONFIG_SMP - int i; - struct sfi_timer_table_entry *p_mtmr; - unsigned int percpu_timer; - struct apbt_dev *adev; -#endif - - if (apb_timer_block_enabled) - return; - apbt_set_mapping(); - if (!apbt_virt_address) - goto out_noapbt; - /* - * Read the frequency and check for a sane value, for ESL model - * we extend the possible clock range to allow time scaling. - */ - - if (apbt_freq < APBT_MIN_FREQ || apbt_freq > APBT_MAX_FREQ) { - pr_debug("APBT has invalid freq 0x%lx\n", apbt_freq); - goto out_noapbt; - } - if (apbt_clocksource_register()) { - pr_debug("APBT has failed to register clocksource\n"); - goto out_noapbt; - } - if (!apbt_clockevent_register()) - apb_timer_block_enabled = 1; - else { - pr_debug("APBT has failed to register clockevent\n"); - goto out_noapbt; - } -#ifdef CONFIG_SMP - /* kernel cmdline disable apb timer, so we will use lapic timers */ - if (mrst_timer_options == MRST_TIMER_LAPIC_APBT) { - printk(KERN_INFO "apbt: disabled per cpu timer\n"); - return; - } - pr_debug("%s: %d CPUs online\n", __func__, num_online_cpus()); - if (num_possible_cpus() <= sfi_mtimer_num) { - percpu_timer = 1; - apbt_num_timers_used = num_possible_cpus(); - } else { - percpu_timer = 0; - apbt_num_timers_used = 1; - } - pr_debug("%s: %d APB timers used\n", __func__, apbt_num_timers_used); - - /* here we set up per CPU timer data structure */ - for (i = 0; i < apbt_num_timers_used; i++) { - adev = &per_cpu(cpu_apbt_dev, i); - adev->num = i; - adev->cpu = i; - p_mtmr = sfi_get_mtmr(i); - if (p_mtmr) - adev->irq = p_mtmr->irq; - else - printk(KERN_ERR "Failed to get timer for cpu %d\n", i); - snprintf(adev->name, sizeof(adev->name) - 1, "apbt%d", i); - } -#endif - - return; - -out_noapbt: - apbt_clear_mapping(); - apb_timer_block_enabled = 0; - panic("failed to enable APB timer\n"); -} - -/* called before apb_timer_enable, use early map */ -unsigned long apbt_quick_calibrate(void) -{ - int i, scale; - u64 old, new; - cycle_t t1, t2; - unsigned long khz = 0; - u32 loop, shift; - - apbt_set_mapping(); - dw_apb_clocksource_start(clocksource_apbt); - - /* check if the timer can count down, otherwise return */ - old = dw_apb_clocksource_read(clocksource_apbt); - i = 10000; - while (--i) { - if (old != dw_apb_clocksource_read(clocksource_apbt)) - break; - } - if (!i) - goto failed; - - /* count 16 ms */ - loop = (apbt_freq / 1000) << 4; - - /* restart the timer to ensure it won't get to 0 in the calibration */ - dw_apb_clocksource_start(clocksource_apbt); - - old = dw_apb_clocksource_read(clocksource_apbt); - old += loop; - - t1 = __native_read_tsc(); - - do { - new = dw_apb_clocksource_read(clocksource_apbt); - } while (new < old); - - t2 = __native_read_tsc(); - - shift = 5; - if (unlikely(loop >> shift == 0)) { - printk(KERN_INFO - "APBT TSC calibration failed, not enough resolution\n"); - return 0; - } - scale = (int)div_u64((t2 - t1), loop >> shift); - khz = (scale * (apbt_freq / 1000)) >> shift; - printk(KERN_INFO "TSC freq calculated by APB timer is %lu khz\n", khz); - return khz; -failed: - return 0; -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/aperture_64.c 
b/ANDROID_3.4.5/arch/x86/kernel/aperture_64.c deleted file mode 100644 index 6e76c191..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/aperture_64.c +++ /dev/null @@ -1,523 +0,0 @@ -/* - * Firmware replacement code. - * - * Work around broken BIOSes that don't set an aperture, only set the - * aperture in the AGP bridge, or set too small aperture. - * - * If all fails map the aperture over some low memory. This is cheaper than - * doing bounce buffering. The memory is lost. This is done at early boot - * because only the bootmem allocator can allocate 32+MB. - * - * Copyright 2002 Andi Kleen, SuSE Labs. - */ -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/init.h> -#include <linux/memblock.h> -#include <linux/mmzone.h> -#include <linux/pci_ids.h> -#include <linux/pci.h> -#include <linux/bitops.h> -#include <linux/ioport.h> -#include <linux/suspend.h> -#include <linux/kmemleak.h> -#include <asm/e820.h> -#include <asm/io.h> -#include <asm/iommu.h> -#include <asm/gart.h> -#include <asm/pci-direct.h> -#include <asm/dma.h> -#include <asm/amd_nb.h> -#include <asm/x86_init.h> - -/* - * Using 512M as goal, in case kexec will load kernel_big - * that will do the on-position decompress, and could overlap with - * with the gart aperture that is used. - * Sequence: - * kernel_small - * ==> kexec (with kdump trigger path or gart still enabled) - * ==> kernel_small (gart area become e820_reserved) - * ==> kexec (with kdump trigger path or gart still enabled) - * ==> kerne_big (uncompressed size will be big than 64M or 128M) - * So don't use 512M below as gart iommu, leave the space for kernel - * code for safe. - */ -#define GART_MIN_ADDR (512ULL << 20) -#define GART_MAX_ADDR (1ULL << 32) - -int gart_iommu_aperture; -int gart_iommu_aperture_disabled __initdata; -int gart_iommu_aperture_allowed __initdata; - -int fallback_aper_order __initdata = 1; /* 64MB */ -int fallback_aper_force __initdata; - -int fix_aperture __initdata = 1; - -static struct resource gart_resource = { - .name = "GART", - .flags = IORESOURCE_MEM, -}; - -static void __init insert_aperture_resource(u32 aper_base, u32 aper_size) -{ - gart_resource.start = aper_base; - gart_resource.end = aper_base + aper_size - 1; - insert_resource(&iomem_resource, &gart_resource); -} - -/* This code runs before the PCI subsystem is initialized, so just - access the northbridge directly. */ - -static u32 __init allocate_aperture(void) -{ - u32 aper_size; - unsigned long addr; - - /* aper_size should <= 1G */ - if (fallback_aper_order > 5) - fallback_aper_order = 5; - aper_size = (32 * 1024 * 1024) << fallback_aper_order; - - /* - * Aperture has to be naturally aligned. This means a 2GB aperture - * won't have much chance of finding a place in the lower 4GB of - * memory. Unfortunately we cannot move it up because that would - * make the IOMMU useless. - */ - addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR, - aper_size, aper_size); - if (!addr || addr + aper_size > GART_MAX_ADDR) { - printk(KERN_ERR - "Cannot allocate aperture memory hole (%lx,%uK)\n", - addr, aper_size>>10); - return 0; - } - memblock_reserve(addr, aper_size); - /* - * Kmemleak should not scan this block as it may not be mapped via the - * kernel direct mapping. 
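[Editor's note] allocate_aperture() above sizes the fallback hole as 32 MiB shifted by the (clamped) order and requires natural alignment, i.e. the block must be aligned to its own size. A small sketch of that sizing with a hypothetical command-line order:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int fallback_aper_order = 7;   /* hypothetical, e.g. from "memaper=7" */
	uint32_t aper_size;

	/* Clamp to 1 GiB and compute the hole size, as allocate_aperture()
	 * does above; the allocation must also be aligned to this size. */
	if (fallback_aper_order > 5)
		fallback_aper_order = 5;
	aper_size = (32u * 1024 * 1024) << fallback_aper_order;

	printf("fallback aperture: %u MiB, naturally aligned to %u MiB\n",
	       aper_size >> 20, aper_size >> 20);
	return 0;
}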
- */ - kmemleak_ignore(phys_to_virt(addr)); - printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n", - aper_size >> 10, addr); - insert_aperture_resource((u32)addr, aper_size); - register_nosave_region(addr >> PAGE_SHIFT, - (addr+aper_size) >> PAGE_SHIFT); - - return (u32)addr; -} - - -/* Find a PCI capability */ -static u32 __init find_cap(int bus, int slot, int func, int cap) -{ - int bytes; - u8 pos; - - if (!(read_pci_config_16(bus, slot, func, PCI_STATUS) & - PCI_STATUS_CAP_LIST)) - return 0; - - pos = read_pci_config_byte(bus, slot, func, PCI_CAPABILITY_LIST); - for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) { - u8 id; - - pos &= ~3; - id = read_pci_config_byte(bus, slot, func, pos+PCI_CAP_LIST_ID); - if (id == 0xff) - break; - if (id == cap) - return pos; - pos = read_pci_config_byte(bus, slot, func, - pos+PCI_CAP_LIST_NEXT); - } - return 0; -} - -/* Read a standard AGPv3 bridge header */ -static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order) -{ - u32 apsize; - u32 apsizereg; - int nbits; - u32 aper_low, aper_hi; - u64 aper; - u32 old_order; - - printk(KERN_INFO "AGP bridge at %02x:%02x:%02x\n", bus, slot, func); - apsizereg = read_pci_config_16(bus, slot, func, cap + 0x14); - if (apsizereg == 0xffffffff) { - printk(KERN_ERR "APSIZE in AGP bridge unreadable\n"); - return 0; - } - - /* old_order could be the value from NB gart setting */ - old_order = *order; - - apsize = apsizereg & 0xfff; - /* Some BIOS use weird encodings not in the AGPv3 table. */ - if (apsize & 0xff) - apsize |= 0xf00; - nbits = hweight16(apsize); - *order = 7 - nbits; - if ((int)*order < 0) /* < 32MB */ - *order = 0; - - aper_low = read_pci_config(bus, slot, func, 0x10); - aper_hi = read_pci_config(bus, slot, func, 0x14); - aper = (aper_low & ~((1<<22)-1)) | ((u64)aper_hi << 32); - - /* - * On some sick chips, APSIZE is 0. It means it wants 4G - * so let double check that order, and lets trust AMD NB settings: - */ - printk(KERN_INFO "Aperture from AGP @ %Lx old size %u MB\n", - aper, 32 << old_order); - if (aper + (32ULL<<(20 + *order)) > 0x100000000ULL) { - printk(KERN_INFO "Aperture size %u MB (APSIZE %x) is not right, using settings from NB\n", - 32 << *order, apsizereg); - *order = old_order; - } - - printk(KERN_INFO "Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", - aper, 32 << *order, apsizereg); - - if (!aperture_valid(aper, (32*1024*1024) << *order, 32<<20)) - return 0; - return (u32)aper; -} - -/* - * Look for an AGP bridge. Windows only expects the aperture in the - * AGP bridge and some BIOS forget to initialize the Northbridge too. - * Work around this here. - * - * Do an PCI bus scan by hand because we're running before the PCI - * subsystem. - * - * All AMD AGP bridges are AGPv3 compliant, so we can do this scan - * generically. It's probably overkill to always scan all slots because - * the AGP bridges should be always an own bus on the HT hierarchy, - * but do it here for future safety. - */ -static u32 __init search_agp_bridge(u32 *order, int *valid_agp) -{ - int bus, slot, func; - - /* Poor man's PCI discovery */ - for (bus = 0; bus < 256; bus++) { - for (slot = 0; slot < 32; slot++) { - for (func = 0; func < 8; func++) { - u32 class, cap; - u8 type; - class = read_pci_config(bus, slot, func, - PCI_CLASS_REVISION); - if (class == 0xffffffff) - break; - - switch (class >> 16) { - case PCI_CLASS_BRIDGE_HOST: - case PCI_CLASS_BRIDGE_OTHER: /* needed? */ - /* AGP bridge? 
*/ - cap = find_cap(bus, slot, func, - PCI_CAP_ID_AGP); - if (!cap) - break; - *valid_agp = 1; - return read_agp(bus, slot, func, cap, - order); - } - - /* No multi-function device? */ - type = read_pci_config_byte(bus, slot, func, - PCI_HEADER_TYPE); - if (!(type & 0x80)) - break; - } - } - } - printk(KERN_INFO "No AGP bridge found\n"); - - return 0; -} - -static int gart_fix_e820 __initdata = 1; - -static int __init parse_gart_mem(char *p) -{ - if (!p) - return -EINVAL; - - if (!strncmp(p, "off", 3)) - gart_fix_e820 = 0; - else if (!strncmp(p, "on", 2)) - gart_fix_e820 = 1; - - return 0; -} -early_param("gart_fix_e820", parse_gart_mem); - -void __init early_gart_iommu_check(void) -{ - /* - * in case it is enabled before, esp for kexec/kdump, - * previous kernel already enable that. memset called - * by allocate_aperture/__alloc_bootmem_nopanic cause restart. - * or second kernel have different position for GART hole. and new - * kernel could use hole as RAM that is still used by GART set by - * first kernel - * or BIOS forget to put that in reserved. - * try to update e820 to make that region as reserved. - */ - u32 agp_aper_order = 0; - int i, fix, slot, valid_agp = 0; - u32 ctl; - u32 aper_size = 0, aper_order = 0, last_aper_order = 0; - u64 aper_base = 0, last_aper_base = 0; - int aper_enabled = 0, last_aper_enabled = 0, last_valid = 0; - - if (!early_pci_allowed()) - return; - - /* This is mostly duplicate of iommu_hole_init */ - search_agp_bridge(&agp_aper_order, &valid_agp); - - fix = 0; - for (i = 0; amd_nb_bus_dev_ranges[i].dev_limit; i++) { - int bus; - int dev_base, dev_limit; - - bus = amd_nb_bus_dev_ranges[i].bus; - dev_base = amd_nb_bus_dev_ranges[i].dev_base; - dev_limit = amd_nb_bus_dev_ranges[i].dev_limit; - - for (slot = dev_base; slot < dev_limit; slot++) { - if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00))) - continue; - - ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); - aper_enabled = ctl & GARTEN; - aper_order = (ctl >> 1) & 7; - aper_size = (32 * 1024 * 1024) << aper_order; - aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff; - aper_base <<= 25; - - if (last_valid) { - if ((aper_order != last_aper_order) || - (aper_base != last_aper_base) || - (aper_enabled != last_aper_enabled)) { - fix = 1; - break; - } - } - - last_aper_order = aper_order; - last_aper_base = aper_base; - last_aper_enabled = aper_enabled; - last_valid = 1; - } - } - - if (!fix && !aper_enabled) - return; - - if (!aper_base || !aper_size || aper_base + aper_size > 0x100000000UL) - fix = 1; - - if (gart_fix_e820 && !fix && aper_enabled) { - if (e820_any_mapped(aper_base, aper_base + aper_size, - E820_RAM)) { - /* reserve it, so we can reuse it in second kernel */ - printk(KERN_INFO "update e820 for GART\n"); - e820_add_region(aper_base, aper_size, E820_RESERVED); - update_e820(); - } - } - - if (valid_agp) - return; - - /* disable them all at first */ - for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) { - int bus; - int dev_base, dev_limit; - - bus = amd_nb_bus_dev_ranges[i].bus; - dev_base = amd_nb_bus_dev_ranges[i].dev_base; - dev_limit = amd_nb_bus_dev_ranges[i].dev_limit; - - for (slot = dev_base; slot < dev_limit; slot++) { - if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00))) - continue; - - ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); - ctl &= ~GARTEN; - write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); - } - } - -} - -static int __initdata printed_gart_size_msg; - -int __init gart_iommu_hole_init(void) -{ 
- u32 agp_aper_base = 0, agp_aper_order = 0; - u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0; - u64 aper_base, last_aper_base = 0; - int fix, slot, valid_agp = 0; - int i, node; - - if (gart_iommu_aperture_disabled || !fix_aperture || - !early_pci_allowed()) - return -ENODEV; - - printk(KERN_INFO "Checking aperture...\n"); - - if (!fallback_aper_force) - agp_aper_base = search_agp_bridge(&agp_aper_order, &valid_agp); - - fix = 0; - node = 0; - for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) { - int bus; - int dev_base, dev_limit; - u32 ctl; - - bus = amd_nb_bus_dev_ranges[i].bus; - dev_base = amd_nb_bus_dev_ranges[i].dev_base; - dev_limit = amd_nb_bus_dev_ranges[i].dev_limit; - - for (slot = dev_base; slot < dev_limit; slot++) { - if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00))) - continue; - - iommu_detected = 1; - gart_iommu_aperture = 1; - x86_init.iommu.iommu_init = gart_iommu_init; - - ctl = read_pci_config(bus, slot, 3, - AMD64_GARTAPERTURECTL); - - /* - * Before we do anything else disable the GART. It may - * still be enabled if we boot into a crash-kernel here. - * Reconfiguring the GART while it is enabled could have - * unknown side-effects. - */ - ctl &= ~GARTEN; - write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); - - aper_order = (ctl >> 1) & 7; - aper_size = (32 * 1024 * 1024) << aper_order; - aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff; - aper_base <<= 25; - - printk(KERN_INFO "Node %d: aperture @ %Lx size %u MB\n", - node, aper_base, aper_size >> 20); - node++; - - if (!aperture_valid(aper_base, aper_size, 64<<20)) { - if (valid_agp && agp_aper_base && - agp_aper_base == aper_base && - agp_aper_order == aper_order) { - /* the same between two setting from NB and agp */ - if (!no_iommu && - max_pfn > MAX_DMA32_PFN && - !printed_gart_size_msg) { - printk(KERN_ERR "you are using iommu with agp, but GART size is less than 64M\n"); - printk(KERN_ERR "please increase GART size in your BIOS setup\n"); - printk(KERN_ERR "if BIOS doesn't have that option, contact your HW vendor!\n"); - printed_gart_size_msg = 1; - } - } else { - fix = 1; - goto out; - } - } - - if ((last_aper_order && aper_order != last_aper_order) || - (last_aper_base && aper_base != last_aper_base)) { - fix = 1; - goto out; - } - last_aper_order = aper_order; - last_aper_base = aper_base; - } - } - -out: - if (!fix && !fallback_aper_force) { - if (last_aper_base) { - unsigned long n = (32 * 1024 * 1024) << last_aper_order; - - insert_aperture_resource((u32)last_aper_base, n); - return 1; - } - return 0; - } - - if (!fallback_aper_force) { - aper_alloc = agp_aper_base; - aper_order = agp_aper_order; - } - - if (aper_alloc) { - /* Got the aperture from the AGP bridge */ - } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) || - force_iommu || - valid_agp || - fallback_aper_force) { - printk(KERN_INFO - "Your BIOS doesn't leave a aperture memory hole\n"); - printk(KERN_INFO - "Please enable the IOMMU option in the BIOS setup\n"); - printk(KERN_INFO - "This costs you %d MB of RAM\n", - 32 << fallback_aper_order); - - aper_order = fallback_aper_order; - aper_alloc = allocate_aperture(); - if (!aper_alloc) { - /* - * Could disable AGP and IOMMU here, but it's - * probably not worth it. But the later users - * cannot deal with bad apertures and turning - * on the aperture over memory causes very - * strange problems, so it's better to panic - * early. 
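[Editor's note] gart_iommu_hole_init() above walks every northbridge and only trusts the existing hole if all nodes report the same aperture order and base; any disagreement forces the fix-up path. A compact sketch of that check over made-up per-node values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Made-up per-node readings of (order, base); node 2 disagrees. */
	struct { unsigned order; uint64_t base; } nb[] = {
		{ 3, 0x80000000ULL },
		{ 3, 0x80000000ULL },
		{ 2, 0x80000000ULL },
	};
	unsigned last_order = 0, i;
	uint64_t last_base = 0;
	int fix = 0;

	for (i = 0; i < sizeof(nb) / sizeof(nb[0]); i++) {
		if ((last_order && nb[i].order != last_order) ||
		    (last_base && nb[i].base != last_base)) {
			fix = 1;
			break;
		}
		last_order = nb[i].order;
		last_base = nb[i].base;
	}

	printf("aperture fix-up %s\n", fix ? "needed" : "not needed");
	return 0;
}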
- */ - panic("Not enough memory for aperture"); - } - } else { - return 0; - } - - /* Fix up the north bridges */ - for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) { - int bus, dev_base, dev_limit; - - /* - * Don't enable translation yet but enable GART IO and CPU - * accesses and set DISTLBWALKPRB since GART table memory is UC. - */ - u32 ctl = aper_order << 1; - - bus = amd_nb_bus_dev_ranges[i].bus; - dev_base = amd_nb_bus_dev_ranges[i].dev_base; - dev_limit = amd_nb_bus_dev_ranges[i].dev_limit; - for (slot = dev_base; slot < dev_limit; slot++) { - if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00))) - continue; - - write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); - write_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE, aper_alloc >> 25); - } - } - - set_up_gart_resume(aper_order, aper_alloc); - - return 1; -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/apic/Makefile b/ANDROID_3.4.5/arch/x86/kernel/apic/Makefile deleted file mode 100644 index 0ae0323b..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/apic/Makefile +++ /dev/null @@ -1,27 +0,0 @@ -# -# Makefile for local APIC drivers and for the IO-APIC code -# - -obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o ipi.o -obj-y += hw_nmi.o - -obj-$(CONFIG_X86_IO_APIC) += io_apic.o -obj-$(CONFIG_SMP) += ipi.o - -ifeq ($(CONFIG_X86_64),y) -# APIC probe will depend on the listing order here -obj-$(CONFIG_X86_NUMACHIP) += apic_numachip.o -obj-$(CONFIG_X86_UV) += x2apic_uv_x.o -obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o -obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o -obj-y += apic_flat_64.o -endif - -# APIC probe will depend on the listing order here -obj-$(CONFIG_X86_NUMAQ) += numaq_32.o -obj-$(CONFIG_X86_SUMMIT) += summit_32.o -obj-$(CONFIG_X86_BIGSMP) += bigsmp_32.o -obj-$(CONFIG_X86_ES7000) += es7000_32.o - -# For 32bit, probe_32 need to be listed last -obj-$(CONFIG_X86_LOCAL_APIC) += probe_$(BITS).o diff --git a/ANDROID_3.4.5/arch/x86/kernel/apic/apic.c b/ANDROID_3.4.5/arch/x86/kernel/apic/apic.c deleted file mode 100644 index edc24480..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/apic/apic.c +++ /dev/null @@ -1,2475 +0,0 @@ -/* - * Local APIC handling, local APIC timers - * - * (c) 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com> - * - * Fixes - * Maciej W. Rozycki : Bits for genuine 82489DX APICs; - * thanks to Eric Gilmore - * and Rolf G. Tews - * for testing these extensively. - * Maciej W. Rozycki : Various updates and fixes. - * Mikael Pettersson : Power Management for UP-APIC. - * Pavel Machek and - * Mikael Pettersson : PM converted to driver model. 
- */ - -#include <linux/perf_event.h> -#include <linux/kernel_stat.h> -#include <linux/mc146818rtc.h> -#include <linux/acpi_pmtmr.h> -#include <linux/clockchips.h> -#include <linux/interrupt.h> -#include <linux/bootmem.h> -#include <linux/ftrace.h> -#include <linux/ioport.h> -#include <linux/module.h> -#include <linux/syscore_ops.h> -#include <linux/delay.h> -#include <linux/timex.h> -#include <linux/i8253.h> -#include <linux/dmar.h> -#include <linux/init.h> -#include <linux/cpu.h> -#include <linux/dmi.h> -#include <linux/smp.h> -#include <linux/mm.h> - -#include <asm/perf_event.h> -#include <asm/x86_init.h> -#include <asm/pgalloc.h> -#include <linux/atomic.h> -#include <asm/mpspec.h> -#include <asm/i8259.h> -#include <asm/proto.h> -#include <asm/apic.h> -#include <asm/io_apic.h> -#include <asm/desc.h> -#include <asm/hpet.h> -#include <asm/idle.h> -#include <asm/mtrr.h> -#include <asm/time.h> -#include <asm/smp.h> -#include <asm/mce.h> -#include <asm/tsc.h> -#include <asm/hypervisor.h> - -unsigned int num_processors; - -unsigned disabled_cpus __cpuinitdata; - -/* Processor that is doing the boot up */ -unsigned int boot_cpu_physical_apicid = -1U; - -/* - * The highest APIC ID seen during enumeration. - */ -unsigned int max_physical_apicid; - -/* - * Bitmask of physically existing CPUs: - */ -physid_mask_t phys_cpu_present_map; - -/* - * Map cpu index to physical APIC ID - */ -DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); -DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); -EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); -EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); - -#ifdef CONFIG_X86_32 - -/* - * On x86_32, the mapping between cpu and logical apicid may vary - * depending on apic in use. The following early percpu variable is - * used for the mapping. This is where the behaviors of x86_64 and 32 - * actually diverge. Let's keep it ugly for now. - */ -DEFINE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid, BAD_APICID); - -/* - * Knob to control our willingness to enable the local APIC. - * - * +1=force-enable - */ -static int force_enable_local_apic __initdata; -/* - * APIC command line parameters - */ -static int __init parse_lapic(char *arg) -{ - force_enable_local_apic = 1; - return 0; -} -early_param("lapic", parse_lapic); -/* Local APIC was disabled by the BIOS and enabled by the kernel */ -static int enabled_via_apicbase; - -/* - * Handle interrupt mode configuration register (IMCR). - * This register controls whether the interrupt signals - * that reach the BSP come from the master PIC or from the - * local APIC. Before entering Symmetric I/O Mode, either - * the BIOS or the operating system must switch out of - * PIC Mode by changing the IMCR. 
- */ -static inline void imcr_pic_to_apic(void) -{ - /* select IMCR register */ - outb(0x70, 0x22); - /* NMI and 8259 INTR go through APIC */ - outb(0x01, 0x23); -} - -static inline void imcr_apic_to_pic(void) -{ - /* select IMCR register */ - outb(0x70, 0x22); - /* NMI and 8259 INTR go directly to BSP */ - outb(0x00, 0x23); -} -#endif - -#ifdef CONFIG_X86_64 -static int apic_calibrate_pmtmr __initdata; -static __init int setup_apicpmtimer(char *s) -{ - apic_calibrate_pmtmr = 1; - notsc_setup(NULL); - return 0; -} -__setup("apicpmtimer", setup_apicpmtimer); -#endif - -int x2apic_mode; -#ifdef CONFIG_X86_X2APIC -/* x2apic enabled before OS handover */ -int x2apic_preenabled; -static int x2apic_disabled; -static int nox2apic; -static __init int setup_nox2apic(char *str) -{ - if (x2apic_enabled()) { - int apicid = native_apic_msr_read(APIC_ID); - - if (apicid >= 255) { - pr_warning("Apicid: %08x, cannot enforce nox2apic\n", - apicid); - return 0; - } - - pr_warning("x2apic already enabled. will disable it\n"); - } else - setup_clear_cpu_cap(X86_FEATURE_X2APIC); - - nox2apic = 1; - - return 0; -} -early_param("nox2apic", setup_nox2apic); -#endif - -unsigned long mp_lapic_addr; -int disable_apic; -/* Disable local APIC timer from the kernel commandline or via dmi quirk */ -static int disable_apic_timer __initdata; -/* Local APIC timer works in C2 */ -int local_apic_timer_c2_ok; -EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); - -int first_system_vector = 0xfe; - -/* - * Debug level, exported for io_apic.c - */ -unsigned int apic_verbosity; - -int pic_mode; - -/* Have we found an MP table */ -int smp_found_config; - -static struct resource lapic_resource = { - .name = "Local APIC", - .flags = IORESOURCE_MEM | IORESOURCE_BUSY, -}; - -unsigned int lapic_timer_frequency = 0; - -static void apic_pm_activate(void); - -static unsigned long apic_phys; - -/* - * Get the LAPIC version - */ -static inline int lapic_get_version(void) -{ - return GET_APIC_VERSION(apic_read(APIC_LVR)); -} - -/* - * Check, if the APIC is integrated or a separate chip - */ -static inline int lapic_is_integrated(void) -{ -#ifdef CONFIG_X86_64 - return 1; -#else - return APIC_INTEGRATED(lapic_get_version()); -#endif -} - -/* - * Check, whether this is a modern or a first generation APIC - */ -static int modern_apic(void) -{ - /* AMD systems use old APIC versions, so check the CPU */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && - boot_cpu_data.x86 >= 0xf) - return 1; - return lapic_get_version() >= 0x14; -} - -/* - * right after this call apic become NOOP driven - * so apic->write/read doesn't do anything - */ -static void __init apic_disable(void) -{ - pr_info("APIC: switched to apic NOOP\n"); - apic = &apic_noop; -} - -void native_apic_wait_icr_idle(void) -{ - while (apic_read(APIC_ICR) & APIC_ICR_BUSY) - cpu_relax(); -} - -u32 native_safe_apic_wait_icr_idle(void) -{ - u32 send_status; - int timeout; - - timeout = 0; - do { - send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; - if (!send_status) - break; - inc_irq_stat(icr_read_retry_count); - udelay(100); - } while (timeout++ < 1000); - - return send_status; -} - -void native_apic_icr_write(u32 low, u32 id) -{ - apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); - apic_write(APIC_ICR, low); -} - -u64 native_apic_icr_read(void) -{ - u32 icr1, icr2; - - icr2 = apic_read(APIC_ICR2); - icr1 = apic_read(APIC_ICR); - - return icr1 | ((u64)icr2 << 32); -} - -#ifdef CONFIG_X86_32 -/** - * get_physical_broadcast - Get number of physical broadcast IDs - */ -int 
get_physical_broadcast(void) -{ - return modern_apic() ? 0xff : 0xf; -} -#endif - -/** - * lapic_get_maxlvt - get the maximum number of local vector table entries - */ -int lapic_get_maxlvt(void) -{ - unsigned int v; - - v = apic_read(APIC_LVR); - /* - * - we always have APIC integrated on 64bit mode - * - 82489DXs do not report # of LVT entries - */ - return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; -} - -/* - * Local APIC timer - */ - -/* Clock divisor */ -#define APIC_DIVISOR 16 - -/* - * This function sets up the local APIC timer, with a timeout of - * 'clocks' APIC bus clock. During calibration we actually call - * this function twice on the boot CPU, once with a bogus timeout - * value, second time for real. The other (noncalibrating) CPUs - * call this function only once, with the real, calibrated value. - * - * We do reads before writes even if unnecessary, to get around the - * P5 APIC double write bug. - */ -static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) -{ - unsigned int lvtt_value, tmp_value; - - lvtt_value = LOCAL_TIMER_VECTOR; - if (!oneshot) - lvtt_value |= APIC_LVT_TIMER_PERIODIC; - if (!lapic_is_integrated()) - lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); - - if (!irqen) - lvtt_value |= APIC_LVT_MASKED; - - apic_write(APIC_LVTT, lvtt_value); - - /* - * Divide PICLK by 16 - */ - tmp_value = apic_read(APIC_TDCR); - apic_write(APIC_TDCR, - (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | - APIC_TDR_DIV_16); - - if (!oneshot) - apic_write(APIC_TMICT, clocks / APIC_DIVISOR); -} - -/* - * Setup extended LVT, AMD specific - * - * Software should use the LVT offsets the BIOS provides. The offsets - * are determined by the subsystems using it like those for MCE - * threshold or IBS. On K8 only offset 0 (APIC500) and MCE interrupts - * are supported. Beginning with family 10h at least 4 offsets are - * available. - * - * Since the offsets must be consistent for all cores, we keep track - * of the LVT offsets in software and reserve the offset for the same - * vector also to be used on other cores. An offset is freed by - * setting the entry to APIC_EILVT_MASKED. - * - * If the BIOS is right, there should be no conflicts. Otherwise a - * "[Firmware Bug]: ..." error message is generated. However, if - * software does not properly determines the offsets, it is not - * necessarily a BIOS bug. - */ - -static atomic_t eilvt_offsets[APIC_EILVT_NR_MAX]; - -static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new) -{ - return (old & APIC_EILVT_MASKED) - || (new == APIC_EILVT_MASKED) - || ((new & ~APIC_EILVT_MASKED) == old); -} - -static unsigned int reserve_eilvt_offset(int offset, unsigned int new) -{ - unsigned int rsvd, vector; - - if (offset >= APIC_EILVT_NR_MAX) - return ~0; - - rsvd = atomic_read(&eilvt_offsets[offset]); - do { - vector = rsvd & ~APIC_EILVT_MASKED; /* 0: unassigned */ - if (vector && !eilvt_entry_is_changeable(vector, new)) - /* may not change if vectors are different */ - return rsvd; - rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new); - } while (rsvd != new); - - rsvd &= ~APIC_EILVT_MASKED; - if (rsvd && rsvd != vector) - pr_info("LVT offset %d assigned for vector 0x%02x\n", - offset, rsvd); - - return new; -} - -/* - * If mask=1, the LVT entry does not generate interrupts while mask=0 - * enables the vector. See also the BKDGs. Must be called with - * preemption disabled. 
- */ - -int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask) -{ - unsigned long reg = APIC_EILVTn(offset); - unsigned int new, old, reserved; - - new = (mask << 16) | (msg_type << 8) | vector; - old = apic_read(reg); - reserved = reserve_eilvt_offset(offset, new); - - if (reserved != new) { - pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for " - "vector 0x%x, but the register is already in use for " - "vector 0x%x on another cpu\n", - smp_processor_id(), reg, offset, new, reserved); - return -EINVAL; - } - - if (!eilvt_entry_is_changeable(old, new)) { - pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for " - "vector 0x%x, but the register is already in use for " - "vector 0x%x on this cpu\n", - smp_processor_id(), reg, offset, new, old); - return -EBUSY; - } - - apic_write(reg, new); - - return 0; -} -EXPORT_SYMBOL_GPL(setup_APIC_eilvt); - -/* - * Program the next event, relative to now - */ -static int lapic_next_event(unsigned long delta, - struct clock_event_device *evt) -{ - apic_write(APIC_TMICT, delta); - return 0; -} - -/* - * Setup the lapic timer in periodic or oneshot mode - */ -static void lapic_timer_setup(enum clock_event_mode mode, - struct clock_event_device *evt) -{ - unsigned long flags; - unsigned int v; - - /* Lapic used as dummy for broadcast ? */ - if (evt->features & CLOCK_EVT_FEAT_DUMMY) - return; - - local_irq_save(flags); - - switch (mode) { - case CLOCK_EVT_MODE_PERIODIC: - case CLOCK_EVT_MODE_ONESHOT: - __setup_APIC_LVTT(lapic_timer_frequency, - mode != CLOCK_EVT_MODE_PERIODIC, 1); - break; - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - v = apic_read(APIC_LVTT); - v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); - apic_write(APIC_LVTT, v); - apic_write(APIC_TMICT, 0); - break; - case CLOCK_EVT_MODE_RESUME: - /* Nothing to do here */ - break; - } - - local_irq_restore(flags); -} - -/* - * Local APIC timer broadcast function - */ -static void lapic_timer_broadcast(const struct cpumask *mask) -{ -#ifdef CONFIG_SMP - apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR); -#endif -} - - -/* - * The local apic timer can be used for any function which is CPU local. - */ -static struct clock_event_device lapic_clockevent = { - .name = "lapic", - .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT - | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY, - .shift = 32, - .set_mode = lapic_timer_setup, - .set_next_event = lapic_next_event, - .broadcast = lapic_timer_broadcast, - .rating = 100, - .irq = -1, -}; -static DEFINE_PER_CPU(struct clock_event_device, lapic_events); - -/* - * Setup the local APIC timer for this CPU. Copy the initialized values - * of the boot CPU and register the clock event in the framework. - */ -static void __cpuinit setup_APIC_timer(void) -{ - struct clock_event_device *levt = &__get_cpu_var(lapic_events); - - if (this_cpu_has(X86_FEATURE_ARAT)) { - lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP; - /* Make LAPIC timer preferrable over percpu HPET */ - lapic_clockevent.rating = 150; - } - - memcpy(levt, &lapic_clockevent, sizeof(*levt)); - levt->cpumask = cpumask_of(smp_processor_id()); - - clockevents_register_device(levt); -} - -/* - * In this functions we calibrate APIC bus clocks to the external timer. - * - * We want to do the calibration only once since we want to have local timer - * irqs syncron. CPUs connected by the same APIC bus have the very same bus - * frequency. 
- * - * This was previously done by reading the PIT/HPET and waiting for a wrap - * around to find out, that a tick has elapsed. I have a box, where the PIT - * readout is broken, so it never gets out of the wait loop again. This was - * also reported by others. - * - * Monitoring the jiffies value is inaccurate and the clockevents - * infrastructure allows us to do a simple substitution of the interrupt - * handler. - * - * The calibration routine also uses the pm_timer when possible, as the PIT - * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes - * back to normal later in the boot process). - */ - -#define LAPIC_CAL_LOOPS (HZ/10) - -static __initdata int lapic_cal_loops = -1; -static __initdata long lapic_cal_t1, lapic_cal_t2; -static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2; -static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2; -static __initdata unsigned long lapic_cal_j1, lapic_cal_j2; - -/* - * Temporary interrupt handler. - */ -static void __init lapic_cal_handler(struct clock_event_device *dev) -{ - unsigned long long tsc = 0; - long tapic = apic_read(APIC_TMCCT); - unsigned long pm = acpi_pm_read_early(); - - if (cpu_has_tsc) - rdtscll(tsc); - - switch (lapic_cal_loops++) { - case 0: - lapic_cal_t1 = tapic; - lapic_cal_tsc1 = tsc; - lapic_cal_pm1 = pm; - lapic_cal_j1 = jiffies; - break; - - case LAPIC_CAL_LOOPS: - lapic_cal_t2 = tapic; - lapic_cal_tsc2 = tsc; - if (pm < lapic_cal_pm1) - pm += ACPI_PM_OVRRUN; - lapic_cal_pm2 = pm; - lapic_cal_j2 = jiffies; - break; - } -} - -static int __init -calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) -{ - const long pm_100ms = PMTMR_TICKS_PER_SEC / 10; - const long pm_thresh = pm_100ms / 100; - unsigned long mult; - u64 res; - -#ifndef CONFIG_X86_PM_TIMER - return -1; -#endif - - apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm); - - /* Check, if the PM timer is available */ - if (!deltapm) - return -1; - - mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22); - - if (deltapm > (pm_100ms - pm_thresh) && - deltapm < (pm_100ms + pm_thresh)) { - apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n"); - return 0; - } - - res = (((u64)deltapm) * mult) >> 22; - do_div(res, 1000000); - pr_warning("APIC calibration not consistent " - "with PM-Timer: %ldms instead of 100ms\n",(long)res); - - /* Correct the lapic counter value */ - res = (((u64)(*delta)) * pm_100ms); - do_div(res, deltapm); - pr_info("APIC delta adjusted to PM-Timer: " - "%lu (%ld)\n", (unsigned long)res, *delta); - *delta = (long)res; - - /* Correct the tsc counter value */ - if (cpu_has_tsc) { - res = (((u64)(*deltatsc)) * pm_100ms); - do_div(res, deltapm); - apic_printk(APIC_VERBOSE, "TSC delta adjusted to " - "PM-Timer: %lu (%ld)\n", - (unsigned long)res, *deltatsc); - *deltatsc = (long)res; - } - - return 0; -} - -static int __init calibrate_APIC_clock(void) -{ - struct clock_event_device *levt = &__get_cpu_var(lapic_events); - void (*real_handler)(struct clock_event_device *dev); - unsigned long deltaj; - long delta, deltatsc; - int pm_referenced = 0; - - /** - * check if lapic timer has already been calibrated by platform - * specific routine, such as tsc calibration code. if so, we just fill - * in the clockevent structure and return. 
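[Editor's note] The comment above explains why the APIC bus clock is calibrated against an external timer over a 100 ms window. The arithmetic that calibrate_APIC_clock() below applies to the measured count is simple; here is a standalone sketch with invented numbers (HZ and the measured delta are assumptions, APIC_DIVISOR is the divide-by-16 used above):

#include <stdio.h>

#define HZ            250          /* assumed tick rate               */
#define APIC_DIVISOR  16           /* timer input clock divided by 16 */
#define CAL_LOOPS     (HZ / 10)    /* calibration window: 100 ms      */

int main(void)
{
	/* Invented measurement: how far the APIC count-down register moved
	 * during the 100 ms window. */
	long delta = 1250000;

	/* Counts per tick, scaled back up by the divisor, give the clock
	 * that feeds the timer -- the same formula used below. */
	unsigned long timer_freq = (unsigned long)(delta * APIC_DIVISOR) / CAL_LOOPS;

	printf("lapic_timer_frequency = %lu (%lu.%04lu MHz bus clock)\n",
	       timer_freq,
	       timer_freq / (1000000 / HZ),
	       timer_freq % (1000000 / HZ));
	return 0;
}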
- */ - - if (lapic_timer_frequency) { - apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n", - lapic_timer_frequency); - lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR, - TICK_NSEC, lapic_clockevent.shift); - lapic_clockevent.max_delta_ns = - clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); - lapic_clockevent.min_delta_ns = - clockevent_delta2ns(0xF, &lapic_clockevent); - lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; - return 0; - } - - local_irq_disable(); - - /* Replace the global interrupt handler */ - real_handler = global_clock_event->event_handler; - global_clock_event->event_handler = lapic_cal_handler; - - /* - * Setup the APIC counter to maximum. There is no way the lapic - * can underflow in the 100ms detection time frame - */ - __setup_APIC_LVTT(0xffffffff, 0, 0); - - /* Let the interrupts run */ - local_irq_enable(); - - while (lapic_cal_loops <= LAPIC_CAL_LOOPS) - cpu_relax(); - - local_irq_disable(); - - /* Restore the real event handler */ - global_clock_event->event_handler = real_handler; - - /* Build delta t1-t2 as apic timer counts down */ - delta = lapic_cal_t1 - lapic_cal_t2; - apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); - - deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1); - - /* we trust the PM based calibration if possible */ - pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1, - &delta, &deltatsc); - - /* Calculate the scaled math multiplication factor */ - lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, - lapic_clockevent.shift); - lapic_clockevent.max_delta_ns = - clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent); - lapic_clockevent.min_delta_ns = - clockevent_delta2ns(0xF, &lapic_clockevent); - - lapic_timer_frequency = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; - - apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); - apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult); - apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", - lapic_timer_frequency); - - if (cpu_has_tsc) { - apic_printk(APIC_VERBOSE, "..... CPU clock speed is " - "%ld.%04ld MHz.\n", - (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ), - (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ)); - } - - apic_printk(APIC_VERBOSE, "..... host bus clock speed is " - "%u.%04u MHz.\n", - lapic_timer_frequency / (1000000 / HZ), - lapic_timer_frequency % (1000000 / HZ)); - - /* - * Do a sanity check on the APIC calibration result - */ - if (lapic_timer_frequency < (1000000 / HZ)) { - local_irq_enable(); - pr_warning("APIC frequency too slow, disabling apic timer\n"); - return -1; - } - - levt->features &= ~CLOCK_EVT_FEAT_DUMMY; - - /* - * PM timer calibration failed or not turned on - * so lets try APIC timer based calibration - */ - if (!pm_referenced) { - apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); - - /* - * Setup the apic timer manually - */ - levt->event_handler = lapic_cal_handler; - lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt); - lapic_cal_loops = -1; - - /* Let the interrupts run */ - local_irq_enable(); - - while (lapic_cal_loops <= LAPIC_CAL_LOOPS) - cpu_relax(); - - /* Stop the lapic timer */ - lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt); - - /* Jiffies delta */ - deltaj = lapic_cal_j2 - lapic_cal_j1; - apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj); - - /* Check, if the jiffies result is consistent */ - if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2) - apic_printk(APIC_VERBOSE, "... 
jiffies result ok\n"); - else - levt->features |= CLOCK_EVT_FEAT_DUMMY; - } else - local_irq_enable(); - - if (levt->features & CLOCK_EVT_FEAT_DUMMY) { - pr_warning("APIC timer disabled due to verification failure\n"); - return -1; - } - - return 0; -} - -/* - * Setup the boot APIC - * - * Calibrate and verify the result. - */ -void __init setup_boot_APIC_clock(void) -{ - /* - * The local apic timer can be disabled via the kernel - * commandline or from the CPU detection code. Register the lapic - * timer as a dummy clock event source on SMP systems, so the - * broadcast mechanism is used. On UP systems simply ignore it. - */ - if (disable_apic_timer) { - pr_info("Disabling APIC timer\n"); - /* No broadcast on UP ! */ - if (num_possible_cpus() > 1) { - lapic_clockevent.mult = 1; - setup_APIC_timer(); - } - return; - } - - apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" - "calibrating APIC timer ...\n"); - - if (calibrate_APIC_clock()) { - /* No broadcast on UP ! */ - if (num_possible_cpus() > 1) - setup_APIC_timer(); - return; - } - - /* - * If nmi_watchdog is set to IO_APIC, we need the - * PIT/HPET going. Otherwise register lapic as a dummy - * device. - */ - lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; - - /* Setup the lapic or request the broadcast */ - setup_APIC_timer(); -} - -void __cpuinit setup_secondary_APIC_clock(void) -{ - setup_APIC_timer(); -} - -/* - * The guts of the apic timer interrupt - */ -static void local_apic_timer_interrupt(void) -{ - int cpu = smp_processor_id(); - struct clock_event_device *evt = &per_cpu(lapic_events, cpu); - - /* - * Normally we should not be here till LAPIC has been initialized but - * in some cases like kdump, its possible that there is a pending LAPIC - * timer interrupt from previous kernel's context and is delivered in - * new kernel the moment interrupts are enabled. - * - * Interrupts are enabled early and LAPIC is setup much later, hence - * its possible that when we get here evt->event_handler is NULL. - * Check for event_handler being NULL and discard the interrupt as - * spurious. - */ - if (!evt->event_handler) { - pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", cpu); - /* Switch it off */ - lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); - return; - } - - /* - * the NMI deadlock-detector uses this. - */ - inc_irq_stat(apic_timer_irqs); - - evt->event_handler(evt); -} - -/* - * Local APIC timer interrupt. This is the most natural way for doing - * local interrupts, but local timer interrupts can be emulated by - * broadcast interrupts too. [in case the hw doesn't support APIC timers] - * - * [ if a single-CPU system runs an SMP kernel then we call the local - * interrupt as well. Thus we cannot inline the local irq ... ] - */ -void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - - /* - * NOTE! We'd better ACK the irq immediately, - * because timer handling can be slow. - */ - ack_APIC_irq(); - /* - * update_process_times() expects us to have done irq_enter(). - * Besides, if we don't timer interrupts ignore the global - * interrupt lock, which is the WrongThing (tm) to do. 
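- *
- * Hence the irq_enter()/irq_exit() pair below, bracketing the actual
- * handler call.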
- */ - irq_enter(); - exit_idle(); - local_apic_timer_interrupt(); - irq_exit(); - - set_irq_regs(old_regs); -} - -int setup_profiling_timer(unsigned int multiplier) -{ - return -EINVAL; -} - -/* - * Local APIC start and shutdown - */ - -/** - * clear_local_APIC - shutdown the local APIC - * - * This is called, when a CPU is disabled and before rebooting, so the state of - * the local APIC has no dangling leftovers. Also used to cleanout any BIOS - * leftovers during boot. - */ -void clear_local_APIC(void) -{ - int maxlvt; - u32 v; - - /* APIC hasn't been mapped yet */ - if (!x2apic_mode && !apic_phys) - return; - - maxlvt = lapic_get_maxlvt(); - /* - * Masking an LVT entry can trigger a local APIC error - * if the vector is zero. Mask LVTERR first to prevent this. - */ - if (maxlvt >= 3) { - v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ - apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); - } - /* - * Careful: we have to set masks only first to deassert - * any level-triggered sources. - */ - v = apic_read(APIC_LVTT); - apic_write(APIC_LVTT, v | APIC_LVT_MASKED); - v = apic_read(APIC_LVT0); - apic_write(APIC_LVT0, v | APIC_LVT_MASKED); - v = apic_read(APIC_LVT1); - apic_write(APIC_LVT1, v | APIC_LVT_MASKED); - if (maxlvt >= 4) { - v = apic_read(APIC_LVTPC); - apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); - } - - /* lets not touch this if we didn't frob it */ -#ifdef CONFIG_X86_THERMAL_VECTOR - if (maxlvt >= 5) { - v = apic_read(APIC_LVTTHMR); - apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); - } -#endif -#ifdef CONFIG_X86_MCE_INTEL - if (maxlvt >= 6) { - v = apic_read(APIC_LVTCMCI); - if (!(v & APIC_LVT_MASKED)) - apic_write(APIC_LVTCMCI, v | APIC_LVT_MASKED); - } -#endif - - /* - * Clean APIC state for other OSs: - */ - apic_write(APIC_LVTT, APIC_LVT_MASKED); - apic_write(APIC_LVT0, APIC_LVT_MASKED); - apic_write(APIC_LVT1, APIC_LVT_MASKED); - if (maxlvt >= 3) - apic_write(APIC_LVTERR, APIC_LVT_MASKED); - if (maxlvt >= 4) - apic_write(APIC_LVTPC, APIC_LVT_MASKED); - - /* Integrated APIC (!82489DX) ? */ - if (lapic_is_integrated()) { - if (maxlvt > 3) - /* Clear ESR due to Pentium errata 3AP and 11AP */ - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - } -} - -/** - * disable_local_APIC - clear and disable the local APIC - */ -void disable_local_APIC(void) -{ - unsigned int value; - - /* APIC hasn't been mapped yet */ - if (!x2apic_mode && !apic_phys) - return; - - clear_local_APIC(); - - /* - * Disable APIC (implies clearing of registers - * for 82489DX!). - */ - value = apic_read(APIC_SPIV); - value &= ~APIC_SPIV_APIC_ENABLED; - apic_write(APIC_SPIV, value); - -#ifdef CONFIG_X86_32 - /* - * When LAPIC was disabled by the BIOS and enabled by the kernel, - * restore the disabled state. - */ - if (enabled_via_apicbase) { - unsigned int l, h; - - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_ENABLE; - wrmsr(MSR_IA32_APICBASE, l, h); - } -#endif -} - -/* - * If Linux enabled the LAPIC against the BIOS default disable it down before - * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and - * not power-off. Additionally clear all LVT entries before disable_local_APIC - * for the case where Linux didn't enable the LAPIC. 
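- *
- * Roughly: clear_local_APIC() masks every LVT entry, and
- * disable_local_APIC() then drops APIC_SPIV_APIC_ENABLED (plus, on
- * 32-bit, the enable bit in IA32_APIC_BASE if we set it ourselves).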
- */ -void lapic_shutdown(void) -{ - unsigned long flags; - - if (!cpu_has_apic && !apic_from_smp_config()) - return; - - local_irq_save(flags); - -#ifdef CONFIG_X86_32 - if (!enabled_via_apicbase) - clear_local_APIC(); - else -#endif - disable_local_APIC(); - - - local_irq_restore(flags); -} - -/* - * This is to verify that we're looking at a real local APIC. - * Check these against your board if the CPUs aren't getting - * started for no apparent reason. - */ -int __init verify_local_APIC(void) -{ - unsigned int reg0, reg1; - - /* - * The version register is read-only in a real APIC. - */ - reg0 = apic_read(APIC_LVR); - apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0); - apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK); - reg1 = apic_read(APIC_LVR); - apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1); - - /* - * The two version reads above should print the same - * numbers. If the second one is different, then we - * poke at a non-APIC. - */ - if (reg1 != reg0) - return 0; - - /* - * Check if the version looks reasonably. - */ - reg1 = GET_APIC_VERSION(reg0); - if (reg1 == 0x00 || reg1 == 0xff) - return 0; - reg1 = lapic_get_maxlvt(); - if (reg1 < 0x02 || reg1 == 0xff) - return 0; - - /* - * The ID register is read/write in a real APIC. - */ - reg0 = apic_read(APIC_ID); - apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); - apic_write(APIC_ID, reg0 ^ apic->apic_id_mask); - reg1 = apic_read(APIC_ID); - apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); - apic_write(APIC_ID, reg0); - if (reg1 != (reg0 ^ apic->apic_id_mask)) - return 0; - - /* - * The next two are just to see if we have sane values. - * They're only really relevant if we're in Virtual Wire - * compatibility mode, but most boxes are anymore. - */ - reg0 = apic_read(APIC_LVT0); - apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0); - reg1 = apic_read(APIC_LVT1); - apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1); - - return 1; -} - -/** - * sync_Arb_IDs - synchronize APIC bus arbitration IDs - */ -void __init sync_Arb_IDs(void) -{ - /* - * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not - * needed on AMD. - */ - if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) - return; - - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - - apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); - apic_write(APIC_ICR, APIC_DEST_ALLINC | - APIC_INT_LEVELTRIG | APIC_DM_INIT); -} - -/* - * An initial setup of the virtual wire mode. - */ -void __init init_bsp_APIC(void) -{ - unsigned int value; - - /* - * Don't do the setup now if we have a SMP BIOS as the - * through-I/O-APIC virtual wire mode might be active. - */ - if (smp_found_config || !cpu_has_apic) - return; - - /* - * Do not trust the local APIC being empty at bootup. - */ - clear_local_APIC(); - - /* - * Enable APIC. - */ - value = apic_read(APIC_SPIV); - value &= ~APIC_VECTOR_MASK; - value |= APIC_SPIV_APIC_ENABLED; - -#ifdef CONFIG_X86_32 - /* This bit is reserved on P4/Xeon and should be cleared */ - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && - (boot_cpu_data.x86 == 15)) - value &= ~APIC_SPIV_FOCUS_DISABLED; - else -#endif - value |= APIC_SPIV_FOCUS_DISABLED; - value |= SPURIOUS_APIC_VECTOR; - apic_write(APIC_SPIV, value); - - /* - * Set up the virtual wire mode. 
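- *
- * That is: LINT0 delivers ExtINT so the 8259A can still inject
- * legacy interrupts through the local APIC, and LINT1 delivers NMI
- * (level triggered on an external 82489DX).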
- */ - apic_write(APIC_LVT0, APIC_DM_EXTINT); - value = APIC_DM_NMI; - if (!lapic_is_integrated()) /* 82489DX */ - value |= APIC_LVT_LEVEL_TRIGGER; - apic_write(APIC_LVT1, value); -} - -static void __cpuinit lapic_setup_esr(void) -{ - unsigned int oldvalue, value, maxlvt; - - if (!lapic_is_integrated()) { - pr_info("No ESR for 82489DX.\n"); - return; - } - - if (apic->disable_esr) { - /* - * Something untraceable is creating bad interrupts on - * secondary quads ... for the moment, just leave the - * ESR disabled - we can't do anything useful with the - * errors anyway - mbligh - */ - pr_info("Leaving ESR disabled.\n"); - return; - } - - maxlvt = lapic_get_maxlvt(); - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - oldvalue = apic_read(APIC_ESR); - - /* enables sending errors */ - value = ERROR_APIC_VECTOR; - apic_write(APIC_LVTERR, value); - - /* - * spec says clear errors after enabling vector. - */ - if (maxlvt > 3) - apic_write(APIC_ESR, 0); - value = apic_read(APIC_ESR); - if (value != oldvalue) - apic_printk(APIC_VERBOSE, "ESR value before enabling " - "vector: 0x%08x after: 0x%08x\n", - oldvalue, value); -} - -/** - * setup_local_APIC - setup the local APIC - * - * Used to setup local APIC while initializing BSP or bringin up APs. - * Always called with preemption disabled. - */ -void __cpuinit setup_local_APIC(void) -{ - int cpu = smp_processor_id(); - unsigned int value, queued; - int i, j, acked = 0; - unsigned long long tsc = 0, ntsc; - long long max_loops = cpu_khz; - - if (cpu_has_tsc) - rdtscll(tsc); - - if (disable_apic) { - disable_ioapic_support(); - return; - } - -#ifdef CONFIG_X86_32 - /* Pound the ESR really hard over the head with a big hammer - mbligh */ - if (lapic_is_integrated() && apic->disable_esr) { - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - } -#endif - perf_events_lapic_init(); - - /* - * Double-check whether this APIC is really registered. - * This is meaningless in clustered apic mode, so we skip it. - */ - BUG_ON(!apic->apic_id_registered()); - - /* - * Intel recommends to set DFR, LDR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ - apic->init_apic_ldr(); - -#ifdef CONFIG_X86_32 - /* - * APIC LDR is initialized. If logical_apicid mapping was - * initialized during get_smp_config(), make sure it matches the - * actual value. - */ - i = early_per_cpu(x86_cpu_to_logical_apicid, cpu); - WARN_ON(i != BAD_APICID && i != logical_smp_processor_id()); - /* always use the value from LDR */ - early_per_cpu(x86_cpu_to_logical_apicid, cpu) = - logical_smp_processor_id(); - - /* - * Some NUMA implementations (NUMAQ) don't initialize apicid to - * node mapping during NUMA init. Now that logical apicid is - * guaranteed to be known, give it another chance. This is already - * a bit too late - percpu allocation has already happened without - * proper NUMA affinity. - */ - if (apic->x86_32_numa_cpu_node) - set_apicid_to_node(early_per_cpu(x86_cpu_to_apicid, cpu), - apic->x86_32_numa_cpu_node(cpu)); -#endif - - /* - * Set Task Priority to 'accept all'. We never change this - * later on. - */ - value = apic_read(APIC_TASKPRI); - value &= ~APIC_TPRI_MASK; - apic_write(APIC_TASKPRI, value); - - /* - * After a crash, we no longer service the interrupts and a pending - * interrupt from previous kernel might still have ISR bit set. 
- * - * Most probably by now CPU has serviced that pending interrupt and - * it might not have done the ack_APIC_irq() because it thought, - * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it - * does not clear the ISR bit and cpu thinks it has already serivced - * the interrupt. Hence a vector might get locked. It was noticed - * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. - */ - do { - queued = 0; - for (i = APIC_ISR_NR - 1; i >= 0; i--) - queued |= apic_read(APIC_IRR + i*0x10); - - for (i = APIC_ISR_NR - 1; i >= 0; i--) { - value = apic_read(APIC_ISR + i*0x10); - for (j = 31; j >= 0; j--) { - if (value & (1<<j)) { - ack_APIC_irq(); - acked++; - } - } - } - if (acked > 256) { - printk(KERN_ERR "LAPIC pending interrupts after %d EOI\n", - acked); - break; - } - if (cpu_has_tsc) { - rdtscll(ntsc); - max_loops = (cpu_khz << 10) - (ntsc - tsc); - } else - max_loops--; - } while (queued && max_loops > 0); - WARN_ON(max_loops <= 0); - - /* - * Now that we are all set up, enable the APIC - */ - value = apic_read(APIC_SPIV); - value &= ~APIC_VECTOR_MASK; - /* - * Enable APIC - */ - value |= APIC_SPIV_APIC_ENABLED; - -#ifdef CONFIG_X86_32 - /* - * Some unknown Intel IO/APIC (or APIC) errata is biting us with - * certain networking cards. If high frequency interrupts are - * happening on a particular IOAPIC pin, plus the IOAPIC routing - * entry is masked/unmasked at a high rate as well then sooner or - * later IOAPIC line gets 'stuck', no more interrupts are received - * from the device. If focus CPU is disabled then the hang goes - * away, oh well :-( - * - * [ This bug can be reproduced easily with a level-triggered - * PCI Ne2000 networking cards and PII/PIII processors, dual - * BX chipset. ] - */ - /* - * Actually disabling the focus CPU check just makes the hang less - * frequent as it makes the interrupt distributon model be more - * like LRU than MRU (the short-term load is more even across CPUs). - * See also the comment in end_level_ioapic_irq(). --macro - */ - - /* - * - enable focus processor (bit==0) - * - 64bit mode always use processor focus - * so no need to set it - */ - value &= ~APIC_SPIV_FOCUS_DISABLED; -#endif - - /* - * Set spurious IRQ vector - */ - value |= SPURIOUS_APIC_VECTOR; - apic_write(APIC_SPIV, value); - - /* - * Set up LVT0, LVT1: - * - * set up through-local-APIC on the BP's LINT0. This is not - * strictly necessary in pure symmetric-IO mode, but sometimes - * we delegate interrupts to the 8259A. - */ - /* - * TODO: set up through-local-APIC from through-I/O-APIC? --macro - */ - value = apic_read(APIC_LVT0) & APIC_LVT_MASKED; - if (!cpu && (pic_mode || !value)) { - value = APIC_DM_EXTINT; - apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu); - } else { - value = APIC_DM_EXTINT | APIC_LVT_MASKED; - apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", cpu); - } - apic_write(APIC_LVT0, value); - - /* - * only the BP should see the LINT1 NMI signal, obviously. 
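- *
- * Secondary CPUs therefore get LINT1 masked below; an external
- * 82489DX additionally needs the NMI to be level triggered.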
- */ - if (!cpu) - value = APIC_DM_NMI; - else - value = APIC_DM_NMI | APIC_LVT_MASKED; - if (!lapic_is_integrated()) /* 82489DX */ - value |= APIC_LVT_LEVEL_TRIGGER; - apic_write(APIC_LVT1, value); - -#ifdef CONFIG_X86_MCE_INTEL - /* Recheck CMCI information after local APIC is up on CPU #0 */ - if (!cpu) - cmci_recheck(); -#endif -} - -void __cpuinit end_local_APIC_setup(void) -{ - lapic_setup_esr(); - -#ifdef CONFIG_X86_32 - { - unsigned int value; - /* Disable the local apic timer */ - value = apic_read(APIC_LVTT); - value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); - apic_write(APIC_LVTT, value); - } -#endif - - apic_pm_activate(); -} - -void __init bsp_end_local_APIC_setup(void) -{ - end_local_APIC_setup(); - - /* - * Now that local APIC setup is completed for BP, configure the fault - * handling for interrupt remapping. - */ - if (intr_remapping_enabled) - enable_drhd_fault_handling(); - -} - -#ifdef CONFIG_X86_X2APIC -/* - * Need to disable xapic and x2apic at the same time and then enable xapic mode - */ -static inline void __disable_x2apic(u64 msr) -{ - wrmsrl(MSR_IA32_APICBASE, - msr & ~(X2APIC_ENABLE | XAPIC_ENABLE)); - wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE); -} - -static __init void disable_x2apic(void) -{ - u64 msr; - - if (!cpu_has_x2apic) - return; - - rdmsrl(MSR_IA32_APICBASE, msr); - if (msr & X2APIC_ENABLE) { - u32 x2apic_id = read_apic_id(); - - if (x2apic_id >= 255) - panic("Cannot disable x2apic, id: %08x\n", x2apic_id); - - pr_info("Disabling x2apic\n"); - __disable_x2apic(msr); - - if (nox2apic) { - clear_cpu_cap(&cpu_data(0), X86_FEATURE_X2APIC); - setup_clear_cpu_cap(X86_FEATURE_X2APIC); - } - - x2apic_disabled = 1; - x2apic_mode = 0; - - register_lapic_address(mp_lapic_addr); - } -} - -void check_x2apic(void) -{ - if (x2apic_enabled()) { - pr_info("x2apic enabled by BIOS, switching to x2apic ops\n"); - x2apic_preenabled = x2apic_mode = 1; - } -} - -void enable_x2apic(void) -{ - u64 msr; - - rdmsrl(MSR_IA32_APICBASE, msr); - if (x2apic_disabled) { - __disable_x2apic(msr); - return; - } - - if (!x2apic_mode) - return; - - if (!(msr & X2APIC_ENABLE)) { - printk_once(KERN_INFO "Enabling x2apic\n"); - wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE); - } -} -#endif /* CONFIG_X86_X2APIC */ - -int __init enable_IR(void) -{ -#ifdef CONFIG_IRQ_REMAP - if (!intr_remapping_supported()) { - pr_debug("intr-remapping not supported\n"); - return -1; - } - - if (!x2apic_preenabled && skip_ioapic_setup) { - pr_info("Skipped enabling intr-remap because of skipping " - "io-apic setup\n"); - return -1; - } - - return enable_intr_remapping(); -#endif - return -1; -} - -void __init enable_IR_x2apic(void) -{ - unsigned long flags; - int ret, x2apic_enabled = 0; - int dmar_table_init_ret; - - dmar_table_init_ret = dmar_table_init(); - if (dmar_table_init_ret && !x2apic_supported()) - return; - - ret = save_ioapic_entries(); - if (ret) { - pr_info("Saving IO-APIC state failed: %d\n", ret); - return; - } - - local_irq_save(flags); - legacy_pic->mask_all(); - mask_ioapic_entries(); - - if (x2apic_preenabled && nox2apic) - disable_x2apic(); - - if (dmar_table_init_ret) - ret = -1; - else - ret = enable_IR(); - - if (!x2apic_supported()) - goto skip_x2apic; - - if (ret < 0) { - /* IR is required if there is APIC ID > 255 even when running - * under KVM - */ - if (max_physical_apicid > 255 || - !hypervisor_x2apic_available()) { - if (x2apic_preenabled) - disable_x2apic(); - goto skip_x2apic; - } - /* - * without IR all CPUs can be addressed by IOAPIC/MSI - * only in physical mode - */ - 
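- /*
- * Forcing physical destination mode roughly means every interrupt
- * (IPI, IO-APIC, MSI) names a single APIC ID instead of a logical
- * CPU mask, so any number of CPUs can be reached at the cost of one
- * message per destination CPU.
- */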
x2apic_force_phys(); - } - - if (ret == IRQ_REMAP_XAPIC_MODE) { - pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n"); - goto skip_x2apic; - } - - x2apic_enabled = 1; - - if (x2apic_supported() && !x2apic_mode) { - x2apic_mode = 1; - enable_x2apic(); - pr_info("Enabled x2apic\n"); - } - -skip_x2apic: - if (ret < 0) /* IR enabling failed */ - restore_ioapic_entries(); - legacy_pic->restore_mask(); - local_irq_restore(flags); -} - -#ifdef CONFIG_X86_64 -/* - * Detect and enable local APICs on non-SMP boards. - * Original code written by Keir Fraser. - * On AMD64 we trust the BIOS - if it says no APIC it is likely - * not correctly set up (usually the APIC timer won't work etc.) - */ -static int __init detect_init_APIC(void) -{ - if (!cpu_has_apic) { - pr_info("No local APIC present\n"); - return -1; - } - - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - return 0; -} -#else - -static int __init apic_verify(void) -{ - u32 features, h, l; - - /* - * The APIC feature bit should now be enabled - * in `cpuid' - */ - features = cpuid_edx(1); - if (!(features & (1 << X86_FEATURE_APIC))) { - pr_warning("Could not enable APIC!\n"); - return -1; - } - set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - - /* The BIOS may have set up the APIC at some other address */ - if (boot_cpu_data.x86 >= 6) { - rdmsr(MSR_IA32_APICBASE, l, h); - if (l & MSR_IA32_APICBASE_ENABLE) - mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; - } - - pr_info("Found and enabled local APIC!\n"); - return 0; -} - -int __init apic_force_enable(unsigned long addr) -{ - u32 h, l; - - if (disable_apic) - return -1; - - /* - * Some BIOSes disable the local APIC in the APIC_BASE - * MSR. This can only be done in software for Intel P6 or later - * and AMD K7 (Model > 1) or later. - */ - if (boot_cpu_data.x86 >= 6) { - rdmsr(MSR_IA32_APICBASE, l, h); - if (!(l & MSR_IA32_APICBASE_ENABLE)) { - pr_info("Local APIC disabled by BIOS -- reenabling.\n"); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | addr; - wrmsr(MSR_IA32_APICBASE, l, h); - enabled_via_apicbase = 1; - } - } - return apic_verify(); -} - -/* - * Detect and initialize APIC - */ -static int __init detect_init_APIC(void) -{ - /* Disabled by kernel option? */ - if (disable_apic) - return -1; - - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) || - (boot_cpu_data.x86 >= 15)) - break; - goto no_apic; - case X86_VENDOR_INTEL: - if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 || - (boot_cpu_data.x86 == 5 && cpu_has_apic)) - break; - goto no_apic; - default: - goto no_apic; - } - - if (!cpu_has_apic) { - /* - * Over-ride BIOS and try to enable the local APIC only if - * "lapic" specified. 
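- *
- * apic_force_enable() then re-enables the APIC by setting
- * MSR_IA32_APICBASE_ENABLE, which is only possible on Intel P6 or
- * later and AMD K7 or later parts.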
- */ - if (!force_enable_local_apic) { - pr_info("Local APIC disabled by BIOS -- " - "you can enable it with \"lapic\"\n"); - return -1; - } - if (apic_force_enable(APIC_DEFAULT_PHYS_BASE)) - return -1; - } else { - if (apic_verify()) - return -1; - } - - apic_pm_activate(); - - return 0; - -no_apic: - pr_info("No local APIC present or hardware disabled\n"); - return -1; -} -#endif - -/** - * init_apic_mappings - initialize APIC mappings - */ -void __init init_apic_mappings(void) -{ - unsigned int new_apicid; - - if (x2apic_mode) { - boot_cpu_physical_apicid = read_apic_id(); - return; - } - - /* If no local APIC can be found return early */ - if (!smp_found_config && detect_init_APIC()) { - /* lets NOP'ify apic operations */ - pr_info("APIC: disable apic facility\n"); - apic_disable(); - } else { - apic_phys = mp_lapic_addr; - - /* - * acpi lapic path already maps that address in - * acpi_register_lapic_address() - */ - if (!acpi_lapic && !smp_found_config) - register_lapic_address(apic_phys); - } - - /* - * Fetch the APIC ID of the BSP in case we have a - * default configuration (or the MP table is broken). - */ - new_apicid = read_apic_id(); - if (boot_cpu_physical_apicid != new_apicid) { - boot_cpu_physical_apicid = new_apicid; - /* - * yeah -- we lie about apic_version - * in case if apic was disabled via boot option - * but it's not a problem for SMP compiled kernel - * since smp_sanity_check is prepared for such a case - * and disable smp mode - */ - apic_version[new_apicid] = - GET_APIC_VERSION(apic_read(APIC_LVR)); - } -} - -void __init register_lapic_address(unsigned long address) -{ - mp_lapic_addr = address; - - if (!x2apic_mode) { - set_fixmap_nocache(FIX_APIC_BASE, address); - apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", - APIC_BASE, mp_lapic_addr); - } - if (boot_cpu_physical_apicid == -1U) { - boot_cpu_physical_apicid = read_apic_id(); - apic_version[boot_cpu_physical_apicid] = - GET_APIC_VERSION(apic_read(APIC_LVR)); - } -} - -/* - * This initializes the IO-APIC and APIC hardware if this is - * a UP kernel. - */ -int apic_version[MAX_LOCAL_APIC]; - -int __init APIC_init_uniprocessor(void) -{ - if (disable_apic) { - pr_info("Apic disabled\n"); - return -1; - } -#ifdef CONFIG_X86_64 - if (!cpu_has_apic) { - disable_apic = 1; - pr_info("Apic disabled by BIOS\n"); - return -1; - } -#else - if (!smp_found_config && !cpu_has_apic) - return -1; - - /* - * Complain if the BIOS pretends there is one. - */ - if (!cpu_has_apic && - APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { - pr_err("BIOS bug, local APIC 0x%x not detected!...\n", - boot_cpu_physical_apicid); - return -1; - } -#endif - - default_setup_apic_routing(); - - verify_local_APIC(); - connect_bsp_APIC(); - -#ifdef CONFIG_X86_64 - apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid)); -#else - /* - * Hack: In case of kdump, after a crash, kernel might be booting - * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid - * might be zero if read from MP tables. Get it from LAPIC. 
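- *
- * With CONFIG_CRASH_DUMP the read_apic_id() below therefore
- * overrides whatever the MP tables reported.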
- */ -# ifdef CONFIG_CRASH_DUMP - boot_cpu_physical_apicid = read_apic_id(); -# endif -#endif - physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); - setup_local_APIC(); - -#ifdef CONFIG_X86_IO_APIC - /* - * Now enable IO-APICs, actually call clear_IO_APIC - * We need clear_IO_APIC before enabling error vector - */ - if (!skip_ioapic_setup && nr_ioapics) - enable_IO_APIC(); -#endif - - bsp_end_local_APIC_setup(); - -#ifdef CONFIG_X86_IO_APIC - if (smp_found_config && !skip_ioapic_setup && nr_ioapics) - setup_IO_APIC(); - else { - nr_ioapics = 0; - } -#endif - - x86_init.timers.setup_percpu_clockev(); - return 0; -} - -/* - * Local APIC interrupts - */ - -/* - * This interrupt should _never_ happen with our APIC/SMP architecture - */ -void smp_spurious_interrupt(struct pt_regs *regs) -{ - u32 v; - - irq_enter(); - exit_idle(); - /* - * Check if this really is a spurious interrupt and ACK it - * if it is a vectored one. Just in case... - * Spurious interrupts should not be ACKed. - */ - v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); - if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) - ack_APIC_irq(); - - inc_irq_stat(irq_spurious_count); - - /* see sw-dev-man vol 3, chapter 7.4.13.5 */ - pr_info("spurious APIC interrupt on CPU#%d, " - "should never happen.\n", smp_processor_id()); - irq_exit(); -} - -/* - * This interrupt should never happen with our APIC/SMP architecture - */ -void smp_error_interrupt(struct pt_regs *regs) -{ - u32 v0, v1; - u32 i = 0; - static const char * const error_interrupt_reason[] = { - "Send CS error", /* APIC Error Bit 0 */ - "Receive CS error", /* APIC Error Bit 1 */ - "Send accept error", /* APIC Error Bit 2 */ - "Receive accept error", /* APIC Error Bit 3 */ - "Redirectable IPI", /* APIC Error Bit 4 */ - "Send illegal vector", /* APIC Error Bit 5 */ - "Received illegal vector", /* APIC Error Bit 6 */ - "Illegal register address", /* APIC Error Bit 7 */ - }; - - irq_enter(); - exit_idle(); - /* First tickle the hardware, only then report what went on. -- REW */ - v0 = apic_read(APIC_ESR); - apic_write(APIC_ESR, 0); - v1 = apic_read(APIC_ESR); - ack_APIC_irq(); - atomic_inc(&irq_err_count); - - apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x(%02x)", - smp_processor_id(), v0 , v1); - - v1 = v1 & 0xff; - while (v1) { - if (v1 & 0x1) - apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]); - i++; - v1 >>= 1; - }; - - apic_printk(APIC_DEBUG, KERN_CONT "\n"); - - irq_exit(); -} - -/** - * connect_bsp_APIC - attach the APIC to the interrupt system - */ -void __init connect_bsp_APIC(void) -{ -#ifdef CONFIG_X86_32 - if (pic_mode) { - /* - * Do not trust the local APIC being empty at bootup. - */ - clear_local_APIC(); - /* - * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's - * local APIC to INT and NMI lines. - */ - apic_printk(APIC_VERBOSE, "leaving PIC mode, " - "enabling APIC mode.\n"); - imcr_pic_to_apic(); - } -#endif - if (apic->enable_apic_mode) - apic->enable_apic_mode(); -} - -/** - * disconnect_bsp_APIC - detach the APIC from the interrupt system - * @virt_wire_setup: indicates, whether virtual wire mode is selected - * - * Virtual wire mode is necessary to deliver legacy interrupts even when the - * APIC is disabled. - */ -void disconnect_bsp_APIC(int virt_wire_setup) -{ - unsigned int value; - -#ifdef CONFIG_X86_32 - if (pic_mode) { - /* - * Put the board back into PIC mode (has an effect only on - * certain older boards). 
Note that APIC interrupts, including - * IPIs, won't work beyond this point! The only exception are - * INIT IPIs. - */ - apic_printk(APIC_VERBOSE, "disabling APIC mode, " - "entering PIC mode.\n"); - imcr_apic_to_pic(); - return; - } -#endif - - /* Go back to Virtual Wire compatibility mode */ - - /* For the spurious interrupt use vector F, and enable it */ - value = apic_read(APIC_SPIV); - value &= ~APIC_VECTOR_MASK; - value |= APIC_SPIV_APIC_ENABLED; - value |= 0xf; - apic_write(APIC_SPIV, value); - - if (!virt_wire_setup) { - /* - * For LVT0 make it edge triggered, active high, - * external and enabled - */ - value = apic_read(APIC_LVT0); - value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | - APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | - APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); - value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); - apic_write(APIC_LVT0, value); - } else { - /* Disable LVT0 */ - apic_write(APIC_LVT0, APIC_LVT_MASKED); - } - - /* - * For LVT1 make it edge triggered, active high, - * nmi and enabled - */ - value = apic_read(APIC_LVT1); - value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | - APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | - APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); - value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); - apic_write(APIC_LVT1, value); -} - -void __cpuinit generic_processor_info(int apicid, int version) -{ - int cpu, max = nr_cpu_ids; - bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid, - phys_cpu_present_map); - - /* - * If boot cpu has not been detected yet, then only allow upto - * nr_cpu_ids - 1 processors and keep one slot free for boot cpu - */ - if (!boot_cpu_detected && num_processors >= nr_cpu_ids - 1 && - apicid != boot_cpu_physical_apicid) { - int thiscpu = max + disabled_cpus - 1; - - pr_warning( - "ACPI: NR_CPUS/possible_cpus limit of %i almost" - " reached. Keeping one slot for boot cpu." - " Processor %d/0x%x ignored.\n", max, thiscpu, apicid); - - disabled_cpus++; - return; - } - - if (num_processors >= nr_cpu_ids) { - int thiscpu = max + disabled_cpus; - - pr_warning( - "ACPI: NR_CPUS/possible_cpus limit of %i reached." - " Processor %d/0x%x ignored.\n", max, thiscpu, apicid); - - disabled_cpus++; - return; - } - - num_processors++; - if (apicid == boot_cpu_physical_apicid) { - /* - * x86_bios_cpu_apicid is required to have processors listed - * in same order as logical cpu numbers. Hence the first - * entry is BSP, and so on. - * boot_cpu_init() already hold bit 0 in cpu_present_mask - * for BSP. 
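- *
- * So the BSP always becomes logical CPU 0 below, and every other
- * APIC ID takes the next free bit in cpu_present_mask.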
- */ - cpu = 0; - } else - cpu = cpumask_next_zero(-1, cpu_present_mask); - - /* - * Validate version - */ - if (version == 0x0) { - pr_warning("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n", - cpu, apicid); - version = 0x10; - } - apic_version[apicid] = version; - - if (version != apic_version[boot_cpu_physical_apicid]) { - pr_warning("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n", - apic_version[boot_cpu_physical_apicid], cpu, version); - } - - physid_set(apicid, phys_cpu_present_map); - if (apicid > max_physical_apicid) - max_physical_apicid = apicid; - -#if defined(CONFIG_SMP) || defined(CONFIG_X86_64) - early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; - early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; -#endif -#ifdef CONFIG_X86_32 - early_per_cpu(x86_cpu_to_logical_apicid, cpu) = - apic->x86_32_early_logical_apicid(cpu); -#endif - set_cpu_possible(cpu, true); - set_cpu_present(cpu, true); -} - -int hard_smp_processor_id(void) -{ - return read_apic_id(); -} - -void default_init_apic_ldr(void) -{ - unsigned long val; - - apic_write(APIC_DFR, APIC_DFR_VALUE); - val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; - val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); - apic_write(APIC_LDR, val); -} - -/* - * Power management - */ -#ifdef CONFIG_PM - -static struct { - /* - * 'active' is true if the local APIC was enabled by us and - * not the BIOS; this signifies that we are also responsible - * for disabling it before entering apm/acpi suspend - */ - int active; - /* r/w apic fields */ - unsigned int apic_id; - unsigned int apic_taskpri; - unsigned int apic_ldr; - unsigned int apic_dfr; - unsigned int apic_spiv; - unsigned int apic_lvtt; - unsigned int apic_lvtpc; - unsigned int apic_lvt0; - unsigned int apic_lvt1; - unsigned int apic_lvterr; - unsigned int apic_tmict; - unsigned int apic_tdcr; - unsigned int apic_thmr; -} apic_pm_state; - -static int lapic_suspend(void) -{ - unsigned long flags; - int maxlvt; - - if (!apic_pm_state.active) - return 0; - - maxlvt = lapic_get_maxlvt(); - - apic_pm_state.apic_id = apic_read(APIC_ID); - apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); - apic_pm_state.apic_ldr = apic_read(APIC_LDR); - apic_pm_state.apic_dfr = apic_read(APIC_DFR); - apic_pm_state.apic_spiv = apic_read(APIC_SPIV); - apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); - if (maxlvt >= 4) - apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); - apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); - apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); - apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); - apic_pm_state.apic_tmict = apic_read(APIC_TMICT); - apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); -#ifdef CONFIG_X86_THERMAL_VECTOR - if (maxlvt >= 5) - apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); -#endif - - local_irq_save(flags); - disable_local_APIC(); - - if (intr_remapping_enabled) - disable_intr_remapping(); - - local_irq_restore(flags); - return 0; -} - -static void lapic_resume(void) -{ - unsigned int l, h; - unsigned long flags; - int maxlvt; - - if (!apic_pm_state.active) - return; - - local_irq_save(flags); - if (intr_remapping_enabled) { - /* - * IO-APIC and PIC have their own resume routines. - * We just mask them here to make sure the interrupt - * subsystem is completely quiet while we enable x2apic - * and interrupt-remapping. - */ - mask_ioapic_entries(); - legacy_pic->mask_all(); - } - - if (x2apic_mode) - enable_x2apic(); - else { - /* - * Make sure the APICBASE points to the right address - * - * FIXME! 
This will be wrong if we ever support suspend on - * SMP! We'll need to do this as part of the CPU restore! - */ - if (boot_cpu_data.x86 >= 6) { - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; - wrmsr(MSR_IA32_APICBASE, l, h); - } - } - - maxlvt = lapic_get_maxlvt(); - apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); - apic_write(APIC_ID, apic_pm_state.apic_id); - apic_write(APIC_DFR, apic_pm_state.apic_dfr); - apic_write(APIC_LDR, apic_pm_state.apic_ldr); - apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri); - apic_write(APIC_SPIV, apic_pm_state.apic_spiv); - apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); - apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); -#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) - if (maxlvt >= 5) - apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); -#endif - if (maxlvt >= 4) - apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); - apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); - apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); - apic_write(APIC_TMICT, apic_pm_state.apic_tmict); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - - if (intr_remapping_enabled) - reenable_intr_remapping(x2apic_mode); - - local_irq_restore(flags); -} - -/* - * This device has no shutdown method - fully functioning local APICs - * are needed on every CPU up until machine_halt/restart/poweroff. - */ - -static struct syscore_ops lapic_syscore_ops = { - .resume = lapic_resume, - .suspend = lapic_suspend, -}; - -static void __cpuinit apic_pm_activate(void) -{ - apic_pm_state.active = 1; -} - -static int __init init_lapic_sysfs(void) -{ - /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ - if (cpu_has_apic) - register_syscore_ops(&lapic_syscore_ops); - - return 0; -} - -/* local apic needs to resume before other devices access its registers. */ -core_initcall(init_lapic_sysfs); - -#else /* CONFIG_PM */ - -static void apic_pm_activate(void) { } - -#endif /* CONFIG_PM */ - -#ifdef CONFIG_X86_64 - -static int __cpuinit apic_cluster_num(void) -{ - int i, clusters, zeros; - unsigned id; - u16 *bios_cpu_apicid; - DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); - - bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); - bitmap_zero(clustermap, NUM_APIC_CLUSTERS); - - for (i = 0; i < nr_cpu_ids; i++) { - /* are we being called early in kernel startup? */ - if (bios_cpu_apicid) { - id = bios_cpu_apicid[i]; - } else if (i < nr_cpu_ids) { - if (cpu_present(i)) - id = per_cpu(x86_bios_cpu_apicid, i); - else - continue; - } else - break; - - if (id != BAD_APICID) - __set_bit(APIC_CLUSTERID(id), clustermap); - } - - /* Problem: Partially populated chassis may not have CPUs in some of - * the APIC clusters they have been allocated. Only present CPUs have - * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap. - * Since clusters are allocated sequentially, count zeros only if - * they are bounded by ones. 
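- *
- * For example, a cluster map of 1,0,0,1 counts as four clusters,
- * because the two empty clusters sit between populated ones, while
- * empty clusters after the last populated one are ignored.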
- */ - clusters = 0; - zeros = 0; - for (i = 0; i < NUM_APIC_CLUSTERS; i++) { - if (test_bit(i, clustermap)) { - clusters += 1 + zeros; - zeros = 0; - } else - ++zeros; - } - - return clusters; -} - -static int __cpuinitdata multi_checked; -static int __cpuinitdata multi; - -static int __cpuinit set_multi(const struct dmi_system_id *d) -{ - if (multi) - return 0; - pr_info("APIC: %s detected, Multi Chassis\n", d->ident); - multi = 1; - return 0; -} - -static const __cpuinitconst struct dmi_system_id multi_dmi_table[] = { - { - .callback = set_multi, - .ident = "IBM System Summit2", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "IBM"), - DMI_MATCH(DMI_PRODUCT_NAME, "Summit2"), - }, - }, - {} -}; - -static void __cpuinit dmi_check_multi(void) -{ - if (multi_checked) - return; - - dmi_check_system(multi_dmi_table); - multi_checked = 1; -} - -/* - * apic_is_clustered_box() -- Check if we can expect good TSC - * - * Thus far, the major user of this is IBM's Summit2 series: - * Clustered boxes may have unsynced TSC problems if they are - * multi-chassis. - * Use DMI to check them - */ -__cpuinit int apic_is_clustered_box(void) -{ - dmi_check_multi(); - if (multi) - return 1; - - if (!is_vsmp_box()) - return 0; - - /* - * ScaleMP vSMPowered boxes have one cluster per board and TSCs are - * not guaranteed to be synced between boards - */ - if (apic_cluster_num() > 1) - return 1; - - return 0; -} -#endif - -/* - * APIC command line parameters - */ -static int __init setup_disableapic(char *arg) -{ - disable_apic = 1; - setup_clear_cpu_cap(X86_FEATURE_APIC); - return 0; -} -early_param("disableapic", setup_disableapic); - -/* same as disableapic, for compatibility */ -static int __init setup_nolapic(char *arg) -{ - return setup_disableapic(arg); -} -early_param("nolapic", setup_nolapic); - -static int __init parse_lapic_timer_c2_ok(char *arg) -{ - local_apic_timer_c2_ok = 1; - return 0; -} -early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); - -static int __init parse_disable_apic_timer(char *arg) -{ - disable_apic_timer = 1; - return 0; -} -early_param("noapictimer", parse_disable_apic_timer); - -static int __init parse_nolapic_timer(char *arg) -{ - disable_apic_timer = 1; - return 0; -} -early_param("nolapic_timer", parse_nolapic_timer); - -static int __init apic_set_verbosity(char *arg) -{ - if (!arg) { -#ifdef CONFIG_X86_64 - skip_ioapic_setup = 0; - return 0; -#endif - return -EINVAL; - } - - if (strcmp("debug", arg) == 0) - apic_verbosity = APIC_DEBUG; - else if (strcmp("verbose", arg) == 0) - apic_verbosity = APIC_VERBOSE; - else { - pr_warning("APIC Verbosity level %s not recognised" - " use apic=verbose or apic=debug\n", arg); - return -EINVAL; - } - - return 0; -} -early_param("apic", apic_set_verbosity); - -static int __init lapic_insert_resource(void) -{ - if (!apic_phys) - return -1; - - /* Put local APIC into the resource map. */ - lapic_resource.start = apic_phys; - lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1; - insert_resource(&iomem_resource, &lapic_resource); - - return 0; -} - -/* - * need call insert after e820_reserve_resources() - * that is using request_resource - */ -late_initcall(lapic_insert_resource); diff --git a/ANDROID_3.4.5/arch/x86/kernel/apic/apic_flat_64.c b/ANDROID_3.4.5/arch/x86/kernel/apic/apic_flat_64.c deleted file mode 100644 index 359b6899..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/apic/apic_flat_64.c +++ /dev/null @@ -1,398 +0,0 @@ -/* - * Copyright 2004 James Cleverdon, IBM. 
- * Subject to the GNU Public License, v.2 - * - * Flat APIC subarch code. - * - * Hacked for x86-64 by James Cleverdon from i386 architecture code by - * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and - * James Cleverdon. - */ -#include <linux/errno.h> -#include <linux/threads.h> -#include <linux/cpumask.h> -#include <linux/string.h> -#include <linux/kernel.h> -#include <linux/ctype.h> -#include <linux/init.h> -#include <linux/hardirq.h> -#include <linux/module.h> -#include <asm/smp.h> -#include <asm/apic.h> -#include <asm/ipi.h> - -#ifdef CONFIG_ACPI -#include <acpi/acpi_bus.h> -#endif - -static struct apic apic_physflat; -static struct apic apic_flat; - -struct apic __read_mostly *apic = &apic_flat; -EXPORT_SYMBOL_GPL(apic); - -static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - return 1; -} - -static const struct cpumask *flat_target_cpus(void) -{ - return cpu_online_mask; -} - -static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - /* Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - cpumask_clear(retmask); - cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} - -/* - * Set up the logical destination ID. - * - * Intel recommends to set DFR, LDR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ -void flat_init_apic_ldr(void) -{ - unsigned long val; - unsigned long num, id; - - num = smp_processor_id(); - id = 1UL << num; - apic_write(APIC_DFR, APIC_DFR_FLAT); - val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; - val |= SET_APIC_LOGICAL_ID(id); - apic_write(APIC_LDR, val); -} - -static inline void _flat_send_IPI_mask(unsigned long mask, int vector) -{ - unsigned long flags; - - local_irq_save(flags); - __default_send_IPI_dest_field(mask, vector, apic->dest_logical); - local_irq_restore(flags); -} - -static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector) -{ - unsigned long mask = cpumask_bits(cpumask)[0]; - - _flat_send_IPI_mask(mask, vector); -} - -static void - flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) -{ - unsigned long mask = cpumask_bits(cpumask)[0]; - int cpu = smp_processor_id(); - - if (cpu < BITS_PER_LONG) - clear_bit(cpu, &mask); - - _flat_send_IPI_mask(mask, vector); -} - -static void flat_send_IPI_allbutself(int vector) -{ - int cpu = smp_processor_id(); -#ifdef CONFIG_HOTPLUG_CPU - int hotplug = 1; -#else - int hotplug = 0; -#endif - if (hotplug || vector == NMI_VECTOR) { - if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) { - unsigned long mask = cpumask_bits(cpu_online_mask)[0]; - - if (cpu < BITS_PER_LONG) - clear_bit(cpu, &mask); - - _flat_send_IPI_mask(mask, vector); - } - } else if (num_online_cpus() > 1) { - __default_send_IPI_shortcut(APIC_DEST_ALLBUT, - vector, apic->dest_logical); - } -} - -static void flat_send_IPI_all(int vector) -{ - if (vector == NMI_VECTOR) { - flat_send_IPI_mask(cpu_online_mask, vector); - } else { - __default_send_IPI_shortcut(APIC_DEST_ALLINC, - vector, apic->dest_logical); - } -} - -static unsigned int flat_get_apic_id(unsigned long x) -{ - unsigned int id; - - id = (((x)>>24) & 0xFFu); - - return id; -} - -static unsigned long set_apic_id(unsigned int id) 
-{ - unsigned long x; - - x = ((id & 0xFFu)<<24); - return x; -} - -static unsigned int read_xapic_id(void) -{ - unsigned int id; - - id = flat_get_apic_id(apic_read(APIC_ID)); - return id; -} - -static int flat_apic_id_registered(void) -{ - return physid_isset(read_xapic_id(), phys_cpu_present_map); -} - -static int flat_phys_pkg_id(int initial_apic_id, int index_msb) -{ - return initial_apic_id >> index_msb; -} - -static int flat_probe(void) -{ - return 1; -} - -static struct apic apic_flat = { - .name = "flat", - .probe = flat_probe, - .acpi_madt_oem_check = flat_acpi_madt_oem_check, - .apic_id_valid = default_apic_id_valid, - .apic_id_registered = flat_apic_id_registered, - - .irq_delivery_mode = dest_LowestPrio, - .irq_dest_mode = 1, /* logical */ - - .target_cpus = flat_target_cpus, - .disable_esr = 0, - .dest_logical = APIC_DEST_LOGICAL, - .check_apicid_used = NULL, - .check_apicid_present = NULL, - - .vector_allocation_domain = flat_vector_allocation_domain, - .init_apic_ldr = flat_init_apic_ldr, - - .ioapic_phys_id_map = NULL, - .setup_apic_routing = NULL, - .multi_timer_check = NULL, - .cpu_present_to_apicid = default_cpu_present_to_apicid, - .apicid_to_cpu_present = NULL, - .setup_portio_remap = NULL, - .check_phys_apicid_present = default_check_phys_apicid_present, - .enable_apic_mode = NULL, - .phys_pkg_id = flat_phys_pkg_id, - .mps_oem_check = NULL, - - .get_apic_id = flat_get_apic_id, - .set_apic_id = set_apic_id, - .apic_id_mask = 0xFFu << 24, - - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, - - .send_IPI_mask = flat_send_IPI_mask, - .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, - .send_IPI_allbutself = flat_send_IPI_allbutself, - .send_IPI_all = flat_send_IPI_all, - .send_IPI_self = apic_send_IPI_self, - - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = NULL, - .smp_callin_clear_local_apic = NULL, - .inquire_remote_apic = default_inquire_remote_apic, - - .read = native_apic_mem_read, - .write = native_apic_mem_write, - .icr_read = native_apic_icr_read, - .icr_write = native_apic_icr_write, - .wait_icr_idle = native_apic_wait_icr_idle, - .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, -}; - -/* - * Physflat mode is used when there are more than 8 CPUs on a system. - * We cannot use logical delivery in this case because the mask - * overflows, so use physical mode. - */ -static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ -#ifdef CONFIG_ACPI - /* - * Quirk: some x86_64 machines can only use physical APIC mode - * regardless of how many processors are present (x86_64 ES7000 - * is an example). 
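- *
- * The firmware advertises this via the ACPI_FADT_APIC_PHYSICAL flag
- * in the FADT, which is what the check below keys on.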
- */ - if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID && - (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) { - printk(KERN_DEBUG "system APIC only can use physical flat"); - return 1; - } - - if (!strncmp(oem_id, "IBM", 3) && !strncmp(oem_table_id, "EXA", 3)) { - printk(KERN_DEBUG "IBM Summit detected, will use apic physical"); - return 1; - } -#endif - - return 0; -} - -static const struct cpumask *physflat_target_cpus(void) -{ - return cpu_online_mask; -} - -static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); -} - -static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) -{ - default_send_IPI_mask_sequence_phys(cpumask, vector); -} - -static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask, - int vector) -{ - default_send_IPI_mask_allbutself_phys(cpumask, vector); -} - -static void physflat_send_IPI_allbutself(int vector) -{ - default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); -} - -static void physflat_send_IPI_all(int vector) -{ - physflat_send_IPI_mask(cpu_online_mask, vector); -} - -static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - cpu = cpumask_first(cpumask); - if ((unsigned)cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); - else - return BAD_APICID; -} - -static unsigned int -physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) { - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - } - return per_cpu(x86_cpu_to_apicid, cpu); -} - -static int physflat_probe(void) -{ - if (apic == &apic_physflat || num_possible_cpus() > 8) - return 1; - - return 0; -} - -static struct apic apic_physflat = { - - .name = "physical flat", - .probe = physflat_probe, - .acpi_madt_oem_check = physflat_acpi_madt_oem_check, - .apic_id_valid = default_apic_id_valid, - .apic_id_registered = flat_apic_id_registered, - - .irq_delivery_mode = dest_Fixed, - .irq_dest_mode = 0, /* physical */ - - .target_cpus = physflat_target_cpus, - .disable_esr = 0, - .dest_logical = 0, - .check_apicid_used = NULL, - .check_apicid_present = NULL, - - .vector_allocation_domain = physflat_vector_allocation_domain, - /* not needed, but shouldn't hurt: */ - .init_apic_ldr = flat_init_apic_ldr, - - .ioapic_phys_id_map = NULL, - .setup_apic_routing = NULL, - .multi_timer_check = NULL, - .cpu_present_to_apicid = default_cpu_present_to_apicid, - .apicid_to_cpu_present = NULL, - .setup_portio_remap = NULL, - .check_phys_apicid_present = default_check_phys_apicid_present, - .enable_apic_mode = NULL, - .phys_pkg_id = flat_phys_pkg_id, - .mps_oem_check = NULL, - - .get_apic_id = flat_get_apic_id, - .set_apic_id = set_apic_id, - .apic_id_mask = 0xFFu << 24, - - .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, - - .send_IPI_mask = physflat_send_IPI_mask, - .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, - .send_IPI_allbutself = physflat_send_IPI_allbutself, - .send_IPI_all = physflat_send_IPI_all, - .send_IPI_self = apic_send_IPI_self, - - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = 
DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = NULL, - .smp_callin_clear_local_apic = NULL, - .inquire_remote_apic = default_inquire_remote_apic, - - .read = native_apic_mem_read, - .write = native_apic_mem_write, - .icr_read = native_apic_icr_read, - .icr_write = native_apic_icr_write, - .wait_icr_idle = native_apic_wait_icr_idle, - .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, -}; - -/* - * We need to check for physflat first, so this order is important. - */ -apic_drivers(apic_physflat, apic_flat); diff --git a/ANDROID_3.4.5/arch/x86/kernel/apic/apic_noop.c b/ANDROID_3.4.5/arch/x86/kernel/apic/apic_noop.c deleted file mode 100644 index 634ae6cd..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/apic/apic_noop.c +++ /dev/null @@ -1,192 +0,0 @@ -/* - * NOOP APIC driver. - * - * Does almost nothing and should be substituted by a real apic driver via - * probe routine. - * - * Though in case if apic is disabled (for some reason) we try - * to not uglify the caller's code and allow to call (some) apic routines - * like self-ipi, etc... - */ - -#include <linux/threads.h> -#include <linux/cpumask.h> -#include <linux/module.h> -#include <linux/string.h> -#include <linux/kernel.h> -#include <linux/ctype.h> -#include <linux/init.h> -#include <linux/errno.h> -#include <asm/fixmap.h> -#include <asm/mpspec.h> -#include <asm/apicdef.h> -#include <asm/apic.h> -#include <asm/setup.h> - -#include <linux/smp.h> -#include <asm/ipi.h> - -#include <linux/interrupt.h> -#include <asm/acpi.h> -#include <asm/e820.h> - -static void noop_init_apic_ldr(void) { } -static void noop_send_IPI_mask(const struct cpumask *cpumask, int vector) { } -static void noop_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { } -static void noop_send_IPI_allbutself(int vector) { } -static void noop_send_IPI_all(int vector) { } -static void noop_send_IPI_self(int vector) { } -static void noop_apic_wait_icr_idle(void) { } -static void noop_apic_icr_write(u32 low, u32 id) { } - -static int noop_wakeup_secondary_cpu(int apicid, unsigned long start_eip) -{ - return -1; -} - -static u32 noop_safe_apic_wait_icr_idle(void) -{ - return 0; -} - -static u64 noop_apic_icr_read(void) -{ - return 0; -} - -static int noop_phys_pkg_id(int cpuid_apic, int index_msb) -{ - return 0; -} - -static unsigned int noop_get_apic_id(unsigned long x) -{ - return 0; -} - -static int noop_probe(void) -{ - /* - * NOOP apic should not ever be - * enabled via probe routine - */ - return 0; -} - -static int noop_apic_id_registered(void) -{ - /* - * if we would be really "pedantic" - * we should pass read_apic_id() here - * but since NOOP suppose APIC ID = 0 - * lets save a few cycles - */ - return physid_isset(0, phys_cpu_present_map); -} - -static const struct cpumask *noop_target_cpus(void) -{ - /* only BSP here */ - return cpumask_of(0); -} - -static unsigned long noop_check_apicid_used(physid_mask_t *map, int apicid) -{ - return physid_isset(apicid, *map); -} - -static unsigned long noop_check_apicid_present(int bit) -{ - return physid_isset(bit, phys_cpu_present_map); -} - -static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - if (cpu != 0) - pr_warning("APIC: Vector allocated for non-BSP cpu\n"); - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); -} - -static u32 noop_apic_read(u32 reg) -{ - WARN_ON_ONCE((cpu_has_apic && !disable_apic)); - return 0; -} - -static void noop_apic_write(u32 reg, u32 v) -{ - WARN_ON_ONCE(cpu_has_apic && !disable_apic); -} - -struct apic apic_noop = { - .name = 
"noop", - .probe = noop_probe, - .acpi_madt_oem_check = NULL, - - .apic_id_valid = default_apic_id_valid, - .apic_id_registered = noop_apic_id_registered, - - .irq_delivery_mode = dest_LowestPrio, - /* logical delivery broadcast to all CPUs: */ - .irq_dest_mode = 1, - - .target_cpus = noop_target_cpus, - .disable_esr = 0, - .dest_logical = APIC_DEST_LOGICAL, - .check_apicid_used = noop_check_apicid_used, - .check_apicid_present = noop_check_apicid_present, - - .vector_allocation_domain = noop_vector_allocation_domain, - .init_apic_ldr = noop_init_apic_ldr, - - .ioapic_phys_id_map = default_ioapic_phys_id_map, - .setup_apic_routing = NULL, - .multi_timer_check = NULL, - - .cpu_present_to_apicid = default_cpu_present_to_apicid, - .apicid_to_cpu_present = physid_set_mask_of_physid, - - .setup_portio_remap = NULL, - .check_phys_apicid_present = default_check_phys_apicid_present, - .enable_apic_mode = NULL, - - .phys_pkg_id = noop_phys_pkg_id, - - .mps_oem_check = NULL, - - .get_apic_id = noop_get_apic_id, - .set_apic_id = NULL, - .apic_id_mask = 0x0F << 24, - - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, - - .send_IPI_mask = noop_send_IPI_mask, - .send_IPI_mask_allbutself = noop_send_IPI_mask_allbutself, - .send_IPI_allbutself = noop_send_IPI_allbutself, - .send_IPI_all = noop_send_IPI_all, - .send_IPI_self = noop_send_IPI_self, - - .wakeup_secondary_cpu = noop_wakeup_secondary_cpu, - - /* should be safe */ - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - - .wait_for_init_deassert = NULL, - - .smp_callin_clear_local_apic = NULL, - .inquire_remote_apic = NULL, - - .read = noop_apic_read, - .write = noop_apic_write, - .icr_read = noop_apic_icr_read, - .icr_write = noop_apic_icr_write, - .wait_icr_idle = noop_apic_wait_icr_idle, - .safe_wait_icr_idle = noop_safe_apic_wait_icr_idle, - -#ifdef CONFIG_X86_32 - .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid, -#endif -}; diff --git a/ANDROID_3.4.5/arch/x86/kernel/apic/apic_numachip.c b/ANDROID_3.4.5/arch/x86/kernel/apic/apic_numachip.c deleted file mode 100644 index 23e75422..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/apic/apic_numachip.c +++ /dev/null @@ -1,304 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Numascale NumaConnect-Specific APIC Code - * - * Copyright (C) 2011 Numascale AS. All rights reserved. 
- * - * Send feedback to <support@numascale.com> - * - */ - -#include <linux/errno.h> -#include <linux/threads.h> -#include <linux/cpumask.h> -#include <linux/string.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/ctype.h> -#include <linux/init.h> -#include <linux/hardirq.h> -#include <linux/delay.h> - -#include <asm/numachip/numachip_csr.h> -#include <asm/smp.h> -#include <asm/apic.h> -#include <asm/ipi.h> -#include <asm/apic_flat_64.h> - -static int numachip_system __read_mostly; - -static struct apic apic_numachip __read_mostly; - -static unsigned int get_apic_id(unsigned long x) -{ - unsigned long value; - unsigned int id; - - rdmsrl(MSR_FAM10H_NODE_ID, value); - id = ((x >> 24) & 0xffU) | ((value << 2) & 0x3f00U); - - return id; -} - -static unsigned long set_apic_id(unsigned int id) -{ - unsigned long x; - - x = ((id & 0xffU) << 24); - return x; -} - -static unsigned int read_xapic_id(void) -{ - return get_apic_id(apic_read(APIC_ID)); -} - -static int numachip_apic_id_valid(int apicid) -{ - /* Trust what bootloader passes in MADT */ - return 1; -} - -static int numachip_apic_id_registered(void) -{ - return physid_isset(read_xapic_id(), phys_cpu_present_map); -} - -static int numachip_phys_pkg_id(int initial_apic_id, int index_msb) -{ - return initial_apic_id >> index_msb; -} - -static const struct cpumask *numachip_target_cpus(void) -{ - return cpu_online_mask; -} - -static void numachip_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); -} - -static int __cpuinit numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip) -{ - union numachip_csr_g3_ext_irq_gen int_gen; - - int_gen.s._destination_apic_id = phys_apicid; - int_gen.s._vector = 0; - int_gen.s._msgtype = APIC_DM_INIT >> 8; - int_gen.s._index = 0; - - write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v); - - int_gen.s._msgtype = APIC_DM_STARTUP >> 8; - int_gen.s._vector = start_rip >> 12; - - write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v); - - atomic_set(&init_deasserted, 1); - return 0; -} - -static void numachip_send_IPI_one(int cpu, int vector) -{ - union numachip_csr_g3_ext_irq_gen int_gen; - int apicid = per_cpu(x86_cpu_to_apicid, cpu); - - int_gen.s._destination_apic_id = apicid; - int_gen.s._vector = vector; - int_gen.s._msgtype = (vector == NMI_VECTOR ? APIC_DM_NMI : APIC_DM_FIXED) >> 8; - int_gen.s._index = 0; - - write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v); -} - -static void numachip_send_IPI_mask(const struct cpumask *mask, int vector) -{ - unsigned int cpu; - - for_each_cpu(cpu, mask) - numachip_send_IPI_one(cpu, vector); -} - -static void numachip_send_IPI_mask_allbutself(const struct cpumask *mask, - int vector) -{ - unsigned int this_cpu = smp_processor_id(); - unsigned int cpu; - - for_each_cpu(cpu, mask) { - if (cpu != this_cpu) - numachip_send_IPI_one(cpu, vector); - } -} - -static void numachip_send_IPI_allbutself(int vector) -{ - unsigned int this_cpu = smp_processor_id(); - unsigned int cpu; - - for_each_online_cpu(cpu) { - if (cpu != this_cpu) - numachip_send_IPI_one(cpu, vector); - } -} - -static void numachip_send_IPI_all(int vector) -{ - numachip_send_IPI_mask(cpu_online_mask, vector); -} - -static void numachip_send_IPI_self(int vector) -{ - __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); -} - -static unsigned int numachip_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. 
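
/*
 * Editor's note: get_apic_id() above widens the 8-bit xAPIC ID with node bits
 * taken from MSR_FAM10H_NODE_ID. This standalone sketch reproduces only the
 * bit arithmetic, with made-up register values, so the composition is visible.
 */
#include <stdio.h>

static unsigned int numachip_compose_id(unsigned long apic_id_reg,
					unsigned long node_msr)
{
	/* low 8 bits: xAPIC ID from APIC_ID[31:24]; bits 13:8: node id bits */
	return ((apic_id_reg >> 24) & 0xffU) | ((node_msr << 2) & 0x3f00U);
}

int main(void)
{
	unsigned long apic_id_reg = 0x05000000UL;	/* xAPIC ID 5 */
	unsigned long node_msr    = 0x0c0UL;		/* example MSR contents */

	printf("extended APIC id = 0x%x\n",
	       numachip_compose_id(apic_id_reg, node_msr));
	return 0;
}
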
- * May as well be the first. - */ - cpu = cpumask_first(cpumask); - if (likely((unsigned)cpu < nr_cpu_ids)) - return per_cpu(x86_cpu_to_apicid, cpu); - - return BAD_APICID; -} - -static unsigned int -numachip_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) { - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - } - return per_cpu(x86_cpu_to_apicid, cpu); -} - -static int __init numachip_probe(void) -{ - return apic == &apic_numachip; -} - -static void __init map_csrs(void) -{ - printk(KERN_INFO "NumaChip: Mapping local CSR space (%016llx - %016llx)\n", - NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_BASE + NUMACHIP_LCSR_SIZE - 1); - init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE); - - printk(KERN_INFO "NumaChip: Mapping global CSR space (%016llx - %016llx)\n", - NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_BASE + NUMACHIP_GCSR_SIZE - 1); - init_extra_mapping_uc(NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_SIZE); -} - -static void fixup_cpu_id(struct cpuinfo_x86 *c, int node) -{ - - if (c->phys_proc_id != node) { - c->phys_proc_id = node; - per_cpu(cpu_llc_id, smp_processor_id()) = node; - } -} - -static int __init numachip_system_init(void) -{ - unsigned int val; - - if (!numachip_system) - return 0; - - x86_cpuinit.fixup_cpu_id = fixup_cpu_id; - - map_csrs(); - - val = read_lcsr(CSR_G0_NODE_IDS); - printk(KERN_INFO "NumaChip: Local NodeID = %08x\n", val); - - return 0; -} -early_initcall(numachip_system_init); - -static int numachip_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - if (!strncmp(oem_id, "NUMASC", 6)) { - numachip_system = 1; - return 1; - } - - return 0; -} - -static struct apic apic_numachip __refconst = { - - .name = "NumaConnect system", - .probe = numachip_probe, - .acpi_madt_oem_check = numachip_acpi_madt_oem_check, - .apic_id_valid = numachip_apic_id_valid, - .apic_id_registered = numachip_apic_id_registered, - - .irq_delivery_mode = dest_Fixed, - .irq_dest_mode = 0, /* physical */ - - .target_cpus = numachip_target_cpus, - .disable_esr = 0, - .dest_logical = 0, - .check_apicid_used = NULL, - .check_apicid_present = NULL, - - .vector_allocation_domain = numachip_vector_allocation_domain, - .init_apic_ldr = flat_init_apic_ldr, - - .ioapic_phys_id_map = NULL, - .setup_apic_routing = NULL, - .multi_timer_check = NULL, - .cpu_present_to_apicid = default_cpu_present_to_apicid, - .apicid_to_cpu_present = NULL, - .setup_portio_remap = NULL, - .check_phys_apicid_present = default_check_phys_apicid_present, - .enable_apic_mode = NULL, - .phys_pkg_id = numachip_phys_pkg_id, - .mps_oem_check = NULL, - - .get_apic_id = get_apic_id, - .set_apic_id = set_apic_id, - .apic_id_mask = 0xffU << 24, - - .cpu_mask_to_apicid = numachip_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = numachip_cpu_mask_to_apicid_and, - - .send_IPI_mask = numachip_send_IPI_mask, - .send_IPI_mask_allbutself = numachip_send_IPI_mask_allbutself, - .send_IPI_allbutself = numachip_send_IPI_allbutself, - .send_IPI_all = numachip_send_IPI_all, - .send_IPI_self = numachip_send_IPI_self, - - .wakeup_secondary_cpu = numachip_wakeup_secondary, - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = NULL, - .smp_callin_clear_local_apic = NULL, - .inquire_remote_apic = NULL, /* REMRD not supported */ - - .read = native_apic_mem_read, - .write = 
native_apic_mem_write, - .icr_read = native_apic_icr_read, - .icr_write = native_apic_icr_write, - .wait_icr_idle = native_apic_wait_icr_idle, - .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, -}; -apic_driver(apic_numachip); - diff --git a/ANDROID_3.4.5/arch/x86/kernel/apic/bigsmp_32.c b/ANDROID_3.4.5/arch/x86/kernel/apic/bigsmp_32.c deleted file mode 100644 index 0cdec706..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/apic/bigsmp_32.c +++ /dev/null @@ -1,279 +0,0 @@ -/* - * APIC driver for "bigsmp" xAPIC machines with more than 8 virtual CPUs. - * - * Drives the local APIC in "clustered mode". - */ -#include <linux/threads.h> -#include <linux/cpumask.h> -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/dmi.h> -#include <linux/smp.h> - -#include <asm/apicdef.h> -#include <asm/fixmap.h> -#include <asm/mpspec.h> -#include <asm/apic.h> -#include <asm/ipi.h> - -static unsigned bigsmp_get_apic_id(unsigned long x) -{ - return (x >> 24) & 0xFF; -} - -static int bigsmp_apic_id_registered(void) -{ - return 1; -} - -static const struct cpumask *bigsmp_target_cpus(void) -{ -#ifdef CONFIG_SMP - return cpu_online_mask; -#else - return cpumask_of(0); -#endif -} - -static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid) -{ - return 0; -} - -static unsigned long bigsmp_check_apicid_present(int bit) -{ - return 1; -} - -static int bigsmp_early_logical_apicid(int cpu) -{ - /* on bigsmp, logical apicid is the same as physical */ - return early_per_cpu(x86_cpu_to_apicid, cpu); -} - -static inline unsigned long calculate_ldr(int cpu) -{ - unsigned long val, id; - - val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; - id = per_cpu(x86_bios_cpu_apicid, cpu); - val |= SET_APIC_LOGICAL_ID(id); - - return val; -} - -/* - * Set up the logical destination ID. - * - * Intel recommends to set DFR, LDR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ -static void bigsmp_init_apic_ldr(void) -{ - unsigned long val; - int cpu = smp_processor_id(); - - apic_write(APIC_DFR, APIC_DFR_FLAT); - val = calculate_ldr(cpu); - apic_write(APIC_LDR, val); -} - -static void bigsmp_setup_apic_routing(void) -{ - printk(KERN_INFO - "Enabling APIC mode: Physflat. Using %d I/O APICs\n", - nr_ioapics); -} - -static int bigsmp_cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < nr_cpu_ids) - return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); - - return BAD_APICID; -} - -static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) -{ - /* For clustered we don't have a good way to do this yet - hack */ - physids_promote(0xFFL, retmap); -} - -static int bigsmp_check_phys_apicid_present(int phys_apicid) -{ - return 1; -} - -/* As we are using single CPU as destination, pick only one CPU here */ -static unsigned int bigsmp_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - int cpu = cpumask_first(cpumask); - - if (cpu < nr_cpu_ids) - return cpu_physical_id(cpu); - return BAD_APICID; -} - -static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. 
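
/*
 * Editor's note: calculate_ldr() above packs the CPU's logical APIC ID into
 * the top byte of the Logical Destination Register before the LDR write in
 * bigsmp_init_apic_ldr(). A small sketch of that masking, assuming the usual
 * layout with the logical ID in LDR[31:24]; macro names here are invented.
 */
#include <stdio.h>
#include <stdint.h>

#define LDR_ID_MASK		(0xffu << 24)
#define SET_LOGICAL_ID(id)	(((uint32_t)(id) & 0xffu) << 24)

static uint32_t make_ldr(uint32_t old_ldr, uint8_t logical_id)
{
	return (old_ldr & ~LDR_ID_MASK) | SET_LOGICAL_ID(logical_id);
}

int main(void)
{
	/* pretend the register currently reads 0x01000000 and we are CPU 3 */
	printf("LDR = 0x%08x\n", make_ldr(0x01000000u, 3));
	return 0;
}
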
- */ - for_each_cpu_and(cpu, cpumask, andmask) { - if (cpumask_test_cpu(cpu, cpu_online_mask)) - return cpu_physical_id(cpu); - } - return BAD_APICID; -} - -static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - -static inline void bigsmp_send_IPI_mask(const struct cpumask *mask, int vector) -{ - default_send_IPI_mask_sequence_phys(mask, vector); -} - -static void bigsmp_send_IPI_allbutself(int vector) -{ - default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); -} - -static void bigsmp_send_IPI_all(int vector) -{ - bigsmp_send_IPI_mask(cpu_online_mask, vector); -} - -static int dmi_bigsmp; /* can be set by dmi scanners */ - -static int hp_ht_bigsmp(const struct dmi_system_id *d) -{ - printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident); - dmi_bigsmp = 1; - - return 0; -} - - -static const struct dmi_system_id bigsmp_dmi_table[] = { - { hp_ht_bigsmp, "HP ProLiant DL760 G2", - { DMI_MATCH(DMI_BIOS_VENDOR, "HP"), - DMI_MATCH(DMI_BIOS_VERSION, "P44-"), - } - }, - - { hp_ht_bigsmp, "HP ProLiant DL740", - { DMI_MATCH(DMI_BIOS_VENDOR, "HP"), - DMI_MATCH(DMI_BIOS_VERSION, "P47-"), - } - }, - { } /* NULL entry stops DMI scanning */ -}; - -static void bigsmp_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); -} - -static int probe_bigsmp(void) -{ - if (def_to_bigsmp) - dmi_bigsmp = 1; - else - dmi_check_system(bigsmp_dmi_table); - - return dmi_bigsmp; -} - -static struct apic apic_bigsmp = { - - .name = "bigsmp", - .probe = probe_bigsmp, - .acpi_madt_oem_check = NULL, - .apic_id_valid = default_apic_id_valid, - .apic_id_registered = bigsmp_apic_id_registered, - - .irq_delivery_mode = dest_Fixed, - /* phys delivery to target CPU: */ - .irq_dest_mode = 0, - - .target_cpus = bigsmp_target_cpus, - .disable_esr = 1, - .dest_logical = 0, - .check_apicid_used = bigsmp_check_apicid_used, - .check_apicid_present = bigsmp_check_apicid_present, - - .vector_allocation_domain = bigsmp_vector_allocation_domain, - .init_apic_ldr = bigsmp_init_apic_ldr, - - .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, - .setup_apic_routing = bigsmp_setup_apic_routing, - .multi_timer_check = NULL, - .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, - .apicid_to_cpu_present = physid_set_mask_of_physid, - .setup_portio_remap = NULL, - .check_phys_apicid_present = bigsmp_check_phys_apicid_present, - .enable_apic_mode = NULL, - .phys_pkg_id = bigsmp_phys_pkg_id, - .mps_oem_check = NULL, - - .get_apic_id = bigsmp_get_apic_id, - .set_apic_id = NULL, - .apic_id_mask = 0xFF << 24, - - .cpu_mask_to_apicid = bigsmp_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = bigsmp_cpu_mask_to_apicid_and, - - .send_IPI_mask = bigsmp_send_IPI_mask, - .send_IPI_mask_allbutself = NULL, - .send_IPI_allbutself = bigsmp_send_IPI_allbutself, - .send_IPI_all = bigsmp_send_IPI_all, - .send_IPI_self = default_send_IPI_self, - - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - - .wait_for_init_deassert = default_wait_for_init_deassert, - - .smp_callin_clear_local_apic = NULL, - .inquire_remote_apic = default_inquire_remote_apic, - - .read = native_apic_mem_read, - .write = native_apic_mem_write, - .icr_read = native_apic_icr_read, - .icr_write = native_apic_icr_write, - .wait_icr_idle = native_apic_wait_icr_idle, - .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, - - .x86_32_early_logical_apicid = bigsmp_early_logical_apicid, -}; - -void __init 
generic_bigsmp_probe(void) -{ - unsigned int cpu; - - if (!probe_bigsmp()) - return; - - apic = &apic_bigsmp; - - for_each_possible_cpu(cpu) { - if (early_per_cpu(x86_cpu_to_logical_apicid, - cpu) == BAD_APICID) - continue; - early_per_cpu(x86_cpu_to_logical_apicid, cpu) = - bigsmp_early_logical_apicid(cpu); - } - - pr_info("Overriding APIC driver with %s\n", apic_bigsmp.name); -} - -apic_driver(apic_bigsmp); diff --git a/ANDROID_3.4.5/arch/x86/kernel/apic/es7000_32.c b/ANDROID_3.4.5/arch/x86/kernel/apic/es7000_32.c deleted file mode 100644 index e42d1d3b..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/apic/es7000_32.c +++ /dev/null @@ -1,757 +0,0 @@ -/* - * Written by: Garry Forsgren, Unisys Corporation - * Natalie Protasevich, Unisys Corporation - * - * This file contains the code to configure and interface - * with Unisys ES7000 series hardware system manager. - * - * Copyright (c) 2003 Unisys Corporation. - * Copyright (C) 2009, Red Hat, Inc., Ingo Molnar - * - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston MA 02111-1307, USA. - * - * Contact information: Unisys Corporation, Township Line & Union Meeting - * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or: - * - * http://www.unisys.com - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/notifier.h> -#include <linux/spinlock.h> -#include <linux/cpumask.h> -#include <linux/threads.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/reboot.h> -#include <linux/string.h> -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/acpi.h> -#include <linux/init.h> -#include <linux/gfp.h> -#include <linux/nmi.h> -#include <linux/smp.h> -#include <linux/io.h> - -#include <asm/apicdef.h> -#include <linux/atomic.h> -#include <asm/fixmap.h> -#include <asm/mpspec.h> -#include <asm/setup.h> -#include <asm/apic.h> -#include <asm/ipi.h> - -/* - * ES7000 chipsets - */ - -#define NON_UNISYS 0 -#define ES7000_CLASSIC 1 -#define ES7000_ZORRO 2 - -#define MIP_REG 1 -#define MIP_PSAI_REG 4 - -#define MIP_BUSY 1 -#define MIP_SPIN 0xf0000 -#define MIP_VALID 0x0100000000000000ULL -#define MIP_SW_APIC 0x1020b - -#define MIP_PORT(val) ((val >> 32) & 0xffff) - -#define MIP_RD_LO(val) (val & 0xffffffff) - -struct mip_reg { - unsigned long long off_0x00; - unsigned long long off_0x08; - unsigned long long off_0x10; - unsigned long long off_0x18; - unsigned long long off_0x20; - unsigned long long off_0x28; - unsigned long long off_0x30; - unsigned long long off_0x38; -}; - -struct mip_reg_info { - unsigned long long mip_info; - unsigned long long delivery_info; - unsigned long long host_reg; - unsigned long long mip_reg; -}; - -struct psai { - unsigned long long entry_type; - unsigned long long addr; - unsigned long long bep_addr; -}; - -#ifdef CONFIG_ACPI - -struct es7000_oem_table { - struct acpi_table_header Header; - u32 OEMTableAddr; - u32 OEMTableSize; -}; - -static unsigned long oem_addrX; -static unsigned long oem_size; - -#endif - -/* - * ES7000 
Globals - */ - -static volatile unsigned long *psai; -static struct mip_reg *mip_reg; -static struct mip_reg *host_reg; -static int mip_port; -static unsigned long mip_addr; -static unsigned long host_addr; - -int es7000_plat; - -/* - * GSI override for ES7000 platforms. - */ - - -static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) -{ - unsigned long vect = 0, psaival = 0; - - if (psai == NULL) - return -1; - - vect = ((unsigned long)__pa(eip)/0x1000) << 16; - psaival = (0x1000000 | vect | cpu); - - while (*psai & 0x1000000) - ; - - *psai = psaival; - - return 0; -} - -static int es7000_apic_is_cluster(void) -{ - /* MPENTIUMIII */ - if (boot_cpu_data.x86 == 6 && - (boot_cpu_data.x86_model >= 7 && boot_cpu_data.x86_model <= 11)) - return 1; - - return 0; -} - -static void setup_unisys(void) -{ - /* - * Determine the generation of the ES7000 currently running. - * - * es7000_plat = 1 if the machine is a 5xx ES7000 box - * es7000_plat = 2 if the machine is a x86_64 ES7000 box - * - */ - if (!(boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2)) - es7000_plat = ES7000_ZORRO; - else - es7000_plat = ES7000_CLASSIC; -} - -/* - * Parse the OEM Table: - */ -static int parse_unisys_oem(char *oemptr) -{ - int i; - int success = 0; - unsigned char type, size; - unsigned long val; - char *tp = NULL; - struct psai *psaip = NULL; - struct mip_reg_info *mi; - struct mip_reg *host, *mip; - - tp = oemptr; - - tp += 8; - - for (i = 0; i <= 6; i++) { - type = *tp++; - size = *tp++; - tp -= 2; - switch (type) { - case MIP_REG: - mi = (struct mip_reg_info *)tp; - val = MIP_RD_LO(mi->host_reg); - host_addr = val; - host = (struct mip_reg *)val; - host_reg = __va(host); - val = MIP_RD_LO(mi->mip_reg); - mip_port = MIP_PORT(mi->mip_info); - mip_addr = val; - mip = (struct mip_reg *)val; - mip_reg = __va(mip); - pr_debug("host_reg = 0x%lx\n", - (unsigned long)host_reg); - pr_debug("mip_reg = 0x%lx\n", - (unsigned long)mip_reg); - success++; - break; - case MIP_PSAI_REG: - psaip = (struct psai *)tp; - if (tp != NULL) { - if (psaip->addr) - psai = __va(psaip->addr); - else - psai = NULL; - success++; - } - break; - default: - break; - } - tp += size; - } - - if (success < 2) - es7000_plat = NON_UNISYS; - else - setup_unisys(); - - return es7000_plat; -} - -#ifdef CONFIG_ACPI -static int __init find_unisys_acpi_oem_table(unsigned long *oem_addr) -{ - struct acpi_table_header *header = NULL; - struct es7000_oem_table *table; - acpi_size tbl_size; - acpi_status ret; - int i = 0; - - for (;;) { - ret = acpi_get_table_with_size("OEM1", i++, &header, &tbl_size); - if (!ACPI_SUCCESS(ret)) - return -1; - - if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) - break; - - early_acpi_os_unmap_memory(header, tbl_size); - } - - table = (void *)header; - - oem_addrX = table->OEMTableAddr; - oem_size = table->OEMTableSize; - - early_acpi_os_unmap_memory(header, tbl_size); - - *oem_addr = (unsigned long)__acpi_map_table(oem_addrX, oem_size); - - return 0; -} - -static void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr) -{ - if (!oem_addr) - return; - - __acpi_unmap_table((char *)oem_addr, oem_size); -} - -static int es7000_check_dsdt(void) -{ - struct acpi_table_header header; - - if (ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, &header)) && - !strncmp(header.oem_id, "UNISYS", 6)) - return 1; - return 0; -} - -static int es7000_acpi_ret; - -/* Hook from generic ACPI tables.c */ -static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - unsigned long 
oem_addr = 0; - int check_dsdt; - int ret = 0; - - /* check dsdt at first to avoid clear fix_map for oem_addr */ - check_dsdt = es7000_check_dsdt(); - - if (!find_unisys_acpi_oem_table(&oem_addr)) { - if (check_dsdt) { - ret = parse_unisys_oem((char *)oem_addr); - } else { - setup_unisys(); - ret = 1; - } - /* - * we need to unmap it - */ - unmap_unisys_acpi_oem_table(oem_addr); - } - - es7000_acpi_ret = ret; - - return ret && !es7000_apic_is_cluster(); -} - -static int es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id) -{ - int ret = es7000_acpi_ret; - - return ret && es7000_apic_is_cluster(); -} - -#else /* !CONFIG_ACPI: */ -static int es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - return 0; -} - -static int es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id) -{ - return 0; -} -#endif /* !CONFIG_ACPI */ - -static void es7000_spin(int n) -{ - int i = 0; - - while (i++ < n) - rep_nop(); -} - -static int es7000_mip_write(struct mip_reg *mip_reg) -{ - int status = 0; - int spin; - - spin = MIP_SPIN; - while ((host_reg->off_0x38 & MIP_VALID) != 0) { - if (--spin <= 0) { - WARN(1, "Timeout waiting for Host Valid Flag\n"); - return -1; - } - es7000_spin(MIP_SPIN); - } - - memcpy(host_reg, mip_reg, sizeof(struct mip_reg)); - outb(1, mip_port); - - spin = MIP_SPIN; - - while ((mip_reg->off_0x38 & MIP_VALID) == 0) { - if (--spin <= 0) { - WARN(1, "Timeout waiting for MIP Valid Flag\n"); - return -1; - } - es7000_spin(MIP_SPIN); - } - - status = (mip_reg->off_0x00 & 0xffff0000000000ULL) >> 48; - mip_reg->off_0x38 &= ~MIP_VALID; - - return status; -} - -static void es7000_enable_apic_mode(void) -{ - struct mip_reg es7000_mip_reg; - int mip_status; - - if (!es7000_plat) - return; - - pr_info("Enabling APIC mode.\n"); - memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); - es7000_mip_reg.off_0x00 = MIP_SW_APIC; - es7000_mip_reg.off_0x38 = MIP_VALID; - - while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0) - WARN(1, "Command failed, status = %x\n", mip_status); -} - -static void es7000_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - /* Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. 
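
/*
 * Editor's note: es7000_mip_write() above hands a command to the system
 * manager through a shared "MIP" mailbox and polls a valid bit with a bounded
 * spin count, warning on timeout. The sketch below shows only that
 * bounded-poll idiom against a plain variable standing in for the hardware
 * flag; names and limits are invented. Since nothing ever clears the flag
 * here, running it exercises the timeout path.
 */
#include <stdio.h>

#define SPIN_LIMIT	100000
#define FLAG_VALID	0x1ull

static volatile unsigned long long mailbox_flags = FLAG_VALID;	/* "busy" */

static int wait_flag_clear(volatile unsigned long long *flags)
{
	int spin = SPIN_LIMIT;

	while (*flags & FLAG_VALID) {
		if (--spin <= 0)
			return -1;	/* timed out, like the WARN() above */
		/* the real code busy-waits in es7000_spin(); nothing to do here */
	}
	return 0;
}

int main(void)
{
	printf("wait result: %d\n", wait_flag_clear(&mailbox_flags));
	return 0;
}
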
- */ - cpumask_clear(retmask); - cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} - - -static void es7000_wait_for_init_deassert(atomic_t *deassert) -{ - while (!atomic_read(deassert)) - cpu_relax(); -} - -static unsigned int es7000_get_apic_id(unsigned long x) -{ - return (x >> 24) & 0xFF; -} - -static void es7000_send_IPI_mask(const struct cpumask *mask, int vector) -{ - default_send_IPI_mask_sequence_phys(mask, vector); -} - -static void es7000_send_IPI_allbutself(int vector) -{ - default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); -} - -static void es7000_send_IPI_all(int vector) -{ - es7000_send_IPI_mask(cpu_online_mask, vector); -} - -static int es7000_apic_id_registered(void) -{ - return 1; -} - -static const struct cpumask *target_cpus_cluster(void) -{ - return cpu_all_mask; -} - -static const struct cpumask *es7000_target_cpus(void) -{ - return cpumask_of(smp_processor_id()); -} - -static unsigned long es7000_check_apicid_used(physid_mask_t *map, int apicid) -{ - return 0; -} - -static unsigned long es7000_check_apicid_present(int bit) -{ - return physid_isset(bit, phys_cpu_present_map); -} - -static int es7000_early_logical_apicid(int cpu) -{ - /* on es7000, logical apicid is the same as physical */ - return early_per_cpu(x86_bios_cpu_apicid, cpu); -} - -static unsigned long calculate_ldr(int cpu) -{ - unsigned long id = per_cpu(x86_bios_cpu_apicid, cpu); - - return SET_APIC_LOGICAL_ID(id); -} - -/* - * Set up the logical destination ID. - * - * Intel recommends to set DFR, LdR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ -static void es7000_init_apic_ldr_cluster(void) -{ - unsigned long val; - int cpu = smp_processor_id(); - - apic_write(APIC_DFR, APIC_DFR_CLUSTER); - val = calculate_ldr(cpu); - apic_write(APIC_LDR, val); -} - -static void es7000_init_apic_ldr(void) -{ - unsigned long val; - int cpu = smp_processor_id(); - - apic_write(APIC_DFR, APIC_DFR_FLAT); - val = calculate_ldr(cpu); - apic_write(APIC_LDR, val); -} - -static void es7000_setup_apic_routing(void) -{ - int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); - - pr_info("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", - (apic_version[apic] == 0x14) ? - "Physical Cluster" : "Logical Cluster", - nr_ioapics, cpumask_bits(es7000_target_cpus())[0]); -} - -static int es7000_cpu_present_to_apicid(int mps_cpu) -{ - if (!mps_cpu) - return boot_cpu_physical_apicid; - else if (mps_cpu < nr_cpu_ids) - return per_cpu(x86_bios_cpu_apicid, mps_cpu); - else - return BAD_APICID; -} - -static int cpu_id; - -static void es7000_apicid_to_cpu_present(int phys_apicid, physid_mask_t *retmap) -{ - physid_set_mask_of_physid(cpu_id, retmap); - ++cpu_id; -} - -static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) -{ - /* For clustered we don't have a good way to do this yet - hack */ - physids_promote(0xFFL, retmap); -} - -static int es7000_check_phys_apicid_present(int cpu_physical_apicid) -{ - boot_cpu_physical_apicid = read_apic_id(); - return 1; -} - -static unsigned int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - unsigned int round = 0; - int cpu, uninitialized_var(apicid); - - /* - * The cpus in the mask must all be on the apic cluster. 
- */ - for_each_cpu(cpu, cpumask) { - int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); - - if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { - WARN(1, "Not a valid mask!"); - - return BAD_APICID; - } - apicid = new_apicid; - round++; - } - return apicid; -} - -static unsigned int -es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask, - const struct cpumask *andmask) -{ - int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0); - cpumask_var_t cpumask; - - if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) - return apicid; - - cpumask_and(cpumask, inmask, andmask); - cpumask_and(cpumask, cpumask, cpu_online_mask); - apicid = es7000_cpu_mask_to_apicid(cpumask); - - free_cpumask_var(cpumask); - - return apicid; -} - -static int es7000_phys_pkg_id(int cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - -static int probe_es7000(void) -{ - /* probed later in mptable/ACPI hooks */ - return 0; -} - -static int es7000_mps_ret; -static int es7000_mps_oem_check(struct mpc_table *mpc, char *oem, - char *productid) -{ - int ret = 0; - - if (mpc->oemptr) { - struct mpc_oemtable *oem_table = - (struct mpc_oemtable *)mpc->oemptr; - - if (!strncmp(oem, "UNISYS", 6)) - ret = parse_unisys_oem((char *)oem_table); - } - - es7000_mps_ret = ret; - - return ret && !es7000_apic_is_cluster(); -} - -static int es7000_mps_oem_check_cluster(struct mpc_table *mpc, char *oem, - char *productid) -{ - int ret = es7000_mps_ret; - - return ret && es7000_apic_is_cluster(); -} - -/* We've been warned by a false positive warning.Use __refdata to keep calm. */ -static struct apic __refdata apic_es7000_cluster = { - - .name = "es7000", - .probe = probe_es7000, - .acpi_madt_oem_check = es7000_acpi_madt_oem_check_cluster, - .apic_id_valid = default_apic_id_valid, - .apic_id_registered = es7000_apic_id_registered, - - .irq_delivery_mode = dest_LowestPrio, - /* logical delivery broadcast to all procs: */ - .irq_dest_mode = 1, - - .target_cpus = target_cpus_cluster, - .disable_esr = 1, - .dest_logical = 0, - .check_apicid_used = es7000_check_apicid_used, - .check_apicid_present = es7000_check_apicid_present, - - .vector_allocation_domain = es7000_vector_allocation_domain, - .init_apic_ldr = es7000_init_apic_ldr_cluster, - - .ioapic_phys_id_map = es7000_ioapic_phys_id_map, - .setup_apic_routing = es7000_setup_apic_routing, - .multi_timer_check = NULL, - .cpu_present_to_apicid = es7000_cpu_present_to_apicid, - .apicid_to_cpu_present = es7000_apicid_to_cpu_present, - .setup_portio_remap = NULL, - .check_phys_apicid_present = es7000_check_phys_apicid_present, - .enable_apic_mode = es7000_enable_apic_mode, - .phys_pkg_id = es7000_phys_pkg_id, - .mps_oem_check = es7000_mps_oem_check_cluster, - - .get_apic_id = es7000_get_apic_id, - .set_apic_id = NULL, - .apic_id_mask = 0xFF << 24, - - .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, - - .send_IPI_mask = es7000_send_IPI_mask, - .send_IPI_mask_allbutself = NULL, - .send_IPI_allbutself = es7000_send_IPI_allbutself, - .send_IPI_all = es7000_send_IPI_all, - .send_IPI_self = default_send_IPI_self, - - .wakeup_secondary_cpu = wakeup_secondary_cpu_via_mip, - - .trampoline_phys_low = 0x467, - .trampoline_phys_high = 0x469, - - .wait_for_init_deassert = NULL, - - /* Nothing to do for most platforms, since cleared by the INIT cycle: */ - .smp_callin_clear_local_apic = NULL, - .inquire_remote_apic = default_inquire_remote_apic, - - .read = native_apic_mem_read, - .write = native_apic_mem_write, 
- .icr_read = native_apic_icr_read, - .icr_write = native_apic_icr_write, - .wait_icr_idle = native_apic_wait_icr_idle, - .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, - - .x86_32_early_logical_apicid = es7000_early_logical_apicid, -}; - -static struct apic __refdata apic_es7000 = { - - .name = "es7000", - .probe = probe_es7000, - .acpi_madt_oem_check = es7000_acpi_madt_oem_check, - .apic_id_valid = default_apic_id_valid, - .apic_id_registered = es7000_apic_id_registered, - - .irq_delivery_mode = dest_Fixed, - /* phys delivery to target CPUs: */ - .irq_dest_mode = 0, - - .target_cpus = es7000_target_cpus, - .disable_esr = 1, - .dest_logical = 0, - .check_apicid_used = es7000_check_apicid_used, - .check_apicid_present = es7000_check_apicid_present, - - .vector_allocation_domain = es7000_vector_allocation_domain, - .init_apic_ldr = es7000_init_apic_ldr, - - .ioapic_phys_id_map = es7000_ioapic_phys_id_map, - .setup_apic_routing = es7000_setup_apic_routing, - .multi_timer_check = NULL, - .cpu_present_to_apicid = es7000_cpu_present_to_apicid, - .apicid_to_cpu_present = es7000_apicid_to_cpu_present, - .setup_portio_remap = NULL, - .check_phys_apicid_present = es7000_check_phys_apicid_present, - .enable_apic_mode = es7000_enable_apic_mode, - .phys_pkg_id = es7000_phys_pkg_id, - .mps_oem_check = es7000_mps_oem_check, - - .get_apic_id = es7000_get_apic_id, - .set_apic_id = NULL, - .apic_id_mask = 0xFF << 24, - - .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, - - .send_IPI_mask = es7000_send_IPI_mask, - .send_IPI_mask_allbutself = NULL, - .send_IPI_allbutself = es7000_send_IPI_allbutself, - .send_IPI_all = es7000_send_IPI_all, - .send_IPI_self = default_send_IPI_self, - - .trampoline_phys_low = 0x467, - .trampoline_phys_high = 0x469, - - .wait_for_init_deassert = es7000_wait_for_init_deassert, - - /* Nothing to do for most platforms, since cleared by the INIT cycle: */ - .smp_callin_clear_local_apic = NULL, - .inquire_remote_apic = default_inquire_remote_apic, - - .read = native_apic_mem_read, - .write = native_apic_mem_write, - .icr_read = native_apic_icr_read, - .icr_write = native_apic_icr_write, - .wait_icr_idle = native_apic_wait_icr_idle, - .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, - - .x86_32_early_logical_apicid = es7000_early_logical_apicid, -}; - -/* - * Need to check for es7000 followed by es7000_cluster, so this order - * in apic_drivers is important. - */ -apic_drivers(apic_es7000, apic_es7000_cluster); diff --git a/ANDROID_3.4.5/arch/x86/kernel/apic/hw_nmi.c b/ANDROID_3.4.5/arch/x86/kernel/apic/hw_nmi.c deleted file mode 100644 index 31cb9ae9..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/apic/hw_nmi.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - * HW NMI watchdog support - * - * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc. - * - * Arch specific calls to support NMI watchdog - * - * Bits copied from original nmi.c file - * - */ -#include <asm/apic.h> - -#include <linux/cpumask.h> -#include <linux/kdebug.h> -#include <linux/notifier.h> -#include <linux/kprobes.h> -#include <linux/nmi.h> -#include <linux/module.h> -#include <linux/delay.h> - -#ifdef CONFIG_HARDLOCKUP_DETECTOR -u64 hw_nmi_get_sample_period(int watchdog_thresh) -{ - return (u64)(cpu_khz) * 1000 * watchdog_thresh; -} -#endif - -#ifdef arch_trigger_all_cpu_backtrace -/* For reliability, we're prepared to waste bits here. 
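
/*
 * Editor's note: hw_nmi_get_sample_period() above sizes the perf sampling
 * period as cpu_khz * 1000 * watchdog_thresh, i.e. the number of CPU cycles
 * in watchdog_thresh seconds. A quick check of that arithmetic with an
 * assumed 2.4 GHz clock and an example 10 s threshold:
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t cpu_khz = 2400000;	/* 2.4 GHz expressed in kHz */
	int watchdog_thresh = 10;	/* seconds */

	uint64_t period = cpu_khz * 1000ull * watchdog_thresh;
	printf("sample period: %llu cycles (~%llu per second)\n",
	       (unsigned long long)period,
	       (unsigned long long)(period / watchdog_thresh));
	return 0;
}
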
*/ -static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; - -/* "in progress" flag of arch_trigger_all_cpu_backtrace */ -static unsigned long backtrace_flag; - -void arch_trigger_all_cpu_backtrace(void) -{ - int i; - - if (test_and_set_bit(0, &backtrace_flag)) - /* - * If there is already a trigger_all_cpu_backtrace() in progress - * (backtrace_flag == 1), don't output double cpu dump infos. - */ - return; - - cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); - - printk(KERN_INFO "sending NMI to all CPUs:\n"); - apic->send_IPI_all(NMI_VECTOR); - - /* Wait for up to 10 seconds for all CPUs to do the backtrace */ - for (i = 0; i < 10 * 1000; i++) { - if (cpumask_empty(to_cpumask(backtrace_mask))) - break; - mdelay(1); - } - - clear_bit(0, &backtrace_flag); - smp_mb__after_clear_bit(); -} - -static int __kprobes -arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs) -{ - int cpu; - - cpu = smp_processor_id(); - - if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { - static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED; - - arch_spin_lock(&lock); - printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); - show_regs(regs); - arch_spin_unlock(&lock); - cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); - return NMI_HANDLED; - } - - return NMI_DONE; -} - -static int __init register_trigger_all_cpu_backtrace(void) -{ - register_nmi_handler(NMI_LOCAL, arch_trigger_all_cpu_backtrace_handler, - 0, "arch_bt"); - return 0; -} -early_initcall(register_trigger_all_cpu_backtrace); -#endif diff --git a/ANDROID_3.4.5/arch/x86/kernel/apic/io_apic.c b/ANDROID_3.4.5/arch/x86/kernel/apic/io_apic.c deleted file mode 100644 index e88300d8..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/apic/io_apic.c +++ /dev/null @@ -1,4130 +0,0 @@ -/* - * Intel IO-APIC support for multi-Pentium hosts. - * - * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo - * - * Many thanks to Stig Venaas for trying out countless experimental - * patches and reporting/debugging problems patiently! - * - * (c) 1999, Multiple IO-APIC support, developed by - * Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and - * Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>, - * further tested and cleaned up by Zach Brown <zab@redhat.com> - * and Ingo Molnar <mingo@redhat.com> - * - * Fixes - * Maciej W. Rozycki : Bits for genuine 82489DX APICs; - * thanks to Eric Gilmore - * and Rolf G. 
Tews - * for testing these extensively - * Paul Diefenbaugh : Added full ACPI support - */ - -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <linux/init.h> -#include <linux/delay.h> -#include <linux/sched.h> -#include <linux/pci.h> -#include <linux/mc146818rtc.h> -#include <linux/compiler.h> -#include <linux/acpi.h> -#include <linux/module.h> -#include <linux/syscore_ops.h> -#include <linux/msi.h> -#include <linux/htirq.h> -#include <linux/freezer.h> -#include <linux/kthread.h> -#include <linux/jiffies.h> /* time_after() */ -#include <linux/slab.h> -#ifdef CONFIG_ACPI -#include <acpi/acpi_bus.h> -#endif -#include <linux/bootmem.h> -#include <linux/dmar.h> -#include <linux/hpet.h> - -#include <asm/idle.h> -#include <asm/io.h> -#include <asm/smp.h> -#include <asm/cpu.h> -#include <asm/desc.h> -#include <asm/proto.h> -#include <asm/acpi.h> -#include <asm/dma.h> -#include <asm/timer.h> -#include <asm/i8259.h> -#include <asm/msidef.h> -#include <asm/hypertransport.h> -#include <asm/setup.h> -#include <asm/irq_remapping.h> -#include <asm/hpet.h> -#include <asm/hw_irq.h> - -#include <asm/apic.h> - -#define __apicdebuginit(type) static type __init - -#define for_each_irq_pin(entry, head) \ - for (entry = head; entry; entry = entry->next) - -static void __init __ioapic_init_mappings(void); - -static unsigned int __io_apic_read (unsigned int apic, unsigned int reg); -static void __io_apic_write (unsigned int apic, unsigned int reg, unsigned int val); -static void __io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val); - -static struct io_apic_ops io_apic_ops = { - .init = __ioapic_init_mappings, - .read = __io_apic_read, - .write = __io_apic_write, - .modify = __io_apic_modify, -}; - -void __init set_io_apic_ops(const struct io_apic_ops *ops) -{ - io_apic_ops = *ops; -} - -/* - * Is the SiS APIC rmw bug present ? - * -1 = don't know, 0 = no, 1 = yes - */ -int sis_apic_bug = -1; - -static DEFINE_RAW_SPINLOCK(ioapic_lock); -static DEFINE_RAW_SPINLOCK(vector_lock); - -static struct ioapic { - /* - * # of IRQ routing registers - */ - int nr_registers; - /* - * Saved state during suspend/resume, or while enabling intr-remap. 
- */ - struct IO_APIC_route_entry *saved_registers; - /* I/O APIC config */ - struct mpc_ioapic mp_config; - /* IO APIC gsi routing info */ - struct mp_ioapic_gsi gsi_config; - DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); -} ioapics[MAX_IO_APICS]; - -#define mpc_ioapic_ver(ioapic_idx) ioapics[ioapic_idx].mp_config.apicver - -int mpc_ioapic_id(int ioapic_idx) -{ - return ioapics[ioapic_idx].mp_config.apicid; -} - -unsigned int mpc_ioapic_addr(int ioapic_idx) -{ - return ioapics[ioapic_idx].mp_config.apicaddr; -} - -struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic_idx) -{ - return &ioapics[ioapic_idx].gsi_config; -} - -int nr_ioapics; - -/* The one past the highest gsi number used */ -u32 gsi_top; - -/* MP IRQ source entries */ -struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; - -/* # of MP IRQ source entries */ -int mp_irq_entries; - -/* GSI interrupts */ -static int nr_irqs_gsi = NR_IRQS_LEGACY; - -#if defined (CONFIG_MCA) || defined (CONFIG_EISA) -int mp_bus_id_to_type[MAX_MP_BUSSES]; -#endif - -DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); - -int skip_ioapic_setup; - -/** - * disable_ioapic_support() - disables ioapic support at runtime - */ -void disable_ioapic_support(void) -{ -#ifdef CONFIG_PCI - noioapicquirk = 1; - noioapicreroute = -1; -#endif - skip_ioapic_setup = 1; -} - -static int __init parse_noapic(char *str) -{ - /* disable IO-APIC */ - disable_ioapic_support(); - return 0; -} -early_param("noapic", parse_noapic); - -static int io_apic_setup_irq_pin(unsigned int irq, int node, - struct io_apic_irq_attr *attr); - -/* Will be called in mpparse/acpi/sfi codes for saving IRQ info */ -void mp_save_irq(struct mpc_intsrc *m) -{ - int i; - - apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," - " IRQ %02x, APIC ID %x, APIC INT %02x\n", - m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus, - m->srcbusirq, m->dstapic, m->dstirq); - - for (i = 0; i < mp_irq_entries; i++) { - if (!memcmp(&mp_irqs[i], m, sizeof(*m))) - return; - } - - memcpy(&mp_irqs[mp_irq_entries], m, sizeof(*m)); - if (++mp_irq_entries == MAX_IRQ_SOURCES) - panic("Max # of irq sources exceeded!!\n"); -} - -struct irq_pin_list { - int apic, pin; - struct irq_pin_list *next; -}; - -static struct irq_pin_list *alloc_irq_pin_list(int node) -{ - return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node); -} - - -/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ -static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY]; - -int __init arch_early_irq_init(void) -{ - struct irq_cfg *cfg; - int count, node, i; - - if (!legacy_pic->nr_legacy_irqs) - io_apic_irqs = ~0UL; - - for (i = 0; i < nr_ioapics; i++) { - ioapics[i].saved_registers = - kzalloc(sizeof(struct IO_APIC_route_entry) * - ioapics[i].nr_registers, GFP_KERNEL); - if (!ioapics[i].saved_registers) - pr_err("IOAPIC %d: suspend/resume impossible!\n", i); - } - - cfg = irq_cfgx; - count = ARRAY_SIZE(irq_cfgx); - node = cpu_to_node(0); - - /* Make sure the legacy interrupts are marked in the bitmap */ - irq_reserve_irqs(0, legacy_pic->nr_legacy_irqs); - - for (i = 0; i < count; i++) { - irq_set_chip_data(i, &cfg[i]); - zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node); - zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node); - /* - * For legacy IRQ's, start with assigning irq0 to irq15 to - * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0. 
- */ - if (i < legacy_pic->nr_legacy_irqs) { - cfg[i].vector = IRQ0_VECTOR + i; - cpumask_set_cpu(0, cfg[i].domain); - } - } - - return 0; -} - -static struct irq_cfg *irq_cfg(unsigned int irq) -{ - return irq_get_chip_data(irq); -} - -static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node) -{ - struct irq_cfg *cfg; - - cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node); - if (!cfg) - return NULL; - if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node)) - goto out_cfg; - if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node)) - goto out_domain; - return cfg; -out_domain: - free_cpumask_var(cfg->domain); -out_cfg: - kfree(cfg); - return NULL; -} - -static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) -{ - if (!cfg) - return; - irq_set_chip_data(at, NULL); - free_cpumask_var(cfg->domain); - free_cpumask_var(cfg->old_domain); - kfree(cfg); -} - -static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node) -{ - int res = irq_alloc_desc_at(at, node); - struct irq_cfg *cfg; - - if (res < 0) { - if (res != -EEXIST) - return NULL; - cfg = irq_get_chip_data(at); - if (cfg) - return cfg; - } - - cfg = alloc_irq_cfg(at, node); - if (cfg) - irq_set_chip_data(at, cfg); - else - irq_free_desc(at); - return cfg; -} - -static int alloc_irq_from(unsigned int from, int node) -{ - return irq_alloc_desc_from(from, node); -} - -static void free_irq_at(unsigned int at, struct irq_cfg *cfg) -{ - free_irq_cfg(at, cfg); - irq_free_desc(at); -} - -static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) -{ - return io_apic_ops.read(apic, reg); -} - -static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) -{ - io_apic_ops.write(apic, reg, value); -} - -static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) -{ - io_apic_ops.modify(apic, reg, value); -} - - -struct io_apic { - unsigned int index; - unsigned int unused[3]; - unsigned int data; - unsigned int unused2[11]; - unsigned int eoi; -}; - -static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) -{ - return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) - + (mpc_ioapic_addr(idx) & ~PAGE_MASK); -} - -static inline void io_apic_eoi(unsigned int apic, unsigned int vector) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - writel(vector, &io_apic->eoi); -} - -static unsigned int __io_apic_read(unsigned int apic, unsigned int reg) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - writel(reg, &io_apic->index); - return readl(&io_apic->data); -} - -static void __io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - - writel(reg, &io_apic->index); - writel(value, &io_apic->data); -} - -/* - * Re-write a value: to be used for read-modify-write - * cycles where the read already set up the index register. 
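
/*
 * Editor's note: alloc_irq_cfg() above uses the kernel's usual goto-unwind
 * error handling: each allocation that fails jumps to a label that frees
 * everything allocated so far, in reverse order. A userspace sketch of the
 * same shape using malloc(); the struct and sizes are hypothetical.
 */
#include <stdlib.h>
#include <stdio.h>

struct cfg {
	int *domain;
	int *old_domain;
};

static struct cfg *alloc_cfg(size_t ncpus)
{
	struct cfg *cfg = calloc(1, sizeof(*cfg));

	if (!cfg)
		return NULL;
	cfg->domain = calloc(ncpus, sizeof(int));
	if (!cfg->domain)
		goto out_cfg;
	cfg->old_domain = calloc(ncpus, sizeof(int));
	if (!cfg->old_domain)
		goto out_domain;
	return cfg;

out_domain:
	free(cfg->domain);
out_cfg:
	free(cfg);
	return NULL;
}

int main(void)
{
	struct cfg *cfg = alloc_cfg(8);

	printf("alloc %s\n", cfg ? "succeeded" : "failed");
	if (cfg) {
		free(cfg->old_domain);
		free(cfg->domain);
		free(cfg);
	}
	return 0;
}
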
- * - * Older SiS APIC requires we rewrite the index register - */ -static void __io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - - if (sis_apic_bug) - writel(reg, &io_apic->index); - writel(value, &io_apic->data); -} - -static bool io_apic_level_ack_pending(struct irq_cfg *cfg) -{ - struct irq_pin_list *entry; - unsigned long flags; - - raw_spin_lock_irqsave(&ioapic_lock, flags); - for_each_irq_pin(entry, cfg->irq_2_pin) { - unsigned int reg; - int pin; - - pin = entry->pin; - reg = io_apic_read(entry->apic, 0x10 + pin*2); - /* Is the remote IRR bit set? */ - if (reg & IO_APIC_REDIR_REMOTE_IRR) { - raw_spin_unlock_irqrestore(&ioapic_lock, flags); - return true; - } - } - raw_spin_unlock_irqrestore(&ioapic_lock, flags); - - return false; -} - -union entry_union { - struct { u32 w1, w2; }; - struct IO_APIC_route_entry entry; -}; - -static struct IO_APIC_route_entry __ioapic_read_entry(int apic, int pin) -{ - union entry_union eu; - - eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); - eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); - - return eu.entry; -} - -static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) -{ - union entry_union eu; - unsigned long flags; - - raw_spin_lock_irqsave(&ioapic_lock, flags); - eu.entry = __ioapic_read_entry(apic, pin); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); - - return eu.entry; -} - -/* - * When we write a new IO APIC routing entry, we need to write the high - * word first! If the mask bit in the low word is clear, we will enable - * the interrupt, and we need to make sure the entry is fully populated - * before that happens. - */ -static void __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) -{ - union entry_union eu = {{0, 0}}; - - eu.entry = e; - io_apic_write(apic, 0x11 + 2*pin, eu.w2); - io_apic_write(apic, 0x10 + 2*pin, eu.w1); -} - -static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&ioapic_lock, flags); - __ioapic_write_entry(apic, pin, e); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); -} - -/* - * When we mask an IO APIC routing entry, we need to write the low - * word first, in order to set the mask bit before we change the - * high bits! - */ -static void ioapic_mask_entry(int apic, int pin) -{ - unsigned long flags; - union entry_union eu = { .entry.mask = 1 }; - - raw_spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0x10 + 2*pin, eu.w1); - io_apic_write(apic, 0x11 + 2*pin, eu.w2); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); -} - -/* - * The common case is 1:1 IRQ<->pin mappings. Sometimes there are - * shared ISA-space IRQs, so we have to support them. We are super - * fast in the common case, and fast for shared ISA-space IRQs. 
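
/*
 * Editor's note: the IO-APIC above is programmed through a small window:
 * write the register number to an index slot, then read or write a data
 * slot. A 64-bit routing entry therefore takes two 32-bit accesses, which is
 * why the code writes the high word first when arming an entry (the mask bit
 * lives in the low word) and the low word first when masking one. The mock-up
 * below uses a plain array in place of the real MMIO window.
 */
#include <stdio.h>
#include <stdint.h>

static uint32_t fake_regs[64];		/* pretend IO-APIC register file */
static uint32_t window_index;		/* the "index" slot */

static uint32_t win_read(uint32_t reg)
{
	window_index = reg;			/* select the register... */
	return fake_regs[window_index];		/* ...then read "data" */
}

static void win_write(uint32_t reg, uint32_t val)
{
	window_index = reg;
	fake_regs[window_index] = val;
}

static void write_rte(int pin, uint64_t entry)
{
	/* high word first: enabling happens only once the low word lands */
	win_write(0x11 + 2 * pin, (uint32_t)(entry >> 32));
	win_write(0x10 + 2 * pin, (uint32_t)entry);
}

int main(void)
{
	write_rte(0, 0x0100000000000030ull);	/* destination 1, vector 0x30 */
	printf("RTE0 low=0x%08x high=0x%08x\n",
	       win_read(0x10), win_read(0x11));
	return 0;
}
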
- */ -static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) -{ - struct irq_pin_list **last, *entry; - - /* don't allow duplicates */ - last = &cfg->irq_2_pin; - for_each_irq_pin(entry, cfg->irq_2_pin) { - if (entry->apic == apic && entry->pin == pin) - return 0; - last = &entry->next; - } - - entry = alloc_irq_pin_list(node); - if (!entry) { - printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n", - node, apic, pin); - return -ENOMEM; - } - entry->apic = apic; - entry->pin = pin; - - *last = entry; - return 0; -} - -static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) -{ - if (__add_pin_to_irq_node(cfg, node, apic, pin)) - panic("IO-APIC: failed to add irq-pin. Can not proceed\n"); -} - -/* - * Reroute an IRQ to a different pin. - */ -static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, - int oldapic, int oldpin, - int newapic, int newpin) -{ - struct irq_pin_list *entry; - - for_each_irq_pin(entry, cfg->irq_2_pin) { - if (entry->apic == oldapic && entry->pin == oldpin) { - entry->apic = newapic; - entry->pin = newpin; - /* every one is different, right? */ - return; - } - } - - /* old apic/pin didn't exist, so just add new ones */ - add_pin_to_irq_node(cfg, node, newapic, newpin); -} - -static void __io_apic_modify_irq(struct irq_pin_list *entry, - int mask_and, int mask_or, - void (*final)(struct irq_pin_list *entry)) -{ - unsigned int reg, pin; - - pin = entry->pin; - reg = io_apic_read(entry->apic, 0x10 + pin * 2); - reg &= mask_and; - reg |= mask_or; - io_apic_modify(entry->apic, 0x10 + pin * 2, reg); - if (final) - final(entry); -} - -static void io_apic_modify_irq(struct irq_cfg *cfg, - int mask_and, int mask_or, - void (*final)(struct irq_pin_list *entry)) -{ - struct irq_pin_list *entry; - - for_each_irq_pin(entry, cfg->irq_2_pin) - __io_apic_modify_irq(entry, mask_and, mask_or, final); -} - -static void io_apic_sync(struct irq_pin_list *entry) -{ - /* - * Synchronize the IO-APIC and the CPU by doing - * a dummy read from the IO-APIC - */ - struct io_apic __iomem *io_apic; - - io_apic = io_apic_base(entry->apic); - readl(&io_apic->data); -} - -static void mask_ioapic(struct irq_cfg *cfg) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&ioapic_lock, flags); - io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); -} - -static void mask_ioapic_irq(struct irq_data *data) -{ - mask_ioapic(data->chip_data); -} - -static void __unmask_ioapic(struct irq_cfg *cfg) -{ - io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL); -} - -static void unmask_ioapic(struct irq_cfg *cfg) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&ioapic_lock, flags); - __unmask_ioapic(cfg); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); -} - -static void unmask_ioapic_irq(struct irq_data *data) -{ - unmask_ioapic(data->chip_data); -} - -/* - * IO-APIC versions below 0x20 don't support EOI register. - * For the record, here is the information about various versions: - * 0Xh 82489DX - * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant - * 2Xh I/O(x)APIC which is PCI 2.2 Compliant - * 30h-FFh Reserved - * - * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic - * version as 0x2. This is an error with documentation and these ICH chips - * use io-apic's of version 0x20. - * - * For IO-APIC's with EOI register, we use that to do an explicit EOI. 
- * Otherwise, we simulate the EOI message manually by changing the trigger - * mode to edge and then back to level, with RTE being masked during this. - */ -static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg) -{ - if (mpc_ioapic_ver(apic) >= 0x20) { - /* - * Intr-remapping uses pin number as the virtual vector - * in the RTE. Actual vector is programmed in - * intr-remapping table entry. Hence for the io-apic - * EOI we use the pin number. - */ - if (cfg && irq_remapped(cfg)) - io_apic_eoi(apic, pin); - else - io_apic_eoi(apic, vector); - } else { - struct IO_APIC_route_entry entry, entry1; - - entry = entry1 = __ioapic_read_entry(apic, pin); - - /* - * Mask the entry and change the trigger mode to edge. - */ - entry1.mask = 1; - entry1.trigger = IOAPIC_EDGE; - - __ioapic_write_entry(apic, pin, entry1); - - /* - * Restore the previous level triggered entry. - */ - __ioapic_write_entry(apic, pin, entry); - } -} - -static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) -{ - struct irq_pin_list *entry; - unsigned long flags; - - raw_spin_lock_irqsave(&ioapic_lock, flags); - for_each_irq_pin(entry, cfg->irq_2_pin) - __eoi_ioapic_pin(entry->apic, entry->pin, cfg->vector, cfg); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); -} - -static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) -{ - struct IO_APIC_route_entry entry; - - /* Check delivery_mode to be sure we're not clearing an SMI pin */ - entry = ioapic_read_entry(apic, pin); - if (entry.delivery_mode == dest_SMI) - return; - - /* - * Make sure the entry is masked and re-read the contents to check - * if it is a level triggered pin and if the remote-IRR is set. - */ - if (!entry.mask) { - entry.mask = 1; - ioapic_write_entry(apic, pin, entry); - entry = ioapic_read_entry(apic, pin); - } - - if (entry.irr) { - unsigned long flags; - - /* - * Make sure the trigger mode is set to level. Explicit EOI - * doesn't clear the remote-IRR if the trigger mode is not - * set to level. - */ - if (!entry.trigger) { - entry.trigger = IOAPIC_LEVEL; - ioapic_write_entry(apic, pin, entry); - } - - raw_spin_lock_irqsave(&ioapic_lock, flags); - __eoi_ioapic_pin(apic, pin, entry.vector, NULL); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); - } - - /* - * Clear the rest of the bits in the IO-APIC RTE except for the mask - * bit. - */ - ioapic_mask_entry(apic, pin); - entry = ioapic_read_entry(apic, pin); - if (entry.irr) - printk(KERN_ERR "Unable to reset IRR for apic: %d, pin :%d\n", - mpc_ioapic_id(apic), pin); -} - -static void clear_IO_APIC (void) -{ - int apic, pin; - - for (apic = 0; apic < nr_ioapics; apic++) - for (pin = 0; pin < ioapics[apic].nr_registers; pin++) - clear_IO_APIC_pin(apic, pin); -} - -#ifdef CONFIG_X86_32 -/* - * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to - * specific CPU-side IRQs. - */ - -#define MAX_PIRQS 8 -static int pirq_entries[MAX_PIRQS] = { - [0 ... MAX_PIRQS - 1] = -1 -}; - -static int __init ioapic_pirq_setup(char *str) -{ - int i, max; - int ints[MAX_PIRQS+1]; - - get_options(str, ARRAY_SIZE(ints), ints); - - apic_printk(APIC_VERBOSE, KERN_INFO - "PIRQ redirection, working around broken MP-BIOS.\n"); - max = MAX_PIRQS; - if (ints[0] < MAX_PIRQS) - max = ints[0]; - - for (i = 0; i < max; i++) { - apic_printk(APIC_VERBOSE, KERN_DEBUG - "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); - /* - * PIRQs are mapped upside down, usually. 
- */ - pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; - } - return 1; -} - -__setup("pirq=", ioapic_pirq_setup); -#endif /* CONFIG_X86_32 */ - -/* - * Saves all the IO-APIC RTE's - */ -int save_ioapic_entries(void) -{ - int apic, pin; - int err = 0; - - for (apic = 0; apic < nr_ioapics; apic++) { - if (!ioapics[apic].saved_registers) { - err = -ENOMEM; - continue; - } - - for (pin = 0; pin < ioapics[apic].nr_registers; pin++) - ioapics[apic].saved_registers[pin] = - ioapic_read_entry(apic, pin); - } - - return err; -} - -/* - * Mask all IO APIC entries. - */ -void mask_ioapic_entries(void) -{ - int apic, pin; - - for (apic = 0; apic < nr_ioapics; apic++) { - if (!ioapics[apic].saved_registers) - continue; - - for (pin = 0; pin < ioapics[apic].nr_registers; pin++) { - struct IO_APIC_route_entry entry; - - entry = ioapics[apic].saved_registers[pin]; - if (!entry.mask) { - entry.mask = 1; - ioapic_write_entry(apic, pin, entry); - } - } - } -} - -/* - * Restore IO APIC entries which was saved in the ioapic structure. - */ -int restore_ioapic_entries(void) -{ - int apic, pin; - - for (apic = 0; apic < nr_ioapics; apic++) { - if (!ioapics[apic].saved_registers) - continue; - - for (pin = 0; pin < ioapics[apic].nr_registers; pin++) - ioapic_write_entry(apic, pin, - ioapics[apic].saved_registers[pin]); - } - return 0; -} - -/* - * Find the IRQ entry number of a certain pin. - */ -static int find_irq_entry(int ioapic_idx, int pin, int type) -{ - int i; - - for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].irqtype == type && - (mp_irqs[i].dstapic == mpc_ioapic_id(ioapic_idx) || - mp_irqs[i].dstapic == MP_APIC_ALL) && - mp_irqs[i].dstirq == pin) - return i; - - return -1; -} - -/* - * Find the pin to which IRQ[irq] (ISA) is connected - */ -static int __init find_isa_irq_pin(int irq, int type) -{ - int i; - - for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].srcbus; - - if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].irqtype == type) && - (mp_irqs[i].srcbusirq == irq)) - - return mp_irqs[i].dstirq; - } - return -1; -} - -static int __init find_isa_irq_apic(int irq, int type) -{ - int i; - - for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].srcbus; - - if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].irqtype == type) && - (mp_irqs[i].srcbusirq == irq)) - break; - } - - if (i < mp_irq_entries) { - int ioapic_idx; - - for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) - if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic) - return ioapic_idx; - } - - return -1; -} - -#if defined(CONFIG_EISA) || defined(CONFIG_MCA) -/* - * EISA Edge/Level control register, ELCR - */ -static int EISA_ELCR(unsigned int irq) -{ - if (irq < legacy_pic->nr_legacy_irqs) { - unsigned int port = 0x4d0 + (irq >> 3); - return (inb(port) >> (irq & 7)) & 1; - } - apic_printk(APIC_VERBOSE, KERN_INFO - "Broken MPtable reports ISA irq %d\n", irq); - return 0; -} - -#endif - -/* ISA interrupts are always polarity zero edge triggered, - * when listed as conforming in the MP table. */ - -#define default_ISA_trigger(idx) (0) -#define default_ISA_polarity(idx) (0) - -/* EISA interrupts are always polarity zero and can be edge or level - * trigger depending on the ELCR value. 
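
/*
 * Editor's note: EISA_ELCR() above reads the edge/level control registers at
 * I/O ports 0x4d0/0x4d1: one bit per IRQ, eight IRQs per port. The sketch
 * below reproduces just the port/bit arithmetic against a fake register pair
 * with made-up contents.
 */
#include <stdio.h>

static unsigned char fake_elcr[2] = { 0x00, 0x0a };	/* invented contents */

static int elcr_level_triggered(unsigned int irq)
{
	unsigned int port_off = irq >> 3;	/* 0 => 0x4d0, 1 => 0x4d1 */
	return (fake_elcr[port_off] >> (irq & 7)) & 1;
}

int main(void)
{
	unsigned int irq;

	for (irq = 8; irq < 12; irq++)
		printf("IRQ %u: %s\n", irq,
		       elcr_level_triggered(irq) ? "level" : "edge");
	return 0;
}
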
If an interrupt is listed as - * EISA conforming in the MP table, that means its trigger type must - * be read in from the ELCR */ - -#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].srcbusirq)) -#define default_EISA_polarity(idx) default_ISA_polarity(idx) - -/* PCI interrupts are always polarity one level triggered, - * when listed as conforming in the MP table. */ - -#define default_PCI_trigger(idx) (1) -#define default_PCI_polarity(idx) (1) - -/* MCA interrupts are always polarity zero level triggered, - * when listed as conforming in the MP table. */ - -#define default_MCA_trigger(idx) (1) -#define default_MCA_polarity(idx) default_ISA_polarity(idx) - -static int irq_polarity(int idx) -{ - int bus = mp_irqs[idx].srcbus; - int polarity; - - /* - * Determine IRQ line polarity (high active or low active): - */ - switch (mp_irqs[idx].irqflag & 3) - { - case 0: /* conforms, ie. bus-type dependent polarity */ - if (test_bit(bus, mp_bus_not_pci)) - polarity = default_ISA_polarity(idx); - else - polarity = default_PCI_polarity(idx); - break; - case 1: /* high active */ - { - polarity = 0; - break; - } - case 2: /* reserved */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } - case 3: /* low active */ - { - polarity = 1; - break; - } - default: /* invalid */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } - } - return polarity; -} - -static int irq_trigger(int idx) -{ - int bus = mp_irqs[idx].srcbus; - int trigger; - - /* - * Determine IRQ trigger mode (edge or level sensitive): - */ - switch ((mp_irqs[idx].irqflag>>2) & 3) - { - case 0: /* conforms, ie. bus-type dependent */ - if (test_bit(bus, mp_bus_not_pci)) - trigger = default_ISA_trigger(idx); - else - trigger = default_PCI_trigger(idx); -#if defined(CONFIG_EISA) || defined(CONFIG_MCA) - switch (mp_bus_id_to_type[bus]) { - case MP_BUS_ISA: /* ISA pin */ - { - /* set before the switch */ - break; - } - case MP_BUS_EISA: /* EISA pin */ - { - trigger = default_EISA_trigger(idx); - break; - } - case MP_BUS_PCI: /* PCI pin */ - { - /* set before the switch */ - break; - } - case MP_BUS_MCA: /* MCA pin */ - { - trigger = default_MCA_trigger(idx); - break; - } - default: - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 1; - break; - } - } -#endif - break; - case 1: /* edge */ - { - trigger = 0; - break; - } - case 2: /* reserved */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 1; - break; - } - case 3: /* level */ - { - trigger = 1; - break; - } - default: /* invalid */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 0; - break; - } - } - return trigger; -} - -static int pin_2_irq(int idx, int apic, int pin) -{ - int irq; - int bus = mp_irqs[idx].srcbus; - struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(apic); - - /* - * Debugging check, we are in big trouble if this message pops up! - */ - if (mp_irqs[idx].dstirq != pin) - printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); - - if (test_bit(bus, mp_bus_not_pci)) { - irq = mp_irqs[idx].srcbusirq; - } else { - u32 gsi = gsi_cfg->gsi_base + pin; - - if (gsi >= NR_IRQS_LEGACY) - irq = gsi; - else - irq = gsi_top + gsi; - } - -#ifdef CONFIG_X86_32 - /* - * PCI IRQ command line redirection. Yes, limits are hardcoded. 
- */ - if ((pin >= 16) && (pin <= 23)) { - if (pirq_entries[pin-16] != -1) { - if (!pirq_entries[pin-16]) { - apic_printk(APIC_VERBOSE, KERN_DEBUG - "disabling PIRQ%d\n", pin-16); - } else { - irq = pirq_entries[pin-16]; - apic_printk(APIC_VERBOSE, KERN_DEBUG - "using PIRQ%d -> IRQ %d\n", - pin-16, irq); - } - } - } -#endif - - return irq; -} - -/* - * Find a specific PCI IRQ entry. - * Not an __init, possibly needed by modules - */ -int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, - struct io_apic_irq_attr *irq_attr) -{ - int ioapic_idx, i, best_guess = -1; - - apic_printk(APIC_DEBUG, - "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", - bus, slot, pin); - if (test_bit(bus, mp_bus_not_pci)) { - apic_printk(APIC_VERBOSE, - "PCI BIOS passed nonexistent PCI bus %d!\n", bus); - return -1; - } - for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].srcbus; - - for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) - if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic || - mp_irqs[i].dstapic == MP_APIC_ALL) - break; - - if (!test_bit(lbus, mp_bus_not_pci) && - !mp_irqs[i].irqtype && - (bus == lbus) && - (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i, ioapic_idx, mp_irqs[i].dstirq); - - if (!(ioapic_idx || IO_APIC_IRQ(irq))) - continue; - - if (pin == (mp_irqs[i].srcbusirq & 3)) { - set_io_apic_irq_attr(irq_attr, ioapic_idx, - mp_irqs[i].dstirq, - irq_trigger(i), - irq_polarity(i)); - return irq; - } - /* - * Use the first all-but-pin matching entry as a - * best-guess fuzzy result for broken mptables. - */ - if (best_guess < 0) { - set_io_apic_irq_attr(irq_attr, ioapic_idx, - mp_irqs[i].dstirq, - irq_trigger(i), - irq_polarity(i)); - best_guess = irq; - } - } - } - return best_guess; -} -EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); - -void lock_vector_lock(void) -{ - /* Used to the online set of cpus does not change - * during assign_irq_vector. - */ - raw_spin_lock(&vector_lock); -} - -void unlock_vector_lock(void) -{ - raw_spin_unlock(&vector_lock); -} - -static int -__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) -{ - /* - * NOTE! The local APIC isn't very good at handling - * multiple interrupts at the same interrupt level. - * As the interrupt level is determined by taking the - * vector number and shifting that right by 4, we - * want to spread these out a bit so that they don't - * all fall in the same interrupt level. - * - * Also, we've got to be careful not to trash gate - * 0x80, because int 0x80 is hm, kind of importantish. ;) - */ - static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; - static int current_offset = VECTOR_OFFSET_START % 8; - unsigned int old_vector; - int cpu, err; - cpumask_var_t tmp_mask; - - if (cfg->move_in_progress) - return -EBUSY; - - if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) - return -ENOMEM; - - old_vector = cfg->vector; - if (old_vector) { - cpumask_and(tmp_mask, mask, cpu_online_mask); - cpumask_and(tmp_mask, cfg->domain, tmp_mask); - if (!cpumask_empty(tmp_mask)) { - free_cpumask_var(tmp_mask); - return 0; - } - } - - /* Only try and allocate irqs on cpus that are present */ - err = -ENOSPC; - for_each_cpu_and(cpu, mask, cpu_online_mask) { - int new_cpu; - int vector, offset; - - apic->vector_allocation_domain(cpu, tmp_mask); - - vector = current_vector; - offset = current_offset; -next: - vector += 8; - if (vector >= first_system_vector) { - /* If out of vectors on large boxen, must share them. 
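 * Wrapping means bumping the offset (mod 8) and restarting the walk
 * at FIRST_EXTERNAL_VECTOR + offset; the walk itself advances in
 * steps of 8 and skips anything in used_vectors (e.g. the int 0x80
 * gate mentioned above) or already claimed in a target cpu's
 * vector_irq[]. Only when it arrives back at its starting vector
 * does it give up on this cpu and try the next one in the mask.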
*/ - offset = (offset + 1) % 8; - vector = FIRST_EXTERNAL_VECTOR + offset; - } - if (unlikely(current_vector == vector)) - continue; - - if (test_bit(vector, used_vectors)) - goto next; - - for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) - if (per_cpu(vector_irq, new_cpu)[vector] != -1) - goto next; - /* Found one! */ - current_vector = vector; - current_offset = offset; - if (old_vector) { - cfg->move_in_progress = 1; - cpumask_copy(cfg->old_domain, cfg->domain); - } - for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) - per_cpu(vector_irq, new_cpu)[vector] = irq; - cfg->vector = vector; - cpumask_copy(cfg->domain, tmp_mask); - err = 0; - break; - } - free_cpumask_var(tmp_mask); - return err; -} - -int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) -{ - int err; - unsigned long flags; - - raw_spin_lock_irqsave(&vector_lock, flags); - err = __assign_irq_vector(irq, cfg, mask); - raw_spin_unlock_irqrestore(&vector_lock, flags); - return err; -} - -static void __clear_irq_vector(int irq, struct irq_cfg *cfg) -{ - int cpu, vector; - - BUG_ON(!cfg->vector); - - vector = cfg->vector; - for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) - per_cpu(vector_irq, cpu)[vector] = -1; - - cfg->vector = 0; - cpumask_clear(cfg->domain); - - if (likely(!cfg->move_in_progress)) - return; - for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { - for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; - vector++) { - if (per_cpu(vector_irq, cpu)[vector] != irq) - continue; - per_cpu(vector_irq, cpu)[vector] = -1; - break; - } - } - cfg->move_in_progress = 0; -} - -void __setup_vector_irq(int cpu) -{ - /* Initialize vector_irq on a new cpu */ - int irq, vector; - struct irq_cfg *cfg; - - /* - * vector_lock will make sure that we don't run into irq vector - * assignments that might be happening on another cpu in parallel, - * while we setup our initial vector to irq mappings. - */ - raw_spin_lock(&vector_lock); - /* Mark the inuse vectors */ - for_each_active_irq(irq) { - cfg = irq_get_chip_data(irq); - if (!cfg) - continue; - /* - * If it is a legacy IRQ handled by the legacy PIC, this cpu - * will be part of the irq_cfg's domain. 
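 * (Each per-cpu vector_irq[] slot maps a vector number to its irq,
 * with -1 meaning free; the first loop below fills in every vector
 * whose irq domain includes this cpu, and the second loop clears any
 * stale entries left over for irqs that no longer target it.)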
- */ - if (irq < legacy_pic->nr_legacy_irqs && !IO_APIC_IRQ(irq)) - cpumask_set_cpu(cpu, cfg->domain); - - if (!cpumask_test_cpu(cpu, cfg->domain)) - continue; - vector = cfg->vector; - per_cpu(vector_irq, cpu)[vector] = irq; - } - /* Mark the free vectors */ - for (vector = 0; vector < NR_VECTORS; ++vector) { - irq = per_cpu(vector_irq, cpu)[vector]; - if (irq < 0) - continue; - - cfg = irq_cfg(irq); - if (!cpumask_test_cpu(cpu, cfg->domain)) - per_cpu(vector_irq, cpu)[vector] = -1; - } - raw_spin_unlock(&vector_lock); -} - -static struct irq_chip ioapic_chip; - -#ifdef CONFIG_X86_32 -static inline int IO_APIC_irq_trigger(int irq) -{ - int apic, idx, pin; - - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < ioapics[apic].nr_registers; pin++) { - idx = find_irq_entry(apic, pin, mp_INT); - if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) - return irq_trigger(idx); - } - } - /* - * nonexistent IRQs are edge default - */ - return 0; -} -#else -static inline int IO_APIC_irq_trigger(int irq) -{ - return 1; -} -#endif - -static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg, - unsigned long trigger) -{ - struct irq_chip *chip = &ioapic_chip; - irq_flow_handler_t hdl; - bool fasteoi; - - if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || - trigger == IOAPIC_LEVEL) { - irq_set_status_flags(irq, IRQ_LEVEL); - fasteoi = true; - } else { - irq_clear_status_flags(irq, IRQ_LEVEL); - fasteoi = false; - } - - if (irq_remapped(cfg)) { - irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - irq_remap_modify_chip_defaults(chip); - fasteoi = trigger != 0; - } - - hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq; - irq_set_chip_and_handler_name(irq, chip, hdl, - fasteoi ? "fasteoi" : "edge"); -} - - -static int setup_ir_ioapic_entry(int irq, - struct IR_IO_APIC_route_entry *entry, - unsigned int destination, int vector, - struct io_apic_irq_attr *attr) -{ - int index; - struct irte irte; - int ioapic_id = mpc_ioapic_id(attr->ioapic); - struct intel_iommu *iommu = map_ioapic_to_ir(ioapic_id); - - if (!iommu) { - pr_warn("No mapping iommu for ioapic %d\n", ioapic_id); - return -ENODEV; - } - - index = alloc_irte(iommu, irq, 1); - if (index < 0) { - pr_warn("Failed to allocate IRTE for ioapic %d\n", ioapic_id); - return -ENOMEM; - } - - prepare_irte(&irte, vector, destination); - - /* Set source-id of interrupt request */ - set_ioapic_sid(&irte, ioapic_id); - - modify_irte(irq, &irte); - - apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: " - "Set IRTE entry (P:%d FPD:%d Dst_Mode:%d " - "Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X " - "Avail:%X Vector:%02X Dest:%08X " - "SID:%04X SQ:%X SVT:%X)\n", - attr->ioapic, irte.present, irte.fpd, irte.dst_mode, - irte.redir_hint, irte.trigger_mode, irte.dlvry_mode, - irte.avail, irte.vector, irte.dest_id, - irte.sid, irte.sq, irte.svt); - - memset(entry, 0, sizeof(*entry)); - - entry->index2 = (index >> 15) & 0x1; - entry->zero = 0; - entry->format = 1; - entry->index = (index & 0x7fff); - /* - * IO-APIC RTE will be configured with virtual vector. - * irq handler will do the explicit EOI to the io-apic. - */ - entry->vector = attr->ioapic_pin; - entry->mask = 0; /* enable IRQ */ - entry->trigger = attr->trigger; - entry->polarity = attr->polarity; - - /* Mask level triggered irqs. - * Use IRQ_DELAYED_DISABLE for edge triggered irqs. 
- */ - if (attr->trigger) - entry->mask = 1; - - return 0; -} - -static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, - unsigned int destination, int vector, - struct io_apic_irq_attr *attr) -{ - if (intr_remapping_enabled) - return setup_ir_ioapic_entry(irq, - (struct IR_IO_APIC_route_entry *)entry, - destination, vector, attr); - - memset(entry, 0, sizeof(*entry)); - - entry->delivery_mode = apic->irq_delivery_mode; - entry->dest_mode = apic->irq_dest_mode; - entry->dest = destination; - entry->vector = vector; - entry->mask = 0; /* enable IRQ */ - entry->trigger = attr->trigger; - entry->polarity = attr->polarity; - - /* - * Mask level triggered irqs. - * Use IRQ_DELAYED_DISABLE for edge triggered irqs. - */ - if (attr->trigger) - entry->mask = 1; - - return 0; -} - -static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg, - struct io_apic_irq_attr *attr) -{ - struct IO_APIC_route_entry entry; - unsigned int dest; - - if (!IO_APIC_IRQ(irq)) - return; - /* - * For legacy irqs, cfg->domain starts with cpu 0 for legacy - * controllers like 8259. Now that IO-APIC can handle this irq, update - * the cfg->domain. - */ - if (irq < legacy_pic->nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain)) - apic->vector_allocation_domain(0, cfg->domain); - - if (assign_irq_vector(irq, cfg, apic->target_cpus())) - return; - - dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); - - apic_printk(APIC_VERBOSE,KERN_DEBUG - "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " - "IRQ %d Mode:%i Active:%i Dest:%d)\n", - attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin, - cfg->vector, irq, attr->trigger, attr->polarity, dest); - - if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) { - pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n", - mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); - __clear_irq_vector(irq, cfg); - - return; - } - - ioapic_register_intr(irq, cfg, attr->trigger); - if (irq < legacy_pic->nr_legacy_irqs) - legacy_pic->mask(irq); - - ioapic_write_entry(attr->ioapic, attr->ioapic_pin, entry); -} - -static bool __init io_apic_pin_not_connected(int idx, int ioapic_idx, int pin) -{ - if (idx != -1) - return false; - - apic_printk(APIC_VERBOSE, KERN_DEBUG " apic %d pin %d not connected\n", - mpc_ioapic_id(ioapic_idx), pin); - return true; -} - -static void __init __io_apic_setup_irqs(unsigned int ioapic_idx) -{ - int idx, node = cpu_to_node(0); - struct io_apic_irq_attr attr; - unsigned int pin, irq; - - for (pin = 0; pin < ioapics[ioapic_idx].nr_registers; pin++) { - idx = find_irq_entry(ioapic_idx, pin, mp_INT); - if (io_apic_pin_not_connected(idx, ioapic_idx, pin)) - continue; - - irq = pin_2_irq(idx, ioapic_idx, pin); - - if ((ioapic_idx > 0) && (irq > 16)) - continue; - - /* - * Skip the timer IRQ if there's a quirk handler - * installed and if it returns 1: - */ - if (apic->multi_timer_check && - apic->multi_timer_check(ioapic_idx, irq)) - continue; - - set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx), - irq_polarity(idx)); - - io_apic_setup_irq_pin(irq, node, &attr); - } -} - -static void __init setup_IO_APIC_irqs(void) -{ - unsigned int ioapic_idx; - - apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); - - for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) - __io_apic_setup_irqs(ioapic_idx); -} - -/* - * for the gsit that is not in first ioapic - * but could not use acpi_register_gsi() - * like some special sci in IBM x3330 - */ -void setup_IO_APIC_irq_extra(u32 gsi) -{ - int 
ioapic_idx = 0, pin, idx, irq, node = cpu_to_node(0); - struct io_apic_irq_attr attr; - - /* - * Convert 'gsi' to 'ioapic.pin'. - */ - ioapic_idx = mp_find_ioapic(gsi); - if (ioapic_idx < 0) - return; - - pin = mp_find_ioapic_pin(ioapic_idx, gsi); - idx = find_irq_entry(ioapic_idx, pin, mp_INT); - if (idx == -1) - return; - - irq = pin_2_irq(idx, ioapic_idx, pin); - - /* Only handle the non legacy irqs on secondary ioapics */ - if (ioapic_idx == 0 || irq < NR_IRQS_LEGACY) - return; - - set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx), - irq_polarity(idx)); - - io_apic_setup_irq_pin_once(irq, node, &attr); -} - -/* - * Set up the timer pin, possibly with the 8259A-master behind. - */ -static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, - unsigned int pin, int vector) -{ - struct IO_APIC_route_entry entry; - - if (intr_remapping_enabled) - return; - - memset(&entry, 0, sizeof(entry)); - - /* - * We use logical delivery to get the timer IRQ - * to the first CPU. - */ - entry.dest_mode = apic->irq_dest_mode; - entry.mask = 0; /* don't mask IRQ for edge */ - entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus()); - entry.delivery_mode = apic->irq_delivery_mode; - entry.polarity = 0; - entry.trigger = 0; - entry.vector = vector; - - /* - * The timer IRQ doesn't have to know that behind the - * scene we may have a 8259A-master in AEOI mode ... - */ - irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, - "edge"); - - /* - * Add it to the IO-APIC irq-routing table: - */ - ioapic_write_entry(ioapic_idx, pin, entry); -} - -__apicdebuginit(void) print_IO_APIC(int ioapic_idx) -{ - int i; - union IO_APIC_reg_00 reg_00; - union IO_APIC_reg_01 reg_01; - union IO_APIC_reg_02 reg_02; - union IO_APIC_reg_03 reg_03; - unsigned long flags; - - raw_spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(ioapic_idx, 0); - reg_01.raw = io_apic_read(ioapic_idx, 1); - if (reg_01.bits.version >= 0x10) - reg_02.raw = io_apic_read(ioapic_idx, 2); - if (reg_01.bits.version >= 0x20) - reg_03.raw = io_apic_read(ioapic_idx, 3); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); - - printk("\n"); - printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx)); - printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); - printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); - printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); - printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); - - printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01); - printk(KERN_DEBUG "....... : max redirection entries: %02X\n", - reg_01.bits.entries); - - printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); - printk(KERN_DEBUG "....... : IO APIC version: %02X\n", - reg_01.bits.version); - - /* - * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, - * but the value of reg_02 is read as the previous read register - * value, so ignore it if reg_02 == reg_01. - */ - if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { - printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); - printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); - } - - /* - * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02 - * or reg_03, but the value of reg_0[23] is read as the previous read - * register value, so ignore it if reg_03 == reg_0[12]. - */ - if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw && - reg_03.raw != reg_01.raw) { - printk(KERN_DEBUG ".... 
register #03: %08X\n", reg_03.raw); - printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); - } - - printk(KERN_DEBUG ".... IRQ redirection table:\n"); - - if (intr_remapping_enabled) { - printk(KERN_DEBUG " NR Indx Fmt Mask Trig IRR" - " Pol Stat Indx2 Zero Vect:\n"); - } else { - printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" - " Stat Dmod Deli Vect:\n"); - } - - for (i = 0; i <= reg_01.bits.entries; i++) { - if (intr_remapping_enabled) { - struct IO_APIC_route_entry entry; - struct IR_IO_APIC_route_entry *ir_entry; - - entry = ioapic_read_entry(ioapic_idx, i); - ir_entry = (struct IR_IO_APIC_route_entry *) &entry; - printk(KERN_DEBUG " %02x %04X ", - i, - ir_entry->index - ); - printk("%1d %1d %1d %1d %1d " - "%1d %1d %X %02X\n", - ir_entry->format, - ir_entry->mask, - ir_entry->trigger, - ir_entry->irr, - ir_entry->polarity, - ir_entry->delivery_status, - ir_entry->index2, - ir_entry->zero, - ir_entry->vector - ); - } else { - struct IO_APIC_route_entry entry; - - entry = ioapic_read_entry(ioapic_idx, i); - printk(KERN_DEBUG " %02x %02X ", - i, - entry.dest - ); - printk("%1d %1d %1d %1d %1d " - "%1d %1d %02X\n", - entry.mask, - entry.trigger, - entry.irr, - entry.polarity, - entry.delivery_status, - entry.dest_mode, - entry.delivery_mode, - entry.vector - ); - } - } -} - -__apicdebuginit(void) print_IO_APICs(void) -{ - int ioapic_idx; - struct irq_cfg *cfg; - unsigned int irq; - struct irq_chip *chip; - - printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); - for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) - printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", - mpc_ioapic_id(ioapic_idx), - ioapics[ioapic_idx].nr_registers); - - /* - * We are a bit conservative about what we expect. We have to - * know about every hardware change ASAP. - */ - printk(KERN_INFO "testing the IO APIC.......................\n"); - - for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) - print_IO_APIC(ioapic_idx); - - printk(KERN_DEBUG "IRQ to pin mappings:\n"); - for_each_active_irq(irq) { - struct irq_pin_list *entry; - - chip = irq_get_chip(irq); - if (chip != &ioapic_chip) - continue; - - cfg = irq_get_chip_data(irq); - if (!cfg) - continue; - entry = cfg->irq_2_pin; - if (!entry) - continue; - printk(KERN_DEBUG "IRQ%d ", irq); - for_each_irq_pin(entry, cfg->irq_2_pin) - printk("-> %d:%d", entry->apic, entry->pin); - printk("\n"); - } - - printk(KERN_INFO ".................................... done.\n"); -} - -__apicdebuginit(void) print_APIC_field(int base) -{ - int i; - - printk(KERN_DEBUG); - - for (i = 0; i < 8; i++) - printk(KERN_CONT "%08x", apic_read(base + i*0x10)); - - printk(KERN_CONT "\n"); -} - -__apicdebuginit(void) print_local_APIC(void *dummy) -{ - unsigned int i, v, ver, maxlvt; - u64 icr; - - printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", - smp_processor_id(), hard_smp_processor_id()); - v = apic_read(APIC_ID); - printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id()); - v = apic_read(APIC_LVR); - printk(KERN_INFO "... APIC VERSION: %08x\n", v); - ver = GET_APIC_VERSION(v); - maxlvt = lapic_get_maxlvt(); - - v = apic_read(APIC_TASKPRI); - printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); - - if (APIC_INTEGRATED(ver)) { /* !82489DX */ - if (!APIC_XAPIC(ver)) { - v = apic_read(APIC_ARBPRI); - printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, - v & APIC_ARBPRI_MASK); - } - v = apic_read(APIC_PROCPRI); - printk(KERN_DEBUG "... 
APIC PROCPRI: %08x\n", v); - } - - /* - * Remote read supported only in the 82489DX and local APIC for - * Pentium processors. - */ - if (!APIC_INTEGRATED(ver) || maxlvt == 3) { - v = apic_read(APIC_RRR); - printk(KERN_DEBUG "... APIC RRR: %08x\n", v); - } - - v = apic_read(APIC_LDR); - printk(KERN_DEBUG "... APIC LDR: %08x\n", v); - if (!x2apic_enabled()) { - v = apic_read(APIC_DFR); - printk(KERN_DEBUG "... APIC DFR: %08x\n", v); - } - v = apic_read(APIC_SPIV); - printk(KERN_DEBUG "... APIC SPIV: %08x\n", v); - - printk(KERN_DEBUG "... APIC ISR field:\n"); - print_APIC_field(APIC_ISR); - printk(KERN_DEBUG "... APIC TMR field:\n"); - print_APIC_field(APIC_TMR); - printk(KERN_DEBUG "... APIC IRR field:\n"); - print_APIC_field(APIC_IRR); - - if (APIC_INTEGRATED(ver)) { /* !82489DX */ - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - - v = apic_read(APIC_ESR); - printk(KERN_DEBUG "... APIC ESR: %08x\n", v); - } - - icr = apic_icr_read(); - printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr); - printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32)); - - v = apic_read(APIC_LVTT); - printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); - - if (maxlvt > 3) { /* PC is LVT#4. */ - v = apic_read(APIC_LVTPC); - printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v); - } - v = apic_read(APIC_LVT0); - printk(KERN_DEBUG "... APIC LVT0: %08x\n", v); - v = apic_read(APIC_LVT1); - printk(KERN_DEBUG "... APIC LVT1: %08x\n", v); - - if (maxlvt > 2) { /* ERR is LVT#3. */ - v = apic_read(APIC_LVTERR); - printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v); - } - - v = apic_read(APIC_TMICT); - printk(KERN_DEBUG "... APIC TMICT: %08x\n", v); - v = apic_read(APIC_TMCCT); - printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); - v = apic_read(APIC_TDCR); - printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); - - if (boot_cpu_has(X86_FEATURE_EXTAPIC)) { - v = apic_read(APIC_EFEAT); - maxlvt = (v >> 16) & 0xff; - printk(KERN_DEBUG "... APIC EFEAT: %08x\n", v); - v = apic_read(APIC_ECTRL); - printk(KERN_DEBUG "... APIC ECTRL: %08x\n", v); - for (i = 0; i < maxlvt; i++) { - v = apic_read(APIC_EILVTn(i)); - printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v); - } - } - printk("\n"); -} - -__apicdebuginit(void) print_local_APICs(int maxcpu) -{ - int cpu; - - if (!maxcpu) - return; - - preempt_disable(); - for_each_online_cpu(cpu) { - if (cpu >= maxcpu) - break; - smp_call_function_single(cpu, print_local_APIC, NULL, 1); - } - preempt_enable(); -} - -__apicdebuginit(void) print_PIC(void) -{ - unsigned int v; - unsigned long flags; - - if (!legacy_pic->nr_legacy_irqs) - return; - - printk(KERN_DEBUG "\nprinting PIC contents\n"); - - raw_spin_lock_irqsave(&i8259A_lock, flags); - - v = inb(0xa1) << 8 | inb(0x21); - printk(KERN_DEBUG "... PIC IMR: %04x\n", v); - - v = inb(0xa0) << 8 | inb(0x20); - printk(KERN_DEBUG "... PIC IRR: %04x\n", v); - - outb(0x0b,0xa0); - outb(0x0b,0x20); - v = inb(0xa0) << 8 | inb(0x20); - outb(0x0a,0xa0); - outb(0x0a,0x20); - - raw_spin_unlock_irqrestore(&i8259A_lock, flags); - - printk(KERN_DEBUG "... PIC ISR: %04x\n", v); - - v = inb(0x4d1) << 8 | inb(0x4d0); - printk(KERN_DEBUG "... 
PIC ELCR: %04x\n", v); -} - -static int __initdata show_lapic = 1; -static __init int setup_show_lapic(char *arg) -{ - int num = -1; - - if (strcmp(arg, "all") == 0) { - show_lapic = CONFIG_NR_CPUS; - } else { - get_option(&arg, &num); - if (num >= 0) - show_lapic = num; - } - - return 1; -} -__setup("show_lapic=", setup_show_lapic); - -__apicdebuginit(int) print_ICs(void) -{ - if (apic_verbosity == APIC_QUIET) - return 0; - - print_PIC(); - - /* don't print out if apic is not there */ - if (!cpu_has_apic && !apic_from_smp_config()) - return 0; - - print_local_APICs(show_lapic); - print_IO_APICs(); - - return 0; -} - -late_initcall(print_ICs); - - -/* Where if anywhere is the i8259 connect in external int mode */ -static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; - -void __init enable_IO_APIC(void) -{ - int i8259_apic, i8259_pin; - int apic; - - if (!legacy_pic->nr_legacy_irqs) - return; - - for(apic = 0; apic < nr_ioapics; apic++) { - int pin; - /* See if any of the pins is in ExtINT mode */ - for (pin = 0; pin < ioapics[apic].nr_registers; pin++) { - struct IO_APIC_route_entry entry; - entry = ioapic_read_entry(apic, pin); - - /* If the interrupt line is enabled and in ExtInt mode - * I have found the pin where the i8259 is connected. - */ - if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) { - ioapic_i8259.apic = apic; - ioapic_i8259.pin = pin; - goto found_i8259; - } - } - } - found_i8259: - /* Look to see what if the MP table has reported the ExtINT */ - /* If we could not find the appropriate pin by looking at the ioapic - * the i8259 probably is not connected the ioapic but give the - * mptable a chance anyway. - */ - i8259_pin = find_isa_irq_pin(0, mp_ExtINT); - i8259_apic = find_isa_irq_apic(0, mp_ExtINT); - /* Trust the MP table if nothing is setup in the hardware */ - if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) { - printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n"); - ioapic_i8259.pin = i8259_pin; - ioapic_i8259.apic = i8259_apic; - } - /* Complain if the MP table and the hardware disagree */ - if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) && - (i8259_pin >= 0) && (ioapic_i8259.pin >= 0)) - { - printk(KERN_WARNING "ExtINT in hardware and MP table differ\n"); - } - - /* - * Do not trust the IO-APIC being empty at bootup - */ - clear_IO_APIC(); -} - -/* - * Not an __init, needed by the reboot code - */ -void disable_IO_APIC(void) -{ - /* - * Clear the IO-APIC before rebooting: - */ - clear_IO_APIC(); - - if (!legacy_pic->nr_legacy_irqs) - return; - - /* - * If the i8259 is routed through an IOAPIC - * Put that IOAPIC in virtual wire mode - * so legacy interrupts can be delivered. - * - * With interrupt-remapping, for now we will use virtual wire A mode, - * as virtual wire B is little complex (need to configure both - * IOAPIC RTE as well as interrupt-remapping table entry). - * As this gets called during crash dump, keep this simple for now. 
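 * Concretely, the RTE written below is unmasked, edge triggered,
 * active high, physical destination mode, delivery mode ExtINT and
 * points at the local APIC ID read at that moment (normally the boot
 * processor), so the 8259's INTR output is passed straight through.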
- */ - if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) { - struct IO_APIC_route_entry entry; - - memset(&entry, 0, sizeof(entry)); - entry.mask = 0; /* Enabled */ - entry.trigger = 0; /* Edge */ - entry.irr = 0; - entry.polarity = 0; /* High */ - entry.delivery_status = 0; - entry.dest_mode = 0; /* Physical */ - entry.delivery_mode = dest_ExtINT; /* ExtInt */ - entry.vector = 0; - entry.dest = read_apic_id(); - - /* - * Add it to the IO-APIC irq-routing table: - */ - ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); - } - - /* - * Use virtual wire A mode when interrupt remapping is enabled. - */ - if (cpu_has_apic || apic_from_smp_config()) - disconnect_bsp_APIC(!intr_remapping_enabled && - ioapic_i8259.pin != -1); -} - -#ifdef CONFIG_X86_32 -/* - * function to set the IO-APIC physical IDs based on the - * values stored in the MPC table. - * - * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 - */ -void __init setup_ioapic_ids_from_mpc_nocheck(void) -{ - union IO_APIC_reg_00 reg_00; - physid_mask_t phys_id_present_map; - int ioapic_idx; - int i; - unsigned char old_id; - unsigned long flags; - - /* - * This is broken; anything with a real cpu count has to - * circumvent this idiocy regardless. - */ - apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map); - - /* - * Set the IOAPIC ID to the value stored in the MPC table. - */ - for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) { - /* Read the register 0 value */ - raw_spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(ioapic_idx, 0); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); - - old_id = mpc_ioapic_id(ioapic_idx); - - if (mpc_ioapic_id(ioapic_idx) >= get_physical_broadcast()) { - printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", - ioapic_idx, mpc_ioapic_id(ioapic_idx)); - printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", - reg_00.bits.ID); - ioapics[ioapic_idx].mp_config.apicid = reg_00.bits.ID; - } - - /* - * Sanity check, is the ID really free? Every APIC in a - * system must have a unique ID or we get lots of nice - * 'stuck on smp_invalidate_needed IPI wait' messages. - */ - if (apic->check_apicid_used(&phys_id_present_map, - mpc_ioapic_id(ioapic_idx))) { - printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", - ioapic_idx, mpc_ioapic_id(ioapic_idx)); - for (i = 0; i < get_physical_broadcast(); i++) - if (!physid_isset(i, phys_id_present_map)) - break; - if (i >= get_physical_broadcast()) - panic("Max APIC ID exceeded!\n"); - printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", - i); - physid_set(i, phys_id_present_map); - ioapics[ioapic_idx].mp_config.apicid = i; - } else { - physid_mask_t tmp; - apic->apicid_to_cpu_present(mpc_ioapic_id(ioapic_idx), - &tmp); - apic_printk(APIC_VERBOSE, "Setting %d in the " - "phys_id_present_map\n", - mpc_ioapic_id(ioapic_idx)); - physids_or(phys_id_present_map, phys_id_present_map, tmp); - } - - /* - * We need to adjust the IRQ routing table - * if the ID changed. - */ - if (old_id != mpc_ioapic_id(ioapic_idx)) - for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].dstapic == old_id) - mp_irqs[i].dstapic - = mpc_ioapic_id(ioapic_idx); - - /* - * Update the ID register according to the right value - * from the MPC table if they are different. 
- */ - if (mpc_ioapic_id(ioapic_idx) == reg_00.bits.ID) - continue; - - apic_printk(APIC_VERBOSE, KERN_INFO - "...changing IO-APIC physical APIC ID to %d ...", - mpc_ioapic_id(ioapic_idx)); - - reg_00.bits.ID = mpc_ioapic_id(ioapic_idx); - raw_spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(ioapic_idx, 0, reg_00.raw); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); - - /* - * Sanity check - */ - raw_spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(ioapic_idx, 0); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); - if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) - printk("could not set ID!\n"); - else - apic_printk(APIC_VERBOSE, " ok.\n"); - } -} - -void __init setup_ioapic_ids_from_mpc(void) -{ - - if (acpi_ioapic) - return; - /* - * Don't check I/O APIC IDs for xAPIC systems. They have - * no meaning without the serial APIC bus. - */ - if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) - return; - setup_ioapic_ids_from_mpc_nocheck(); -} -#endif - -int no_timer_check __initdata; - -static int __init notimercheck(char *s) -{ - no_timer_check = 1; - return 1; -} -__setup("no_timer_check", notimercheck); - -/* - * There is a nasty bug in some older SMP boards, their mptable lies - * about the timer IRQ. We do the following to work around the situation: - * - * - timer IRQ defaults to IO-APIC IRQ - * - if this function detects that timer IRQs are defunct, then we fall - * back to ISA timer IRQs - */ -static int __init timer_irq_works(void) -{ - unsigned long t1 = jiffies; - unsigned long flags; - - if (no_timer_check) - return 1; - - local_save_flags(flags); - local_irq_enable(); - /* Let ten ticks pass... */ - mdelay((10 * 1000) / HZ); - local_irq_restore(flags); - - /* - * Expect a few ticks at least, to be sure some possible - * glue logic does not lock up after one or two first - * ticks in a non-ExtINT mode. Also the local APIC - * might have cached one ExtINT interrupt. Finally, at - * least one tick may be lost due to delays. - */ - - /* jiffies wrap? */ - if (time_after(jiffies, t1 + 4)) - return 1; - return 0; -} - -/* - * In the SMP+IOAPIC case it might happen that there are an unspecified - * number of pending IRQ events unhandled. These cases are very rare, - * so we 'resend' these IRQs via IPIs, to the same CPU. It's much - * better to do it this way as thus we do not have to be aware of - * 'pending' interrupts in the IRQ path, except at this point. - */ -/* - * Edge triggered needs to resend any interrupt - * that was delayed but this is now handled in the device - * independent code. - */ - -/* - * Starting up a edge-triggered IO-APIC interrupt is - * nasty - we need to make sure that we get the edge. - * If it is already asserted for some reason, we need - * return 1 to indicate that is was pending. - * - * This is not complete - we should be able to fake - * an edge even if it isn't on the 8259A... 
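 * For a legacy irq, startup_ioapic_irq() below first masks the 8259
 * line and samples its pending latch so the caller learns whether an
 * edge was already in flight, and only then unmasks the IO-APIC pin.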
- */ - -static unsigned int startup_ioapic_irq(struct irq_data *data) -{ - int was_pending = 0, irq = data->irq; - unsigned long flags; - - raw_spin_lock_irqsave(&ioapic_lock, flags); - if (irq < legacy_pic->nr_legacy_irqs) { - legacy_pic->mask(irq); - if (legacy_pic->irq_pending(irq)) - was_pending = 1; - } - __unmask_ioapic(data->chip_data); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); - - return was_pending; -} - -static int ioapic_retrigger_irq(struct irq_data *data) -{ - struct irq_cfg *cfg = data->chip_data; - unsigned long flags; - - raw_spin_lock_irqsave(&vector_lock, flags); - apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); - raw_spin_unlock_irqrestore(&vector_lock, flags); - - return 1; -} - -/* - * Level and edge triggered IO-APIC interrupts need different handling, - * so we use two separate IRQ descriptors. Edge triggered IRQs can be - * handled with the level-triggered descriptor, but that one has slightly - * more overhead. Level-triggered interrupts cannot be handled with the - * edge-triggered handler, without risking IRQ storms and other ugly - * races. - */ - -#ifdef CONFIG_SMP -void send_cleanup_vector(struct irq_cfg *cfg) -{ - cpumask_var_t cleanup_mask; - - if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { - unsigned int i; - for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) - apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); - } else { - cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); - apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - free_cpumask_var(cleanup_mask); - } - cfg->move_in_progress = 0; -} - -static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) -{ - int apic, pin; - struct irq_pin_list *entry; - u8 vector = cfg->vector; - - for_each_irq_pin(entry, cfg->irq_2_pin) { - unsigned int reg; - - apic = entry->apic; - pin = entry->pin; - /* - * With interrupt-remapping, destination information comes - * from interrupt-remapping table entry. - */ - if (!irq_remapped(cfg)) - io_apic_write(apic, 0x11 + pin*2, dest); - reg = io_apic_read(apic, 0x10 + pin*2); - reg &= ~IO_APIC_REDIR_VECTOR_MASK; - reg |= vector; - io_apic_modify(apic, 0x10 + pin*2, reg); - } -} - -/* - * Either sets data->affinity to a valid value, and returns - * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and - * leaves data->affinity untouched. - */ -int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, - unsigned int *dest_id) -{ - struct irq_cfg *cfg = data->chip_data; - - if (!cpumask_intersects(mask, cpu_online_mask)) - return -1; - - if (assign_irq_vector(data->irq, data->chip_data, mask)) - return -1; - - cpumask_copy(data->affinity, mask); - - *dest_id = apic->cpu_mask_to_apicid_and(mask, cfg->domain); - return 0; -} - -static int -ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) -{ - unsigned int dest, irq = data->irq; - unsigned long flags; - int ret; - - raw_spin_lock_irqsave(&ioapic_lock, flags); - ret = __ioapic_set_affinity(data, mask, &dest); - if (!ret) { - /* Only the high 8 bits are valid. */ - dest = SET_APIC_LOGICAL_ID(dest); - __target_IO_APIC_irq(irq, dest, data->chip_data); - } - raw_spin_unlock_irqrestore(&ioapic_lock, flags); - return ret; -} - -#ifdef CONFIG_IRQ_REMAP - -/* - * Migrate the IO-APIC irq in the presence of intr-remapping. 
- * - * For both level and edge triggered, irq migration is a simple atomic - * update(of vector and cpu destination) of IRTE and flush the hardware cache. - * - * For level triggered, we eliminate the io-apic RTE modification (with the - * updated vector information), by using a virtual vector (io-apic pin number). - * Real vector that is used for interrupting cpu will be coming from - * the interrupt-remapping table entry. - * - * As the migration is a simple atomic update of IRTE, the same mechanism - * is used to migrate MSI irq's in the presence of interrupt-remapping. - */ -static int -ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) -{ - struct irq_cfg *cfg = data->chip_data; - unsigned int dest, irq = data->irq; - struct irte irte; - - if (!cpumask_intersects(mask, cpu_online_mask)) - return -EINVAL; - - if (get_irte(irq, &irte)) - return -EBUSY; - - if (assign_irq_vector(irq, cfg, mask)) - return -EBUSY; - - dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); - - irte.vector = cfg->vector; - irte.dest_id = IRTE_DEST(dest); - - /* - * Atomically updates the IRTE with the new destination, vector - * and flushes the interrupt entry cache. - */ - modify_irte(irq, &irte); - - /* - * After this point, all the interrupts will start arriving - * at the new destination. So, time to cleanup the previous - * vector allocation. - */ - if (cfg->move_in_progress) - send_cleanup_vector(cfg); - - cpumask_copy(data->affinity, mask); - return 0; -} - -#else -static inline int -ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) -{ - return 0; -} -#endif - -asmlinkage void smp_irq_move_cleanup_interrupt(void) -{ - unsigned vector, me; - - ack_APIC_irq(); - irq_enter(); - exit_idle(); - - me = smp_processor_id(); - for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { - unsigned int irq; - unsigned int irr; - struct irq_desc *desc; - struct irq_cfg *cfg; - irq = __this_cpu_read(vector_irq[vector]); - - if (irq == -1) - continue; - - desc = irq_to_desc(irq); - if (!desc) - continue; - - cfg = irq_cfg(irq); - raw_spin_lock(&desc->lock); - - /* - * Check if the irq migration is in progress. If so, we - * haven't received the cleanup request yet for this irq. - */ - if (cfg->move_in_progress) - goto unlock; - - if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) - goto unlock; - - irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); - /* - * Check if the vector that needs to be cleanedup is - * registered at the cpu's IRR. If so, then this is not - * the best time to clean it up. Lets clean it up in the - * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR - * to myself. 
- */ - if (irr & (1 << (vector % 32))) { - apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); - goto unlock; - } - __this_cpu_write(vector_irq[vector], -1); -unlock: - raw_spin_unlock(&desc->lock); - } - - irq_exit(); -} - -static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector) -{ - unsigned me; - - if (likely(!cfg->move_in_progress)) - return; - - me = smp_processor_id(); - - if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) - send_cleanup_vector(cfg); -} - -static void irq_complete_move(struct irq_cfg *cfg) -{ - __irq_complete_move(cfg, ~get_irq_regs()->orig_ax); -} - -void irq_force_complete_move(int irq) -{ - struct irq_cfg *cfg = irq_get_chip_data(irq); - - if (!cfg) - return; - - __irq_complete_move(cfg, cfg->vector); -} -#else -static inline void irq_complete_move(struct irq_cfg *cfg) { } -#endif - -static void ack_apic_edge(struct irq_data *data) -{ - irq_complete_move(data->chip_data); - irq_move_irq(data); - ack_APIC_irq(); -} - -atomic_t irq_mis_count; - -#ifdef CONFIG_GENERIC_PENDING_IRQ -static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg) -{ - /* If we are moving the irq we need to mask it */ - if (unlikely(irqd_is_setaffinity_pending(data))) { - mask_ioapic(cfg); - return true; - } - return false; -} - -static inline void ioapic_irqd_unmask(struct irq_data *data, - struct irq_cfg *cfg, bool masked) -{ - if (unlikely(masked)) { - /* Only migrate the irq if the ack has been received. - * - * On rare occasions the broadcast level triggered ack gets - * delayed going to ioapics, and if we reprogram the - * vector while Remote IRR is still set the irq will never - * fire again. - * - * To prevent this scenario we read the Remote IRR bit - * of the ioapic. This has two effects. - * - On any sane system the read of the ioapic will - * flush writes (and acks) going to the ioapic from - * this cpu. - * - We get to see if the ACK has actually been delivered. - * - * Based on failed experiments of reprogramming the - * ioapic entry from outside of irq context starting - * with masking the ioapic entry and then polling until - * Remote IRR was clear before reprogramming the - * ioapic I don't trust the Remote IRR bit to be - * completey accurate. - * - * However there appears to be no other way to plug - * this race, so if the Remote IRR bit is not - * accurate and is causing problems then it is a hardware bug - * and you can go talk to the chipset vendor about it. - */ - if (!io_apic_level_ack_pending(cfg)) - irq_move_masked_irq(data); - unmask_ioapic(cfg); - } -} -#else -static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg) -{ - return false; -} -static inline void ioapic_irqd_unmask(struct irq_data *data, - struct irq_cfg *cfg, bool masked) -{ -} -#endif - -static void ack_apic_level(struct irq_data *data) -{ - struct irq_cfg *cfg = data->chip_data; - int i, irq = data->irq; - unsigned long v; - bool masked; - - irq_complete_move(cfg); - masked = ioapic_irqd_mask(data, cfg); - - /* - * It appears there is an erratum which affects at least version 0x11 - * of I/O APIC (that's the 82093AA and cores integrated into various - * chipsets). Under certain conditions a level-triggered interrupt is - * erroneously delivered as edge-triggered one but the respective IRR - * bit gets set nevertheless. As a result the I/O unit expects an EOI - * message but it will never arrive and further interrupts are blocked - * from the source. 
The exact reason is so far unknown, but the - * phenomenon was observed when two consecutive interrupt requests - * from a given source get delivered to the same CPU and the source is - * temporarily disabled in between. - * - * A workaround is to simulate an EOI message manually. We achieve it - * by setting the trigger mode to edge and then to level when the edge - * trigger mode gets detected in the TMR of a local APIC for a - * level-triggered interrupt. We mask the source for the time of the - * operation to prevent an edge-triggered interrupt escaping meanwhile. - * The idea is from Manfred Spraul. --macro - * - * Also in the case when cpu goes offline, fixup_irqs() will forward - * any unhandled interrupt on the offlined cpu to the new cpu - * destination that is handling the corresponding interrupt. This - * interrupt forwarding is done via IPI's. Hence, in this case also - * level-triggered io-apic interrupt will be seen as an edge - * interrupt in the IRR. And we can't rely on the cpu's EOI - * to be broadcasted to the IO-APIC's which will clear the remoteIRR - * corresponding to the level-triggered interrupt. Hence on IO-APIC's - * supporting EOI register, we do an explicit EOI to clear the - * remote IRR and on IO-APIC's which don't have an EOI register, - * we use the above logic (mask+edge followed by unmask+level) from - * Manfred Spraul to clear the remote IRR. - */ - i = cfg->vector; - v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); - - /* - * We must acknowledge the irq before we move it or the acknowledge will - * not propagate properly. - */ - ack_APIC_irq(); - - /* - * Tail end of clearing remote IRR bit (either by delivering the EOI - * message via io-apic EOI register write or simulating it using - * mask+edge followed by unnask+level logic) manually when the - * level triggered interrupt is seen as the edge triggered interrupt - * at the cpu. - */ - if (!(v & (1 << (i & 0x1f)))) { - atomic_inc(&irq_mis_count); - - eoi_ioapic_irq(irq, cfg); - } - - ioapic_irqd_unmask(data, cfg, masked); -} - -#ifdef CONFIG_IRQ_REMAP -static void ir_ack_apic_edge(struct irq_data *data) -{ - ack_APIC_irq(); -} - -static void ir_ack_apic_level(struct irq_data *data) -{ - ack_APIC_irq(); - eoi_ioapic_irq(data->irq, data->chip_data); -} - -static void ir_print_prefix(struct irq_data *data, struct seq_file *p) -{ - seq_printf(p, " IR-%s", data->chip->name); -} - -static void irq_remap_modify_chip_defaults(struct irq_chip *chip) -{ - chip->irq_print_chip = ir_print_prefix; - chip->irq_ack = ir_ack_apic_edge; - chip->irq_eoi = ir_ack_apic_level; - -#ifdef CONFIG_SMP - chip->irq_set_affinity = ir_ioapic_set_affinity; -#endif -} -#endif /* CONFIG_IRQ_REMAP */ - -static struct irq_chip ioapic_chip __read_mostly = { - .name = "IO-APIC", - .irq_startup = startup_ioapic_irq, - .irq_mask = mask_ioapic_irq, - .irq_unmask = unmask_ioapic_irq, - .irq_ack = ack_apic_edge, - .irq_eoi = ack_apic_level, -#ifdef CONFIG_SMP - .irq_set_affinity = ioapic_set_affinity, -#endif - .irq_retrigger = ioapic_retrigger_irq, -}; - -static inline void init_IO_APIC_traps(void) -{ - struct irq_cfg *cfg; - unsigned int irq; - - /* - * NOTE! The local APIC isn't very good at handling - * multiple interrupts at the same interrupt level. - * As the interrupt level is determined by taking the - * vector number and shifting that right by 4, we - * want to spread these out a bit so that they don't - * all fall in the same interrupt level. 
- * - * Also, we've got to be careful not to trash gate - * 0x80, because int 0x80 is hm, kind of importantish. ;) - */ - for_each_active_irq(irq) { - cfg = irq_get_chip_data(irq); - if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { - /* - * Hmm.. We don't have an entry for this, - * so default to an old-fashioned 8259 - * interrupt if we can.. - */ - if (irq < legacy_pic->nr_legacy_irqs) - legacy_pic->make_irq(irq); - else - /* Strange. Oh, well.. */ - irq_set_chip(irq, &no_irq_chip); - } - } -} - -/* - * The local APIC irq-chip implementation: - */ - -static void mask_lapic_irq(struct irq_data *data) -{ - unsigned long v; - - v = apic_read(APIC_LVT0); - apic_write(APIC_LVT0, v | APIC_LVT_MASKED); -} - -static void unmask_lapic_irq(struct irq_data *data) -{ - unsigned long v; - - v = apic_read(APIC_LVT0); - apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); -} - -static void ack_lapic_irq(struct irq_data *data) -{ - ack_APIC_irq(); -} - -static struct irq_chip lapic_chip __read_mostly = { - .name = "local-APIC", - .irq_mask = mask_lapic_irq, - .irq_unmask = unmask_lapic_irq, - .irq_ack = ack_lapic_irq, -}; - -static void lapic_register_intr(int irq) -{ - irq_clear_status_flags(irq, IRQ_LEVEL); - irq_set_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, - "edge"); -} - -/* - * This looks a bit hackish but it's about the only one way of sending - * a few INTA cycles to 8259As and any associated glue logic. ICR does - * not support the ExtINT mode, unfortunately. We need to send these - * cycles as some i82489DX-based boards have glue logic that keeps the - * 8259A interrupt line asserted until INTA. --macro - */ -static inline void __init unlock_ExtINT_logic(void) -{ - int apic, pin, i; - struct IO_APIC_route_entry entry0, entry1; - unsigned char save_control, save_freq_select; - - pin = find_isa_irq_pin(8, mp_INT); - if (pin == -1) { - WARN_ON_ONCE(1); - return; - } - apic = find_isa_irq_apic(8, mp_INT); - if (apic == -1) { - WARN_ON_ONCE(1); - return; - } - - entry0 = ioapic_read_entry(apic, pin); - clear_IO_APIC_pin(apic, pin); - - memset(&entry1, 0, sizeof(entry1)); - - entry1.dest_mode = 0; /* physical delivery */ - entry1.mask = 0; /* unmask IRQ now */ - entry1.dest = hard_smp_processor_id(); - entry1.delivery_mode = dest_ExtINT; - entry1.polarity = entry0.polarity; - entry1.trigger = 0; - entry1.vector = 0; - - ioapic_write_entry(apic, pin, entry1); - - save_control = CMOS_READ(RTC_CONTROL); - save_freq_select = CMOS_READ(RTC_FREQ_SELECT); - CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6, - RTC_FREQ_SELECT); - CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL); - - i = 100; - while (i-- > 0) { - mdelay(10); - if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF) - i -= 10; - } - - CMOS_WRITE(save_control, RTC_CONTROL); - CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); - clear_IO_APIC_pin(apic, pin); - - ioapic_write_entry(apic, pin, entry0); -} - -static int disable_timer_pin_1 __initdata; -/* Actually the next is obsolete, but keep it for paranoid reasons -AK */ -static int __init disable_timer_pin_setup(char *arg) -{ - disable_timer_pin_1 = 1; - return 0; -} -early_param("disable_timer_pin_1", disable_timer_pin_setup); - -int timer_through_8259 __initdata; - -/* - * This code may look a bit paranoid, but it's supposed to cooperate with - * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ - * is so screwy. Thanks to Brian Perkins for testing/hacking this beast - * fanatically on his truly buggy board. - * - * FIXME: really need to revamp this for all platforms. 
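 * The probing below tries, in order: IRQ0 straight through the
 * IO-APIC pin reported by the MP table, the same IRQ routed via the
 * 8259A pin (ExtINTA), the local APIC LVT0 programmed as a fixed
 * "virtual wire" vector, and finally plain ExtINT through the 8259A;
 * if none of these produces timer ticks it panics and asks for a
 * bug report.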
- */ -static inline void __init check_timer(void) -{ - struct irq_cfg *cfg = irq_get_chip_data(0); - int node = cpu_to_node(0); - int apic1, pin1, apic2, pin2; - unsigned long flags; - int no_pin1 = 0; - - local_irq_save(flags); - - /* - * get/set the timer IRQ vector: - */ - legacy_pic->mask(0); - assign_irq_vector(0, cfg, apic->target_cpus()); - - /* - * As IRQ0 is to be enabled in the 8259A, the virtual - * wire has to be disabled in the local APIC. Also - * timer interrupts need to be acknowledged manually in - * the 8259A for the i82489DX when using the NMI - * watchdog as that APIC treats NMIs as level-triggered. - * The AEOI mode will finish them in the 8259A - * automatically. - */ - apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); - legacy_pic->init(1); - - pin1 = find_isa_irq_pin(0, mp_INT); - apic1 = find_isa_irq_apic(0, mp_INT); - pin2 = ioapic_i8259.pin; - apic2 = ioapic_i8259.apic; - - apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X " - "apic1=%d pin1=%d apic2=%d pin2=%d\n", - cfg->vector, apic1, pin1, apic2, pin2); - - /* - * Some BIOS writers are clueless and report the ExtINTA - * I/O APIC input from the cascaded 8259A as the timer - * interrupt input. So just in case, if only one pin - * was found above, try it both directly and through the - * 8259A. - */ - if (pin1 == -1) { - if (intr_remapping_enabled) - panic("BIOS bug: timer not connected to IO-APIC"); - pin1 = pin2; - apic1 = apic2; - no_pin1 = 1; - } else if (pin2 == -1) { - pin2 = pin1; - apic2 = apic1; - } - - if (pin1 != -1) { - /* - * Ok, does IRQ0 through the IOAPIC work? - */ - if (no_pin1) { - add_pin_to_irq_node(cfg, node, apic1, pin1); - setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); - } else { - /* for edge trigger, setup_ioapic_irq already - * leave it unmasked. - * so only need to unmask if it is level-trigger - * do we really have level trigger timer? - */ - int idx; - idx = find_irq_entry(apic1, pin1, mp_INT); - if (idx != -1 && irq_trigger(idx)) - unmask_ioapic(cfg); - } - if (timer_irq_works()) { - if (disable_timer_pin_1 > 0) - clear_IO_APIC_pin(0, pin1); - goto out; - } - if (intr_remapping_enabled) - panic("timer doesn't work through Interrupt-remapped IO-APIC"); - local_irq_disable(); - clear_IO_APIC_pin(apic1, pin1); - if (!no_pin1) - apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " - "8254 timer not connected to IO-APIC\n"); - - apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer " - "(IRQ0) through the 8259A ...\n"); - apic_printk(APIC_QUIET, KERN_INFO - "..... (found apic %d pin %d) ...\n", apic2, pin2); - /* - * legacy devices should be connected to IO APIC #0 - */ - replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2); - setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); - legacy_pic->unmask(0); - if (timer_irq_works()) { - apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); - timer_through_8259 = 1; - goto out; - } - /* - * Cleanup, just in case ... - */ - local_irq_disable(); - legacy_pic->mask(0); - clear_IO_APIC_pin(apic2, pin2); - apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); - } - - apic_printk(APIC_QUIET, KERN_INFO - "...trying to set up timer as Virtual Wire IRQ...\n"); - - lapic_register_intr(0); - apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ - legacy_pic->unmask(0); - - if (timer_irq_works()) { - apic_printk(APIC_QUIET, KERN_INFO "..... 
works.\n"); - goto out; - } - local_irq_disable(); - legacy_pic->mask(0); - apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); - apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); - - apic_printk(APIC_QUIET, KERN_INFO - "...trying to set up timer as ExtINT IRQ...\n"); - - legacy_pic->init(0); - legacy_pic->make_irq(0); - apic_write(APIC_LVT0, APIC_DM_EXTINT); - - unlock_ExtINT_logic(); - - if (timer_irq_works()) { - apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); - goto out; - } - local_irq_disable(); - apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); - if (x2apic_preenabled) - apic_printk(APIC_QUIET, KERN_INFO - "Perhaps problem with the pre-enabled x2apic mode\n" - "Try booting with x2apic and interrupt-remapping disabled in the bios.\n"); - panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " - "report. Then try booting with the 'noapic' option.\n"); -out: - local_irq_restore(flags); -} - -/* - * Traditionally ISA IRQ2 is the cascade IRQ, and is not available - * to devices. However there may be an I/O APIC pin available for - * this interrupt regardless. The pin may be left unconnected, but - * typically it will be reused as an ExtINT cascade interrupt for - * the master 8259A. In the MPS case such a pin will normally be - * reported as an ExtINT interrupt in the MP table. With ACPI - * there is no provision for ExtINT interrupts, and in the absence - * of an override it would be treated as an ordinary ISA I/O APIC - * interrupt, that is edge-triggered and unmasked by default. We - * used to do this, but it caused problems on some systems because - * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using - * the same ExtINT cascade interrupt to drive the local APIC of the - * bootstrap processor. Therefore we refrain from routing IRQ2 to - * the I/O APIC in all cases now. No actual device should request - * it anyway. --macro - */ -#define PIC_IRQS (1UL << PIC_CASCADE_IR) - -void __init setup_IO_APIC(void) -{ - - /* - * calling enable_IO_APIC() is moved to setup_local_APIC for BP - */ - io_apic_irqs = legacy_pic->nr_legacy_irqs ? ~PIC_IRQS : ~0UL; - - apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); - /* - * Set up IO-APIC IRQ routing. - */ - x86_init.mpparse.setup_ioapic_ids(); - - sync_Arb_IDs(); - setup_IO_APIC_irqs(); - init_IO_APIC_traps(); - if (legacy_pic->nr_legacy_irqs) - check_timer(); -} - -/* - * Called after all the initialization is done. 
If we didn't find any - * APIC bugs then we can allow the modify fast path - */ - -static int __init io_apic_bug_finalize(void) -{ - if (sis_apic_bug == -1) - sis_apic_bug = 0; - return 0; -} - -late_initcall(io_apic_bug_finalize); - -static void resume_ioapic_id(int ioapic_idx) -{ - unsigned long flags; - union IO_APIC_reg_00 reg_00; - - raw_spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(ioapic_idx, 0); - if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) { - reg_00.bits.ID = mpc_ioapic_id(ioapic_idx); - io_apic_write(ioapic_idx, 0, reg_00.raw); - } - raw_spin_unlock_irqrestore(&ioapic_lock, flags); -} - -static void ioapic_resume(void) -{ - int ioapic_idx; - - for (ioapic_idx = nr_ioapics - 1; ioapic_idx >= 0; ioapic_idx--) - resume_ioapic_id(ioapic_idx); - - restore_ioapic_entries(); -} - -static struct syscore_ops ioapic_syscore_ops = { - .suspend = save_ioapic_entries, - .resume = ioapic_resume, -}; - -static int __init ioapic_init_ops(void) -{ - register_syscore_ops(&ioapic_syscore_ops); - - return 0; -} - -device_initcall(ioapic_init_ops); - -/* - * Dynamic irq allocate and deallocation - */ -unsigned int create_irq_nr(unsigned int from, int node) -{ - struct irq_cfg *cfg; - unsigned long flags; - unsigned int ret = 0; - int irq; - - if (from < nr_irqs_gsi) - from = nr_irqs_gsi; - - irq = alloc_irq_from(from, node); - if (irq < 0) - return 0; - cfg = alloc_irq_cfg(irq, node); - if (!cfg) { - free_irq_at(irq, NULL); - return 0; - } - - raw_spin_lock_irqsave(&vector_lock, flags); - if (!__assign_irq_vector(irq, cfg, apic->target_cpus())) - ret = irq; - raw_spin_unlock_irqrestore(&vector_lock, flags); - - if (ret) { - irq_set_chip_data(irq, cfg); - irq_clear_status_flags(irq, IRQ_NOREQUEST); - } else { - free_irq_at(irq, cfg); - } - return ret; -} - -int create_irq(void) -{ - int node = cpu_to_node(0); - unsigned int irq_want; - int irq; - - irq_want = nr_irqs_gsi; - irq = create_irq_nr(irq_want, node); - - if (irq == 0) - irq = -1; - - return irq; -} - -void destroy_irq(unsigned int irq) -{ - struct irq_cfg *cfg = irq_get_chip_data(irq); - unsigned long flags; - - irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE); - - if (irq_remapped(cfg)) - free_irte(irq); - raw_spin_lock_irqsave(&vector_lock, flags); - __clear_irq_vector(irq, cfg); - raw_spin_unlock_irqrestore(&vector_lock, flags); - free_irq_at(irq, cfg); -} - -/* - * MSI message composition - */ -#ifdef CONFIG_PCI_MSI -static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, - struct msi_msg *msg, u8 hpet_id) -{ - struct irq_cfg *cfg; - int err; - unsigned dest; - - if (disable_apic) - return -ENXIO; - - cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, apic->target_cpus()); - if (err) - return err; - - dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); - - if (irq_remapped(cfg)) { - struct irte irte; - int ir_index; - u16 sub_handle; - - ir_index = map_irq_to_irte_handle(irq, &sub_handle); - BUG_ON(ir_index == -1); - - prepare_irte(&irte, cfg->vector, dest); - - /* Set source-id of interrupt request */ - if (pdev) - set_msi_sid(&irte, pdev); - else - set_hpet_sid(&irte, hpet_id); - - modify_irte(irq, &irte); - - msg->address_hi = MSI_ADDR_BASE_HI; - msg->data = sub_handle; - msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | - MSI_ADDR_IR_SHV | - MSI_ADDR_IR_INDEX1(ir_index) | - MSI_ADDR_IR_INDEX2(ir_index); - } else { - if (x2apic_enabled()) - msg->address_hi = MSI_ADDR_BASE_HI | - MSI_ADDR_EXT_DEST_ID(dest); - else - msg->address_hi = MSI_ADDR_BASE_HI; - - 
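/*
 * Illustrative sketch (not the kernel implementation): create_irq_nr() above
 * follows a reserve/try/roll-back pattern -- reserve a descriptor, allocate
 * its config, try to assign a vector under the lock, and undo everything on
 * failure so the caller only ever sees 0 ("no irq") or a fully initialised
 * number. The user-space model below captures that shape with invented
 * helpers.
 */
#include <stdlib.h>

struct sk_cfg { int vector; };

static int  sk_reserve_slot(void)		{ return 42; }	/* hypothetical */
static void sk_release_slot(int slot)		{ (void)slot; }
static int  sk_assign_vector(struct sk_cfg *c)	{ c->vector = 0x31; return 0; }

static unsigned int sketch_create_irq(void)
{
	int slot = sk_reserve_slot();
	if (slot < 0)
		return 0;
	struct sk_cfg *cfg = calloc(1, sizeof(*cfg));
	if (!cfg) {
		sk_release_slot(slot);		/* roll back the reservation */
		return 0;
	}
	if (sk_assign_vector(cfg) != 0) {	/* no free vector: undo both */
		free(cfg);
		sk_release_slot(slot);
		return 0;
	}
	return (unsigned int)slot;		/* success: slot is usable */
}

int main(void)
{
	return !sketch_create_irq();
}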
msg->address_lo = - MSI_ADDR_BASE_LO | - ((apic->irq_dest_mode == 0) ? - MSI_ADDR_DEST_MODE_PHYSICAL: - MSI_ADDR_DEST_MODE_LOGICAL) | - ((apic->irq_delivery_mode != dest_LowestPrio) ? - MSI_ADDR_REDIRECTION_CPU: - MSI_ADDR_REDIRECTION_LOWPRI) | - MSI_ADDR_DEST_ID(dest); - - msg->data = - MSI_DATA_TRIGGER_EDGE | - MSI_DATA_LEVEL_ASSERT | - ((apic->irq_delivery_mode != dest_LowestPrio) ? - MSI_DATA_DELIVERY_FIXED: - MSI_DATA_DELIVERY_LOWPRI) | - MSI_DATA_VECTOR(cfg->vector); - } - return err; -} - -#ifdef CONFIG_SMP -static int -msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) -{ - struct irq_cfg *cfg = data->chip_data; - struct msi_msg msg; - unsigned int dest; - - if (__ioapic_set_affinity(data, mask, &dest)) - return -1; - - __get_cached_msi_msg(data->msi_desc, &msg); - - msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(cfg->vector); - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; - msg.address_lo |= MSI_ADDR_DEST_ID(dest); - - __write_msi_msg(data->msi_desc, &msg); - - return 0; -} -#endif /* CONFIG_SMP */ - -/* - * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, - * which implement the MSI or MSI-X Capability Structure. - */ -static struct irq_chip msi_chip = { - .name = "PCI-MSI", - .irq_unmask = unmask_msi_irq, - .irq_mask = mask_msi_irq, - .irq_ack = ack_apic_edge, -#ifdef CONFIG_SMP - .irq_set_affinity = msi_set_affinity, -#endif - .irq_retrigger = ioapic_retrigger_irq, -}; - -/* - * Map the PCI dev to the corresponding remapping hardware unit - * and allocate 'nvec' consecutive interrupt-remapping table entries - * in it. - */ -static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) -{ - struct intel_iommu *iommu; - int index; - - iommu = map_dev_to_ir(dev); - if (!iommu) { - printk(KERN_ERR - "Unable to map PCI %s to iommu\n", pci_name(dev)); - return -ENOENT; - } - - index = alloc_irte(iommu, irq, nvec); - if (index < 0) { - printk(KERN_ERR - "Unable to allocate %d IRTE for PCI %s\n", nvec, - pci_name(dev)); - return -ENOSPC; - } - return index; -} - -static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) -{ - struct irq_chip *chip = &msi_chip; - struct msi_msg msg; - int ret; - - ret = msi_compose_msg(dev, irq, &msg, -1); - if (ret < 0) - return ret; - - irq_set_msi_desc(irq, msidesc); - write_msi_msg(irq, &msg); - - if (irq_remapped(irq_get_chip_data(irq))) { - irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - irq_remap_modify_chip_defaults(chip); - } - - irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); - - dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq); - - return 0; -} - -int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) -{ - int node, ret, sub_handle, index = 0; - unsigned int irq, irq_want; - struct msi_desc *msidesc; - struct intel_iommu *iommu = NULL; - - /* x86 doesn't support multiple MSI yet */ - if (type == PCI_CAP_ID_MSI && nvec > 1) - return 1; - - node = dev_to_node(&dev->dev); - irq_want = nr_irqs_gsi; - sub_handle = 0; - list_for_each_entry(msidesc, &dev->msi_list, list) { - irq = create_irq_nr(irq_want, node); - if (irq == 0) - return -1; - irq_want = irq + 1; - if (!intr_remapping_enabled) - goto no_ir; - - if (!sub_handle) { - /* - * allocate the consecutive block of IRTE's - * for 'nvec' - */ - index = msi_alloc_irte(dev, irq, nvec); - if (index < 0) { - ret = index; - goto error; - } - } else { - iommu = map_dev_to_ir(dev); - if (!iommu) { - ret = -ENOENT; - goto error; - } - /* - * setup the mapping between the irq and the IRTE - * 
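/*
 * Illustrative sketch (not the kernel implementation): msi_set_affinity()
 * above re-targets an already-composed MSI by rewriting only two fields of
 * the cached message -- the vector byte in msg.data and the destination APIC
 * ID in msg.address_lo -- and writing the message back. The masks and the
 * shift used below are illustrative assumptions, not the kernel's msidef.h
 * values.
 */
#include <stdint.h>
#include <stdio.h>

#define SK_DATA_VECTOR_MASK   0x000000ffu	/* assumed layout */
#define SK_ADDR_DEST_ID_MASK  0x000ff000u	/* assumed layout */
#define SK_ADDR_DEST_ID_SHIFT 12

static void sketch_retarget_msi(uint32_t *data, uint32_t *addr_lo,
				uint8_t vector, uint8_t dest_apicid)
{
	*data    = (*data    & ~SK_DATA_VECTOR_MASK)  | vector;
	*addr_lo = (*addr_lo & ~SK_ADDR_DEST_ID_MASK) |
		   ((uint32_t)dest_apicid << SK_ADDR_DEST_ID_SHIFT);
}

int main(void)
{
	uint32_t data = 0x31, addr_lo = 0xfee00000u;
	sketch_retarget_msi(&data, &addr_lo, 0x41, 3);
	printf("data=%#x address_lo=%#x\n", (unsigned)data, (unsigned)addr_lo);
	return 0;
}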
base index, the sub_handle pointing to the - * appropriate interrupt remap table entry. - */ - set_irte_irq(irq, iommu, index, sub_handle); - } -no_ir: - ret = setup_msi_irq(dev, msidesc, irq); - if (ret < 0) - goto error; - sub_handle++; - } - return 0; - -error: - destroy_irq(irq); - return ret; -} - -void native_teardown_msi_irq(unsigned int irq) -{ - destroy_irq(irq); -} - -#ifdef CONFIG_DMAR_TABLE -#ifdef CONFIG_SMP -static int -dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) -{ - struct irq_cfg *cfg = data->chip_data; - unsigned int dest, irq = data->irq; - struct msi_msg msg; - - if (__ioapic_set_affinity(data, mask, &dest)) - return -1; - - dmar_msi_read(irq, &msg); - - msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(cfg->vector); - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; - msg.address_lo |= MSI_ADDR_DEST_ID(dest); - msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest); - - dmar_msi_write(irq, &msg); - - return 0; -} - -#endif /* CONFIG_SMP */ - -static struct irq_chip dmar_msi_type = { - .name = "DMAR_MSI", - .irq_unmask = dmar_msi_unmask, - .irq_mask = dmar_msi_mask, - .irq_ack = ack_apic_edge, -#ifdef CONFIG_SMP - .irq_set_affinity = dmar_msi_set_affinity, -#endif - .irq_retrigger = ioapic_retrigger_irq, -}; - -int arch_setup_dmar_msi(unsigned int irq) -{ - int ret; - struct msi_msg msg; - - ret = msi_compose_msg(NULL, irq, &msg, -1); - if (ret < 0) - return ret; - dmar_msi_write(irq, &msg); - irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, - "edge"); - return 0; -} -#endif - -#ifdef CONFIG_HPET_TIMER - -#ifdef CONFIG_SMP -static int hpet_msi_set_affinity(struct irq_data *data, - const struct cpumask *mask, bool force) -{ - struct irq_cfg *cfg = data->chip_data; - struct msi_msg msg; - unsigned int dest; - - if (__ioapic_set_affinity(data, mask, &dest)) - return -1; - - hpet_msi_read(data->handler_data, &msg); - - msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(cfg->vector); - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; - msg.address_lo |= MSI_ADDR_DEST_ID(dest); - - hpet_msi_write(data->handler_data, &msg); - - return 0; -} - -#endif /* CONFIG_SMP */ - -static struct irq_chip hpet_msi_type = { - .name = "HPET_MSI", - .irq_unmask = hpet_msi_unmask, - .irq_mask = hpet_msi_mask, - .irq_ack = ack_apic_edge, -#ifdef CONFIG_SMP - .irq_set_affinity = hpet_msi_set_affinity, -#endif - .irq_retrigger = ioapic_retrigger_irq, -}; - -int arch_setup_hpet_msi(unsigned int irq, unsigned int id) -{ - struct irq_chip *chip = &hpet_msi_type; - struct msi_msg msg; - int ret; - - if (intr_remapping_enabled) { - struct intel_iommu *iommu = map_hpet_to_ir(id); - int index; - - if (!iommu) - return -1; - - index = alloc_irte(iommu, irq, 1); - if (index < 0) - return -1; - } - - ret = msi_compose_msg(NULL, irq, &msg, id); - if (ret < 0) - return ret; - - hpet_msi_write(irq_get_handler_data(irq), &msg); - irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - if (irq_remapped(irq_get_chip_data(irq))) - irq_remap_modify_chip_defaults(chip); - - irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); - return 0; -} -#endif - -#endif /* CONFIG_PCI_MSI */ -/* - * Hypertransport interrupt support - */ -#ifdef CONFIG_HT_IRQ - -#ifdef CONFIG_SMP - -static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) -{ - struct ht_irq_msg msg; - fetch_ht_irq_msg(irq, &msg); - - msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK); - msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); - - 
msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest); - msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest); - - write_ht_irq_msg(irq, &msg); -} - -static int -ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) -{ - struct irq_cfg *cfg = data->chip_data; - unsigned int dest; - - if (__ioapic_set_affinity(data, mask, &dest)) - return -1; - - target_ht_irq(data->irq, dest, cfg->vector); - return 0; -} - -#endif - -static struct irq_chip ht_irq_chip = { - .name = "PCI-HT", - .irq_mask = mask_ht_irq, - .irq_unmask = unmask_ht_irq, - .irq_ack = ack_apic_edge, -#ifdef CONFIG_SMP - .irq_set_affinity = ht_set_affinity, -#endif - .irq_retrigger = ioapic_retrigger_irq, -}; - -int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) -{ - struct irq_cfg *cfg; - int err; - - if (disable_apic) - return -ENXIO; - - cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, apic->target_cpus()); - if (!err) { - struct ht_irq_msg msg; - unsigned dest; - - dest = apic->cpu_mask_to_apicid_and(cfg->domain, - apic->target_cpus()); - - msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); - - msg.address_lo = - HT_IRQ_LOW_BASE | - HT_IRQ_LOW_DEST_ID(dest) | - HT_IRQ_LOW_VECTOR(cfg->vector) | - ((apic->irq_dest_mode == 0) ? - HT_IRQ_LOW_DM_PHYSICAL : - HT_IRQ_LOW_DM_LOGICAL) | - HT_IRQ_LOW_RQEOI_EDGE | - ((apic->irq_delivery_mode != dest_LowestPrio) ? - HT_IRQ_LOW_MT_FIXED : - HT_IRQ_LOW_MT_ARBITRATED) | - HT_IRQ_LOW_IRQ_MASKED; - - write_ht_irq_msg(irq, &msg); - - irq_set_chip_and_handler_name(irq, &ht_irq_chip, - handle_edge_irq, "edge"); - - dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq); - } - return err; -} -#endif /* CONFIG_HT_IRQ */ - -static int -io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr) -{ - struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node); - int ret; - - if (!cfg) - return -EINVAL; - ret = __add_pin_to_irq_node(cfg, node, attr->ioapic, attr->ioapic_pin); - if (!ret) - setup_ioapic_irq(irq, cfg, attr); - return ret; -} - -int io_apic_setup_irq_pin_once(unsigned int irq, int node, - struct io_apic_irq_attr *attr) -{ - unsigned int ioapic_idx = attr->ioapic, pin = attr->ioapic_pin; - int ret; - - /* Avoid redundant programming */ - if (test_bit(pin, ioapics[ioapic_idx].pin_programmed)) { - pr_debug("Pin %d-%d already programmed\n", - mpc_ioapic_id(ioapic_idx), pin); - return 0; - } - ret = io_apic_setup_irq_pin(irq, node, attr); - if (!ret) - set_bit(pin, ioapics[ioapic_idx].pin_programmed); - return ret; -} - -static int __init io_apic_get_redir_entries(int ioapic) -{ - union IO_APIC_reg_01 reg_01; - unsigned long flags; - - raw_spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(ioapic, 1); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); - - /* The register returns the maximum index redir index - * supported, which is one less than the total number of redir - * entries. 
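/*
 * Illustrative sketch (not the kernel implementation):
 * io_apic_setup_irq_pin_once() above uses a per-IOAPIC "pin_programmed"
 * bitmap so a redirection entry is only written the first time a pin is
 * requested; later callers get a cheap "already done". A minimal
 * test-and-set guard of that shape, with invented types:
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct sk_ioapic { uint64_t pin_programmed; };	/* up to 64 pins, assumed */

static bool sk_setup_pin_once(struct sk_ioapic *io, unsigned int pin)
{
	if (io->pin_programmed & (1ull << pin))
		return false;			/* already programmed: skip */
	/* ... program the redirection entry here ... */
	io->pin_programmed |= 1ull << pin;
	return true;
}

int main(void)
{
	struct sk_ioapic io = { 0 };
	printf("%d %d\n", sk_setup_pin_once(&io, 2), sk_setup_pin_once(&io, 2));
	return 0;	/* prints "1 0": the second call is a no-op */
}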
- */ - return reg_01.bits.entries + 1; -} - -static void __init probe_nr_irqs_gsi(void) -{ - int nr; - - nr = gsi_top + NR_IRQS_LEGACY; - if (nr > nr_irqs_gsi) - nr_irqs_gsi = nr; - - printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); -} - -int get_nr_irqs_gsi(void) -{ - return nr_irqs_gsi; -} - -int __init arch_probe_nr_irqs(void) -{ - int nr; - - if (nr_irqs > (NR_VECTORS * nr_cpu_ids)) - nr_irqs = NR_VECTORS * nr_cpu_ids; - - nr = nr_irqs_gsi + 8 * nr_cpu_ids; -#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ) - /* - * for MSI and HT dyn irq - */ - nr += nr_irqs_gsi * 16; -#endif - if (nr < nr_irqs) - nr_irqs = nr; - - return NR_IRQS_LEGACY; -} - -int io_apic_set_pci_routing(struct device *dev, int irq, - struct io_apic_irq_attr *irq_attr) -{ - int node; - - if (!IO_APIC_IRQ(irq)) { - apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", - irq_attr->ioapic); - return -EINVAL; - } - - node = dev ? dev_to_node(dev) : cpu_to_node(0); - - return io_apic_setup_irq_pin_once(irq, node, irq_attr); -} - -#ifdef CONFIG_X86_32 -static int __init io_apic_get_unique_id(int ioapic, int apic_id) -{ - union IO_APIC_reg_00 reg_00; - static physid_mask_t apic_id_map = PHYSID_MASK_NONE; - physid_mask_t tmp; - unsigned long flags; - int i = 0; - - /* - * The P4 platform supports up to 256 APIC IDs on two separate APIC - * buses (one for LAPICs, one for IOAPICs), where predecessors only - * supports up to 16 on one shared APIC bus. - * - * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full - * advantage of new APIC bus architecture. - */ - - if (physids_empty(apic_id_map)) - apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map); - - raw_spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(ioapic, 0); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); - - if (apic_id >= get_physical_broadcast()) { - printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " - "%d\n", ioapic, apic_id, reg_00.bits.ID); - apic_id = reg_00.bits.ID; - } - - /* - * Every APIC in a system must have a unique ID or we get lots of nice - * 'stuck on smp_invalidate_needed IPI wait' messages. 
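/*
 * Illustrative sketch (not the kernel implementation): arch_probe_nr_irqs()
 * above sizes the irq space as roughly "all GSIs, plus 8 dynamic IRQs per
 * CPU, plus 16 per GSI when MSI/HT are enabled", clamped to
 * NR_VECTORS * nr_cpu_ids. The arithmetic, with made-up example numbers:
 */
#include <stdio.h>

int main(void)
{
	unsigned int nr_vectors = 256, nr_cpu_ids = 4;	/* example values */
	unsigned int nr_irqs_gsi = 24 + 16;		/* gsi_top + legacy, assumed */
	unsigned int nr_irqs = nr_vectors * nr_cpu_ids;	/* upper clamp */

	unsigned int nr = nr_irqs_gsi + 8 * nr_cpu_ids;	/* per-cpu dynamic irqs */
	nr += nr_irqs_gsi * 16;				/* MSI/HT headroom */
	if (nr < nr_irqs)
		nr_irqs = nr;

	printf("nr_irqs = %u\n", nr_irqs);		/* 40 + 32 + 640 = 712 */
	return 0;
}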
- */ - if (apic->check_apicid_used(&apic_id_map, apic_id)) { - - for (i = 0; i < get_physical_broadcast(); i++) { - if (!apic->check_apicid_used(&apic_id_map, i)) - break; - } - - if (i == get_physical_broadcast()) - panic("Max apic_id exceeded!\n"); - - printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " - "trying %d\n", ioapic, apic_id, i); - - apic_id = i; - } - - apic->apicid_to_cpu_present(apic_id, &tmp); - physids_or(apic_id_map, apic_id_map, tmp); - - if (reg_00.bits.ID != apic_id) { - reg_00.bits.ID = apic_id; - - raw_spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(ioapic, 0, reg_00.raw); - reg_00.raw = io_apic_read(ioapic, 0); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); - - /* Sanity check */ - if (reg_00.bits.ID != apic_id) { - printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); - return -1; - } - } - - apic_printk(APIC_VERBOSE, KERN_INFO - "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); - - return apic_id; -} - -static u8 __init io_apic_unique_id(u8 id) -{ - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && - !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) - return io_apic_get_unique_id(nr_ioapics, id); - else - return id; -} -#else -static u8 __init io_apic_unique_id(u8 id) -{ - int i; - DECLARE_BITMAP(used, 256); - - bitmap_zero(used, 256); - for (i = 0; i < nr_ioapics; i++) { - __set_bit(mpc_ioapic_id(i), used); - } - if (!test_bit(id, used)) - return id; - return find_first_zero_bit(used, 256); -} -#endif - -static int __init io_apic_get_version(int ioapic) -{ - union IO_APIC_reg_01 reg_01; - unsigned long flags; - - raw_spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(ioapic, 1); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); - - return reg_01.bits.version; -} - -int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity) -{ - int ioapic, pin, idx; - - if (skip_ioapic_setup) - return -1; - - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) - return -1; - - pin = mp_find_ioapic_pin(ioapic, gsi); - if (pin < 0) - return -1; - - idx = find_irq_entry(ioapic, pin, mp_INT); - if (idx < 0) - return -1; - - *trigger = irq_trigger(idx); - *polarity = irq_polarity(idx); - return 0; -} - -/* - * This function currently is only a helper for the i386 smp boot process where - * we need to reprogram the ioredtbls to cater for the cpus which have come online - * so mask in all cases should simply be apic->target_cpus() - */ -#ifdef CONFIG_SMP -void __init setup_ioapic_dest(void) -{ - int pin, ioapic, irq, irq_entry; - const struct cpumask *mask; - struct irq_data *idata; - - if (skip_ioapic_setup == 1) - return; - - for (ioapic = 0; ioapic < nr_ioapics; ioapic++) - for (pin = 0; pin < ioapics[ioapic].nr_registers; pin++) { - irq_entry = find_irq_entry(ioapic, pin, mp_INT); - if (irq_entry == -1) - continue; - irq = pin_2_irq(irq_entry, ioapic, pin); - - if ((ioapic > 0) && (irq > 16)) - continue; - - idata = irq_get_irq_data(irq); - - /* - * Honour affinities which have been set in early boot - */ - if (!irqd_can_balance(idata) || irqd_affinity_was_set(idata)) - mask = idata->affinity; - else - mask = apic->target_cpus(); - - if (intr_remapping_enabled) - ir_ioapic_set_affinity(idata, mask, false); - else - ioapic_set_affinity(idata, mask, false); - } - -} -#endif - -#define IOAPIC_RESOURCE_NAME_SIZE 11 - -static struct resource *ioapic_resources; - -static struct resource * __init ioapic_setup_resources(int nr_ioapics) -{ - unsigned long n; - struct resource *res; - char *mem; - int i; - - if (nr_ioapics <= 0) - 
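/*
 * Illustrative sketch (not the kernel implementation): the non-x86_32 variant
 * of io_apic_unique_id() above marks every APIC ID already claimed by a
 * registered IOAPIC in a 256-bit map, keeps the requested ID if it is still
 * free, and otherwise hands back the first unused one. The same idea in
 * stand-alone C:
 */
#include <stdint.h>
#include <stdio.h>

static uint8_t sk_unique_id(const uint8_t *used_ids, int n_used, uint8_t want)
{
	uint64_t map[4] = { 0 };			/* 256 possible IDs */
	for (int i = 0; i < n_used; i++)
		map[used_ids[i] / 64] |= 1ull << (used_ids[i] % 64);
	if (!(map[want / 64] & (1ull << (want % 64))))
		return want;				/* requested ID is free */
	for (int id = 0; id < 256; id++)		/* else first free ID */
		if (!(map[id / 64] & (1ull << (id % 64))))
			return (uint8_t)id;
	return want;					/* all 256 taken (unlikely) */
}

int main(void)
{
	uint8_t used[] = { 0, 1 };
	printf("%d\n", sk_unique_id(used, 2, 1));	/* 1 is taken, prints 2 */
	return 0;
}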
return NULL; - - n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); - n *= nr_ioapics; - - mem = alloc_bootmem(n); - res = (void *)mem; - - mem += sizeof(struct resource) * nr_ioapics; - - for (i = 0; i < nr_ioapics; i++) { - res[i].name = mem; - res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; - snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i); - mem += IOAPIC_RESOURCE_NAME_SIZE; - } - - ioapic_resources = res; - - return res; -} - -void __init ioapic_and_gsi_init(void) -{ - io_apic_ops.init(); -} - -static void __init __ioapic_init_mappings(void) -{ - unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; - struct resource *ioapic_res; - int i; - - ioapic_res = ioapic_setup_resources(nr_ioapics); - for (i = 0; i < nr_ioapics; i++) { - if (smp_found_config) { - ioapic_phys = mpc_ioapic_addr(i); -#ifdef CONFIG_X86_32 - if (!ioapic_phys) { - printk(KERN_ERR - "WARNING: bogus zero IO-APIC " - "address found in MPTABLE, " - "disabling IO/APIC support!\n"); - smp_found_config = 0; - skip_ioapic_setup = 1; - goto fake_ioapic_page; - } -#endif - } else { -#ifdef CONFIG_X86_32 -fake_ioapic_page: -#endif - ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE); - ioapic_phys = __pa(ioapic_phys); - } - set_fixmap_nocache(idx, ioapic_phys); - apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n", - __fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK), - ioapic_phys); - idx++; - - ioapic_res->start = ioapic_phys; - ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1; - ioapic_res++; - } - - probe_nr_irqs_gsi(); -} - -void __init ioapic_insert_resources(void) -{ - int i; - struct resource *r = ioapic_resources; - - if (!r) { - if (nr_ioapics > 0) - printk(KERN_ERR - "IO APIC resources couldn't be allocated.\n"); - return; - } - - for (i = 0; i < nr_ioapics; i++) { - insert_resource(&iomem_resource, r); - r++; - } -} - -int mp_find_ioapic(u32 gsi) -{ - int i = 0; - - if (nr_ioapics == 0) - return -1; - - /* Find the IOAPIC that manages this GSI. 
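/*
 * Illustrative sketch (not the kernel implementation): mp_find_ioapic(),
 * which continues just below, simply walks the registered IOAPICs and returns
 * the one whose [gsi_base, gsi_end] window contains the GSI;
 * mp_find_ioapic_pin() then turns the GSI into a pin by subtracting gsi_base.
 * A stand-alone model of that lookup:
 */
#include <stdio.h>

struct sk_gsi_range { unsigned int gsi_base, gsi_end; };

static int sk_find_ioapic(const struct sk_gsi_range *r, int n, unsigned int gsi)
{
	for (int i = 0; i < n; i++)
		if (gsi >= r[i].gsi_base && gsi <= r[i].gsi_end)
			return i;		/* pin = gsi - r[i].gsi_base */
	return -1;
}

int main(void)
{
	struct sk_gsi_range ranges[] = { { 0, 23 }, { 24, 55 } };
	unsigned int gsi = 40;
	int idx = sk_find_ioapic(ranges, 2, gsi);
	printf("IOAPIC %d, pin %u\n", idx, gsi - ranges[idx].gsi_base);
	return 0;	/* prints "IOAPIC 1, pin 16" */
}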
*/ - for (i = 0; i < nr_ioapics; i++) { - struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(i); - if ((gsi >= gsi_cfg->gsi_base) - && (gsi <= gsi_cfg->gsi_end)) - return i; - } - - printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); - return -1; -} - -int mp_find_ioapic_pin(int ioapic, u32 gsi) -{ - struct mp_ioapic_gsi *gsi_cfg; - - if (WARN_ON(ioapic == -1)) - return -1; - - gsi_cfg = mp_ioapic_gsi_routing(ioapic); - if (WARN_ON(gsi > gsi_cfg->gsi_end)) - return -1; - - return gsi - gsi_cfg->gsi_base; -} - -static __init int bad_ioapic(unsigned long address) -{ - if (nr_ioapics >= MAX_IO_APICS) { - pr_warn("WARNING: Max # of I/O APICs (%d) exceeded (found %d), skipping\n", - MAX_IO_APICS, nr_ioapics); - return 1; - } - if (!address) { - pr_warn("WARNING: Bogus (zero) I/O APIC address found in table, skipping!\n"); - return 1; - } - return 0; -} - -static __init int bad_ioapic_register(int idx) -{ - union IO_APIC_reg_00 reg_00; - union IO_APIC_reg_01 reg_01; - union IO_APIC_reg_02 reg_02; - - reg_00.raw = io_apic_read(idx, 0); - reg_01.raw = io_apic_read(idx, 1); - reg_02.raw = io_apic_read(idx, 2); - - if (reg_00.raw == -1 && reg_01.raw == -1 && reg_02.raw == -1) { - pr_warn("I/O APIC 0x%x registers return all ones, skipping!\n", - mpc_ioapic_addr(idx)); - return 1; - } - - return 0; -} - -void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) -{ - int idx = 0; - int entries; - struct mp_ioapic_gsi *gsi_cfg; - - if (bad_ioapic(address)) - return; - - idx = nr_ioapics; - - ioapics[idx].mp_config.type = MP_IOAPIC; - ioapics[idx].mp_config.flags = MPC_APIC_USABLE; - ioapics[idx].mp_config.apicaddr = address; - - set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - - if (bad_ioapic_register(idx)) { - clear_fixmap(FIX_IO_APIC_BASE_0 + idx); - return; - } - - ioapics[idx].mp_config.apicid = io_apic_unique_id(id); - ioapics[idx].mp_config.apicver = io_apic_get_version(idx); - - /* - * Build basic GSI lookup table to facilitate gsi->io_apic lookups - * and to prevent reprogramming of IOAPIC pins (PCI GSIs). 
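/*
 * Illustrative sketch (not the kernel implementation): the registration code
 * that follows derives the GSI window from the redirection-entry count --
 * gsi_end = gsi_base + entries - 1 -- and pushes gsi_top past it, which is
 * exactly what mp_find_ioapic() consumes. The arithmetic for an example
 * 24-pin IOAPIC registered at GSI 24:
 */
#include <stdio.h>

int main(void)
{
	unsigned int gsi_base = 24, entries = 24, gsi_top = 24;	/* example values */
	unsigned int gsi_end = gsi_base + entries - 1;		/* 47 */
	if (gsi_end >= gsi_top)
		gsi_top = gsi_end + 1;				/* 48 */
	printf("GSI %u-%u, gsi_top=%u\n", gsi_base, gsi_end, gsi_top);
	return 0;
}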
- */ - entries = io_apic_get_redir_entries(idx); - gsi_cfg = mp_ioapic_gsi_routing(idx); - gsi_cfg->gsi_base = gsi_base; - gsi_cfg->gsi_end = gsi_base + entries - 1; - - /* - * The number of IO-APIC IRQ registers (== #pins): - */ - ioapics[idx].nr_registers = entries; - - if (gsi_cfg->gsi_end >= gsi_top) - gsi_top = gsi_cfg->gsi_end + 1; - - pr_info("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, GSI %d-%d\n", - idx, mpc_ioapic_id(idx), - mpc_ioapic_ver(idx), mpc_ioapic_addr(idx), - gsi_cfg->gsi_base, gsi_cfg->gsi_end); - - nr_ioapics++; -} - -/* Enable IOAPIC early just for system timer */ -void __init pre_init_apic_IRQ0(void) -{ - struct io_apic_irq_attr attr = { 0, 0, 0, 0 }; - - printk(KERN_INFO "Early APIC setup for system timer0\n"); -#ifndef CONFIG_SMP - physid_set_mask_of_physid(boot_cpu_physical_apicid, - &phys_cpu_present_map); -#endif - setup_local_APIC(); - - io_apic_setup_irq_pin(0, 0, &attr); - irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, - "edge"); -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/apic/ipi.c b/ANDROID_3.4.5/arch/x86/kernel/apic/ipi.c deleted file mode 100644 index cce91bf2..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/apic/ipi.c +++ /dev/null @@ -1,167 +0,0 @@ -#include <linux/cpumask.h> -#include <linux/interrupt.h> -#include <linux/init.h> - -#include <linux/mm.h> -#include <linux/delay.h> -#include <linux/spinlock.h> -#include <linux/kernel_stat.h> -#include <linux/mc146818rtc.h> -#include <linux/cache.h> -#include <linux/cpu.h> -#include <linux/module.h> - -#include <asm/smp.h> -#include <asm/mtrr.h> -#include <asm/tlbflush.h> -#include <asm/mmu_context.h> -#include <asm/apic.h> -#include <asm/proto.h> -#include <asm/ipi.h> - -void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector) -{ - unsigned long query_cpu; - unsigned long flags; - - /* - * Hack. The clustered APIC addressing mode doesn't allow us to send - * to an arbitrary mask, so I do a unicast to each CPU instead. - * - mbligh - */ - local_irq_save(flags); - for_each_cpu(query_cpu, mask) { - __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, - query_cpu), vector, APIC_DEST_PHYSICAL); - } - local_irq_restore(flags); -} - -void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, - int vector) -{ - unsigned int this_cpu = smp_processor_id(); - unsigned int query_cpu; - unsigned long flags; - - /* See Hack comment above */ - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) { - if (query_cpu == this_cpu) - continue; - __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, - query_cpu), vector, APIC_DEST_PHYSICAL); - } - local_irq_restore(flags); -} - -#ifdef CONFIG_X86_32 - -void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, - int vector) -{ - unsigned long flags; - unsigned int query_cpu; - - /* - * Hack. The clustered APIC addressing mode doesn't allow us to send - * to an arbitrary mask, so I do a unicasts to each CPU instead. 
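/*
 * Illustrative sketch (not the kernel implementation): the IPI "sequence"
 * helpers in this file all share one shape -- iterate the destination mask
 * and send a unicast per CPU, skipping the sender for the "allbutself"
 * variants -- because clustered addressing cannot take an arbitrary mask in a
 * single shot. Modelled over a plain bitmask with an invented send helper:
 */
#include <stdint.h>
#include <stdio.h>

static void sk_send_one(unsigned int cpu, int vector)
{
	printf("IPI vector %#x -> cpu %u\n", vector, cpu);
}

static void sk_send_mask_allbutself(uint32_t mask, unsigned int self, int vector)
{
	for (unsigned int cpu = 0; cpu < 32; cpu++) {
		if (!(mask & (1u << cpu)) || cpu == self)
			continue;		/* not targeted, or the sender */
		sk_send_one(cpu, vector);
	}
}

int main(void)
{
	sk_send_mask_allbutself(0x0f, 0, 0xfd);	/* cpus 1, 2, 3 get the IPI */
	return 0;
}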
This - * should be modified to do 1 message per cluster ID - mbligh - */ - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) - __default_send_IPI_dest_field( - early_per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, apic->dest_logical); - local_irq_restore(flags); -} - -void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, - int vector) -{ - unsigned long flags; - unsigned int query_cpu; - unsigned int this_cpu = smp_processor_id(); - - /* See Hack comment above */ - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) { - if (query_cpu == this_cpu) - continue; - __default_send_IPI_dest_field( - early_per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, apic->dest_logical); - } - local_irq_restore(flags); -} - -/* - * This is only used on smaller machines. - */ -void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector) -{ - unsigned long mask = cpumask_bits(cpumask)[0]; - unsigned long flags; - - if (WARN_ONCE(!mask, "empty IPI mask")) - return; - - local_irq_save(flags); - WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); - __default_send_IPI_dest_field(mask, vector, apic->dest_logical); - local_irq_restore(flags); -} - -void default_send_IPI_allbutself(int vector) -{ - /* - * if there are no other CPUs in the system then we get an APIC send - * error if we try to broadcast, thus avoid sending IPIs in this case. - */ - if (!(num_online_cpus() > 1)) - return; - - __default_local_send_IPI_allbutself(vector); -} - -void default_send_IPI_all(int vector) -{ - __default_local_send_IPI_all(vector); -} - -void default_send_IPI_self(int vector) -{ - __default_send_IPI_shortcut(APIC_DEST_SELF, vector, apic->dest_logical); -} - -/* must come after the send_IPI functions above for inlining */ -static int convert_apicid_to_cpu(int apic_id) -{ - int i; - - for_each_possible_cpu(i) { - if (per_cpu(x86_cpu_to_apicid, i) == apic_id) - return i; - } - return -1; -} - -int safe_smp_processor_id(void) -{ - int apicid, cpuid; - - if (!cpu_has_apic) - return 0; - - apicid = hard_smp_processor_id(); - if (apicid == BAD_APICID) - return 0; - - cpuid = convert_apicid_to_cpu(apicid); - - return cpuid >= 0 ? cpuid : 0; -} -#endif diff --git a/ANDROID_3.4.5/arch/x86/kernel/apic/numaq_32.c b/ANDROID_3.4.5/arch/x86/kernel/apic/numaq_32.c deleted file mode 100644 index 00d2422c..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/apic/numaq_32.c +++ /dev/null @@ -1,542 +0,0 @@ -/* - * Written by: Patricia Gaughen, IBM Corporation - * - * Copyright (C) 2002, IBM Corp. - * Copyright (C) 2009, Red Hat, Inc., Ingo Molnar - * - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - * Send feedback to <gone@us.ibm.com> - */ -#include <linux/nodemask.h> -#include <linux/topology.h> -#include <linux/bootmem.h> -#include <linux/memblock.h> -#include <linux/threads.h> -#include <linux/cpumask.h> -#include <linux/kernel.h> -#include <linux/mmzone.h> -#include <linux/module.h> -#include <linux/string.h> -#include <linux/init.h> -#include <linux/numa.h> -#include <linux/smp.h> -#include <linux/io.h> -#include <linux/mm.h> - -#include <asm/processor.h> -#include <asm/fixmap.h> -#include <asm/mpspec.h> -#include <asm/numaq.h> -#include <asm/setup.h> -#include <asm/apic.h> -#include <asm/e820.h> -#include <asm/ipi.h> - -int found_numaq; - -/* - * Have to match translation table entries to main table entries by counter - * hence the mpc_record variable .... can't see a less disgusting way of - * doing this .... - */ -struct mpc_trans { - unsigned char mpc_type; - unsigned char trans_len; - unsigned char trans_type; - unsigned char trans_quad; - unsigned char trans_global; - unsigned char trans_local; - unsigned short trans_reserved; -}; - -static int mpc_record; - -static struct mpc_trans *translation_table[MAX_MPC_ENTRY]; - -int mp_bus_id_to_node[MAX_MP_BUSSES]; -int mp_bus_id_to_local[MAX_MP_BUSSES]; -int quad_local_to_mp_bus_id[NR_CPUS/4][4]; - - -static inline void numaq_register_node(int node, struct sys_cfg_data *scd) -{ - struct eachquadmem *eq = scd->eq + node; - u64 start = (u64)(eq->hi_shrd_mem_start - eq->priv_mem_size) << 20; - u64 end = (u64)(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size) << 20; - int ret; - - node_set(node, numa_nodes_parsed); - ret = numa_add_memblk(node, start, end); - BUG_ON(ret < 0); -} - -/* - * Function: smp_dump_qct() - * - * Description: gets memory layout from the quad config table. This - * function also updates numa_nodes_parsed with the nodes (quads) present. - */ -static void __init smp_dump_qct(void) -{ - struct sys_cfg_data *scd; - int node; - - scd = (void *)__va(SYS_CFG_DATA_PRIV_ADDR); - - for_each_node(node) { - if (scd->quads_present31_0 & (1 << node)) - numaq_register_node(node, scd); - } -} - -void __cpuinit numaq_tsc_disable(void) -{ - if (!found_numaq) - return; - - if (num_online_nodes() > 1) { - printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); - setup_clear_cpu_cap(X86_FEATURE_TSC); - } -} - -static void __init numaq_tsc_init(void) -{ - numaq_tsc_disable(); -} - -static inline int generate_logical_apicid(int quad, int phys_apicid) -{ - return (quad << 4) + (phys_apicid ? 
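/*
 * Illustrative sketch (not the kernel implementation):
 * generate_logical_apicid(), split across this point, packs the quad number
 * into the high nibble and a per-quad CPU bit into the low nibble, so quad 2
 * with physical APIC 1 becomes 0x22 and the boot CPU of a quad gets low
 * bit 1. Worked out in plain C:
 */
#include <stdio.h>

static int sk_numaq_logical_apicid(int quad, int phys_apicid)
{
	return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1);
}

int main(void)
{
	printf("%#x %#x\n",
	       sk_numaq_logical_apicid(2, 1),	/* 0x22 */
	       sk_numaq_logical_apicid(2, 0));	/* 0x21 */
	return 0;
}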
phys_apicid << 1 : 1); -} - -/* x86_quirks member */ -static int mpc_apic_id(struct mpc_cpu *m) -{ - int quad = translation_table[mpc_record]->trans_quad; - int logical_apicid = generate_logical_apicid(quad, m->apicid); - - printk(KERN_DEBUG - "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n", - m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8, - (m->cpufeature & CPU_MODEL_MASK) >> 4, - m->apicver, quad, logical_apicid); - - return logical_apicid; -} - -/* x86_quirks member */ -static void mpc_oem_bus_info(struct mpc_bus *m, char *name) -{ - int quad = translation_table[mpc_record]->trans_quad; - int local = translation_table[mpc_record]->trans_local; - - mp_bus_id_to_node[m->busid] = quad; - mp_bus_id_to_local[m->busid] = local; - - printk(KERN_INFO "Bus #%d is %s (node %d)\n", m->busid, name, quad); -} - -/* x86_quirks member */ -static void mpc_oem_pci_bus(struct mpc_bus *m) -{ - int quad = translation_table[mpc_record]->trans_quad; - int local = translation_table[mpc_record]->trans_local; - - quad_local_to_mp_bus_id[quad][local] = m->busid; -} - -/* - * Called from mpparse code. - * mode = 0: prescan - * mode = 1: one mpc entry scanned - */ -static void numaq_mpc_record(unsigned int mode) -{ - if (!mode) - mpc_record = 0; - else - mpc_record++; -} - -static void __init MP_translation_info(struct mpc_trans *m) -{ - printk(KERN_INFO - "Translation: record %d, type %d, quad %d, global %d, local %d\n", - mpc_record, m->trans_type, m->trans_quad, m->trans_global, - m->trans_local); - - if (mpc_record >= MAX_MPC_ENTRY) - printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); - else - translation_table[mpc_record] = m; /* stash this for later */ - - if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad)) - node_set_online(m->trans_quad); -} - -static int __init mpf_checksum(unsigned char *mp, int len) -{ - int sum = 0; - - while (len--) - sum += *mp++; - - return sum & 0xFF; -} - -/* - * Read/parse the MPC oem tables - */ -static void __init smp_read_mpc_oem(struct mpc_table *mpc) -{ - struct mpc_oemtable *oemtable = (void *)(long)mpc->oemptr; - int count = sizeof(*oemtable); /* the header size */ - unsigned char *oemptr = ((unsigned char *)oemtable) + count; - - mpc_record = 0; - printk(KERN_INFO - "Found an OEM MPC table at %8p - parsing it...\n", oemtable); - - if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) { - printk(KERN_WARNING - "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", - oemtable->signature[0], oemtable->signature[1], - oemtable->signature[2], oemtable->signature[3]); - return; - } - - if (mpf_checksum((unsigned char *)oemtable, oemtable->length)) { - printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); - return; - } - - while (count < oemtable->length) { - switch (*oemptr) { - case MP_TRANSLATION: - { - struct mpc_trans *m = (void *)oemptr; - - MP_translation_info(m); - oemptr += sizeof(*m); - count += sizeof(*m); - ++mpc_record; - break; - } - default: - printk(KERN_WARNING - "Unrecognised OEM table entry type! 
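/*
 * Illustrative sketch (not the kernel implementation): mpf_checksum() above
 * just sums every byte of the table modulo 256; a well-formed MPC/OEM table
 * carries a checksum byte chosen so that the whole table sums to zero, which
 * is why smp_read_mpc_oem() rejects any table whose sum is non-zero. For
 * example:
 */
#include <stdio.h>

static int sk_mpf_checksum(const unsigned char *p, int len)
{
	int sum = 0;
	while (len--)
		sum += *p++;
	return sum & 0xff;		/* 0 means "table is consistent" */
}

int main(void)
{
	unsigned char table[4] = { 0x10, 0x20, 0x30, 0 };
	table[3] = (unsigned char)(0x100 - (0x10 + 0x20 + 0x30));	/* 0xa0 */
	printf("checksum=%d\n", sk_mpf_checksum(table, 4));		/* 0 */
	return 0;
}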
- %d\n", - (int)*oemptr); - return; - } - } -} - -static __init void early_check_numaq(void) -{ - /* - * get boot-time SMP configuration: - */ - if (smp_found_config) - early_get_smp_config(); - - if (found_numaq) { - x86_init.mpparse.mpc_record = numaq_mpc_record; - x86_init.mpparse.setup_ioapic_ids = x86_init_noop; - x86_init.mpparse.mpc_apic_id = mpc_apic_id; - x86_init.mpparse.smp_read_mpc_oem = smp_read_mpc_oem; - x86_init.mpparse.mpc_oem_pci_bus = mpc_oem_pci_bus; - x86_init.mpparse.mpc_oem_bus_info = mpc_oem_bus_info; - x86_init.timers.tsc_pre_init = numaq_tsc_init; - x86_init.pci.init = pci_numaq_init; - } -} - -int __init numaq_numa_init(void) -{ - early_check_numaq(); - if (!found_numaq) - return -ENOENT; - smp_dump_qct(); - - return 0; -} - -#define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER) - -static inline unsigned int numaq_get_apic_id(unsigned long x) -{ - return (x >> 24) & 0x0F; -} - -static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector) -{ - default_send_IPI_mask_sequence_logical(mask, vector); -} - -static inline void numaq_send_IPI_allbutself(int vector) -{ - default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector); -} - -static inline void numaq_send_IPI_all(int vector) -{ - numaq_send_IPI_mask(cpu_online_mask, vector); -} - -#define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8) -#define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa) - -/* - * Because we use NMIs rather than the INIT-STARTUP sequence to - * bootstrap the CPUs, the APIC may be in a weird state. Kick it: - */ -static inline void numaq_smp_callin_clear_local_apic(void) -{ - clear_local_APIC(); -} - -static inline const struct cpumask *numaq_target_cpus(void) -{ - return cpu_all_mask; -} - -static unsigned long numaq_check_apicid_used(physid_mask_t *map, int apicid) -{ - return physid_isset(apicid, *map); -} - -static inline unsigned long numaq_check_apicid_present(int bit) -{ - return physid_isset(bit, phys_cpu_present_map); -} - -static inline int numaq_apic_id_registered(void) -{ - return 1; -} - -static inline void numaq_init_apic_ldr(void) -{ - /* Already done in NUMA-Q firmware */ -} - -static inline void numaq_setup_apic_routing(void) -{ - printk(KERN_INFO - "Enabling APIC mode: NUMA-Q. Using %d I/O APICs\n", - nr_ioapics); -} - -/* - * Skip adding the timer int on secondary nodes, which causes - * a small but painful rift in the time-space continuum. - */ -static inline int numaq_multi_timer_check(int apic, int irq) -{ - return apic != 0 && irq == 0; -} - -static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) -{ - /* We don't have a good way to do this yet - hack */ - return physids_promote(0xFUL, retmap); -} - -/* - * Supporting over 60 cpus on NUMA-Q requires a locality-dependent - * cpu to APIC ID relation to properly interact with the intelligent - * mode of the cluster controller. 
- */ -static inline int numaq_cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < 60) - return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3)); - else - return BAD_APICID; -} - -static inline int numaq_apicid_to_node(int logical_apicid) -{ - return logical_apicid >> 4; -} - -static int numaq_numa_cpu_node(int cpu) -{ - int logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); - - if (logical_apicid != BAD_APICID) - return numaq_apicid_to_node(logical_apicid); - return NUMA_NO_NODE; -} - -static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap) -{ - int node = numaq_apicid_to_node(logical_apicid); - int cpu = __ffs(logical_apicid & 0xf); - - physid_set_mask_of_physid(cpu + 4*node, retmap); -} - -/* Where the IO area was mapped on multiquad, always 0 otherwise */ -void *xquad_portio; - -static inline int numaq_check_phys_apicid_present(int phys_apicid) -{ - return 1; -} - -/* - * We use physical apicids here, not logical, so just return the default - * physical broadcast to stop people from breaking us - */ -static unsigned int numaq_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - return 0x0F; -} - -static inline unsigned int -numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - return 0x0F; -} - -/* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ -static inline int numaq_phys_pkg_id(int cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - -static int -numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) -{ - if (strncmp(oem, "IBM NUMA", 8)) - printk(KERN_ERR "Warning! Not a NUMA-Q system!\n"); - else - found_numaq = 1; - - return found_numaq; -} - -static int probe_numaq(void) -{ - /* already know from get_memcfg_numaq() */ - return found_numaq; -} - -static void numaq_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - /* Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - cpumask_clear(retmask); - cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} - -static void numaq_setup_portio_remap(void) -{ - int num_quads = num_online_nodes(); - - if (num_quads <= 1) - return; - - printk(KERN_INFO - "Remapping cross-quad port I/O for %d quads\n", num_quads); - - xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD); - - printk(KERN_INFO - "xquad_portio vaddr 0x%08lx, len %08lx\n", - (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); -} - -/* Use __refdata to keep false positive warning calm. 
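/*
 * Illustrative sketch (not the kernel implementation):
 * numaq_cpu_present_to_apicid() above encodes "quad in the high nibble,
 * one-hot CPU-within-quad in the low nibble":
 * APIC ID = ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 3)). The inverse used by
 * numaq_apicid_to_node() and numaq_apicid_to_cpu_present() is node = id >> 4
 * and cpu-in-quad = index of the low set bit. Worked example:
 */
#include <stdio.h>

int main(void)
{
	int mps_cpu = 9;					/* quad 2, cpu 1 */
	int apicid  = ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 3));
	int node    = apicid >> 4;
	int cpu_in_quad = __builtin_ffs(apicid & 0xf) - 1;	/* low set bit index */

	printf("apicid=%#x node=%d cpu=%d\n", apicid, node, cpu_in_quad);
	return 0;	/* apicid=0x22, node=2, cpu=1 */
}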
*/ -static struct apic __refdata apic_numaq = { - - .name = "NUMAQ", - .probe = probe_numaq, - .acpi_madt_oem_check = NULL, - .apic_id_valid = default_apic_id_valid, - .apic_id_registered = numaq_apic_id_registered, - - .irq_delivery_mode = dest_LowestPrio, - /* physical delivery on LOCAL quad: */ - .irq_dest_mode = 0, - - .target_cpus = numaq_target_cpus, - .disable_esr = 1, - .dest_logical = APIC_DEST_LOGICAL, - .check_apicid_used = numaq_check_apicid_used, - .check_apicid_present = numaq_check_apicid_present, - - .vector_allocation_domain = numaq_vector_allocation_domain, - .init_apic_ldr = numaq_init_apic_ldr, - - .ioapic_phys_id_map = numaq_ioapic_phys_id_map, - .setup_apic_routing = numaq_setup_apic_routing, - .multi_timer_check = numaq_multi_timer_check, - .cpu_present_to_apicid = numaq_cpu_present_to_apicid, - .apicid_to_cpu_present = numaq_apicid_to_cpu_present, - .setup_portio_remap = numaq_setup_portio_remap, - .check_phys_apicid_present = numaq_check_phys_apicid_present, - .enable_apic_mode = NULL, - .phys_pkg_id = numaq_phys_pkg_id, - .mps_oem_check = numaq_mps_oem_check, - - .get_apic_id = numaq_get_apic_id, - .set_apic_id = NULL, - .apic_id_mask = 0x0F << 24, - - .cpu_mask_to_apicid = numaq_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and, - - .send_IPI_mask = numaq_send_IPI_mask, - .send_IPI_mask_allbutself = NULL, - .send_IPI_allbutself = numaq_send_IPI_allbutself, - .send_IPI_all = numaq_send_IPI_all, - .send_IPI_self = default_send_IPI_self, - - .wakeup_secondary_cpu = wakeup_secondary_cpu_via_nmi, - .trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = NUMAQ_TRAMPOLINE_PHYS_HIGH, - - /* We don't do anything here because we use NMI's to boot instead */ - .wait_for_init_deassert = NULL, - - .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic, - .inquire_remote_apic = NULL, - - .read = native_apic_mem_read, - .write = native_apic_mem_write, - .icr_read = native_apic_icr_read, - .icr_write = native_apic_icr_write, - .wait_icr_idle = native_apic_wait_icr_idle, - .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, - - .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid, - .x86_32_numa_cpu_node = numaq_numa_cpu_node, -}; - -apic_driver(apic_numaq); diff --git a/ANDROID_3.4.5/arch/x86/kernel/apic/probe_32.c b/ANDROID_3.4.5/arch/x86/kernel/apic/probe_32.c deleted file mode 100644 index ff2c1b9a..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/apic/probe_32.c +++ /dev/null @@ -1,271 +0,0 @@ -/* - * Default generic APIC driver. This handles up to 8 CPUs. - * - * Copyright 2003 Andi Kleen, SuSE Labs. - * Subject to the GNU Public License, v.2 - * - * Generic x86 APIC driver probe layer. - */ -#include <linux/threads.h> -#include <linux/cpumask.h> -#include <linux/module.h> -#include <linux/string.h> -#include <linux/kernel.h> -#include <linux/ctype.h> -#include <linux/init.h> -#include <linux/errno.h> -#include <asm/fixmap.h> -#include <asm/mpspec.h> -#include <asm/apicdef.h> -#include <asm/apic.h> -#include <asm/setup.h> - -#include <linux/smp.h> -#include <asm/ipi.h> - -#include <linux/interrupt.h> -#include <asm/acpi.h> -#include <asm/e820.h> - -#ifdef CONFIG_HOTPLUG_CPU -#define DEFAULT_SEND_IPI (1) -#else -#define DEFAULT_SEND_IPI (0) -#endif - -int no_broadcast = DEFAULT_SEND_IPI; - -static __init int no_ipi_broadcast(char *str) -{ - get_option(&str, &no_broadcast); - pr_info("Using %s mode\n", - no_broadcast ? 
"No IPI Broadcast" : "IPI Broadcast"); - return 1; -} -__setup("no_ipi_broadcast=", no_ipi_broadcast); - -static int __init print_ipi_mode(void) -{ - pr_info("Using IPI %s mode\n", - no_broadcast ? "No-Shortcut" : "Shortcut"); - return 0; -} -late_initcall(print_ipi_mode); - -static int default_x86_32_early_logical_apicid(int cpu) -{ - return 1 << cpu; -} - -static void setup_apic_flat_routing(void) -{ -#ifdef CONFIG_X86_IO_APIC - printk(KERN_INFO - "Enabling APIC mode: Flat. Using %d I/O APICs\n", - nr_ioapics); -#endif -} - -static void default_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - /* - * Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - cpumask_clear(retmask); - cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} - -/* should be called last. */ -static int probe_default(void) -{ - return 1; -} - -static struct apic apic_default = { - - .name = "default", - .probe = probe_default, - .acpi_madt_oem_check = NULL, - .apic_id_valid = default_apic_id_valid, - .apic_id_registered = default_apic_id_registered, - - .irq_delivery_mode = dest_LowestPrio, - /* logical delivery broadcast to all CPUs: */ - .irq_dest_mode = 1, - - .target_cpus = default_target_cpus, - .disable_esr = 0, - .dest_logical = APIC_DEST_LOGICAL, - .check_apicid_used = default_check_apicid_used, - .check_apicid_present = default_check_apicid_present, - - .vector_allocation_domain = default_vector_allocation_domain, - .init_apic_ldr = default_init_apic_ldr, - - .ioapic_phys_id_map = default_ioapic_phys_id_map, - .setup_apic_routing = setup_apic_flat_routing, - .multi_timer_check = NULL, - .cpu_present_to_apicid = default_cpu_present_to_apicid, - .apicid_to_cpu_present = physid_set_mask_of_physid, - .setup_portio_remap = NULL, - .check_phys_apicid_present = default_check_phys_apicid_present, - .enable_apic_mode = NULL, - .phys_pkg_id = default_phys_pkg_id, - .mps_oem_check = NULL, - - .get_apic_id = default_get_apic_id, - .set_apic_id = NULL, - .apic_id_mask = 0x0F << 24, - - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, - - .send_IPI_mask = default_send_IPI_mask_logical, - .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_logical, - .send_IPI_allbutself = default_send_IPI_allbutself, - .send_IPI_all = default_send_IPI_all, - .send_IPI_self = default_send_IPI_self, - - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - - .wait_for_init_deassert = default_wait_for_init_deassert, - - .smp_callin_clear_local_apic = NULL, - .inquire_remote_apic = default_inquire_remote_apic, - - .read = native_apic_mem_read, - .write = native_apic_mem_write, - .icr_read = native_apic_icr_read, - .icr_write = native_apic_icr_write, - .wait_icr_idle = native_apic_wait_icr_idle, - .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, - - .x86_32_early_logical_apicid = default_x86_32_early_logical_apicid, -}; - -apic_driver(apic_default); - -struct apic *apic = &apic_default; -EXPORT_SYMBOL_GPL(apic); - -static int cmdline_apic __initdata; -static int __init parse_apic(char *arg) -{ - struct apic **drv; - - if (!arg) - return -EINVAL; - - for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { - if 
(!strcmp((*drv)->name, arg)) { - apic = *drv; - cmdline_apic = 1; - return 0; - } - } - - /* Parsed again by __setup for debug/verbose */ - return 0; -} -early_param("apic", parse_apic); - -void __init default_setup_apic_routing(void) -{ - int version = apic_version[boot_cpu_physical_apicid]; - - if (num_possible_cpus() > 8) { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - if (!APIC_XAPIC(version)) { - def_to_bigsmp = 0; - break; - } - /* If P4 and above fall through */ - case X86_VENDOR_AMD: - def_to_bigsmp = 1; - } - } - -#ifdef CONFIG_X86_BIGSMP - /* - * This is used to switch to bigsmp mode when - * - There is no apic= option specified by the user - * - generic_apic_probe() has chosen apic_default as the sub_arch - * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support - */ - - if (!cmdline_apic && apic == &apic_default) - generic_bigsmp_probe(); -#endif - - if (apic->setup_apic_routing) - apic->setup_apic_routing(); -} - -void __init generic_apic_probe(void) -{ - if (!cmdline_apic) { - struct apic **drv; - - for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { - if ((*drv)->probe()) { - apic = *drv; - break; - } - } - /* Not visible without early console */ - if (drv == __apicdrivers_end) - panic("Didn't find an APIC driver"); - } - printk(KERN_INFO "Using APIC driver %s\n", apic->name); -} - -/* These functions can switch the APIC even after the initial ->probe() */ - -int __init -generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) -{ - struct apic **drv; - - for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { - if (!((*drv)->mps_oem_check)) - continue; - if (!(*drv)->mps_oem_check(mpc, oem, productid)) - continue; - - if (!cmdline_apic) { - apic = *drv; - printk(KERN_INFO "Switched to APIC driver `%s'.\n", - apic->name); - } - return 1; - } - return 0; -} - -int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - struct apic **drv; - - for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { - if (!(*drv)->acpi_madt_oem_check) - continue; - if (!(*drv)->acpi_madt_oem_check(oem_id, oem_table_id)) - continue; - - if (!cmdline_apic) { - apic = *drv; - printk(KERN_INFO "Switched to APIC driver `%s'.\n", - apic->name); - } - return 1; - } - return 0; -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/apic/probe_64.c b/ANDROID_3.4.5/arch/x86/kernel/apic/probe_64.c deleted file mode 100644 index 3fe98669..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/apic/probe_64.c +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright 2004 James Cleverdon, IBM. - * Subject to the GNU Public License, v.2 - * - * Generic APIC sub-arch probe layer. - * - * Hacked for x86-64 by James Cleverdon from i386 architecture code by - * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and - * James Cleverdon. - */ -#include <linux/threads.h> -#include <linux/cpumask.h> -#include <linux/string.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/ctype.h> -#include <linux/init.h> -#include <linux/hardirq.h> -#include <linux/dmar.h> - -#include <asm/smp.h> -#include <asm/apic.h> -#include <asm/ipi.h> -#include <asm/setup.h> - -static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) -{ - return hard_smp_processor_id() >> index_msb; -} - -/* - * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. 
- */ -void __init default_setup_apic_routing(void) -{ - struct apic **drv; - - enable_IR_x2apic(); - - for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { - if ((*drv)->probe && (*drv)->probe()) { - if (apic != *drv) { - apic = *drv; - pr_info("Switched APIC routing to %s.\n", - apic->name); - } - break; - } - } - - if (is_vsmp_box()) { - /* need to update phys_pkg_id */ - apic->phys_pkg_id = apicid_phys_pkg_id; - } -} - -/* Same for both flat and physical. */ - -void apic_send_IPI_self(int vector) -{ - __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); -} - -int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - struct apic **drv; - - for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { - if ((*drv)->acpi_madt_oem_check(oem_id, oem_table_id)) { - if (apic != *drv) { - apic = *drv; - pr_info("Setting APIC routing to %s.\n", - apic->name); - } - return 1; - } - } - return 0; -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/apic/summit_32.c b/ANDROID_3.4.5/arch/x86/kernel/apic/summit_32.c deleted file mode 100644 index fea000b2..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/apic/summit_32.c +++ /dev/null @@ -1,557 +0,0 @@ -/* - * IBM Summit-Specific Code - * - * Written By: Matthew Dobson, IBM Corporation - * - * Copyright (c) 2003 IBM Corp. - * - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or (at - * your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Send feedback to <colpatch@us.ibm.com> - * - */ - -#include <linux/mm.h> -#include <linux/init.h> -#include <asm/io.h> -#include <asm/bios_ebda.h> - -/* - * APIC driver for the IBM "Summit" chipset. 
- */ -#include <linux/threads.h> -#include <linux/cpumask.h> -#include <asm/mpspec.h> -#include <asm/apic.h> -#include <asm/smp.h> -#include <asm/fixmap.h> -#include <asm/apicdef.h> -#include <asm/ipi.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/gfp.h> -#include <linux/smp.h> - -static unsigned summit_get_apic_id(unsigned long x) -{ - return (x >> 24) & 0xFF; -} - -static inline void summit_send_IPI_mask(const struct cpumask *mask, int vector) -{ - default_send_IPI_mask_sequence_logical(mask, vector); -} - -static void summit_send_IPI_allbutself(int vector) -{ - default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector); -} - -static void summit_send_IPI_all(int vector) -{ - summit_send_IPI_mask(cpu_online_mask, vector); -} - -#include <asm/tsc.h> - -extern int use_cyclone; - -#ifdef CONFIG_X86_SUMMIT_NUMA -static void setup_summit(void); -#else -static inline void setup_summit(void) {} -#endif - -static int summit_mps_oem_check(struct mpc_table *mpc, char *oem, - char *productid) -{ - if (!strncmp(oem, "IBM ENSW", 8) && - (!strncmp(productid, "VIGIL SMP", 9) - || !strncmp(productid, "EXA", 3) - || !strncmp(productid, "RUTHLESS SMP", 12))){ - mark_tsc_unstable("Summit based system"); - use_cyclone = 1; /*enable cyclone-timer*/ - setup_summit(); - return 1; - } - return 0; -} - -/* Hook from generic ACPI tables.c */ -static int summit_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - if (!strncmp(oem_id, "IBM", 3) && - (!strncmp(oem_table_id, "SERVIGIL", 8) - || !strncmp(oem_table_id, "EXA", 3))){ - mark_tsc_unstable("Summit based system"); - use_cyclone = 1; /*enable cyclone-timer*/ - setup_summit(); - return 1; - } - return 0; -} - -struct rio_table_hdr { - unsigned char version; /* Version number of this data structure */ - /* Version 3 adds chassis_num & WP_index */ - unsigned char num_scal_dev; /* # of Scalability devices (Twisters for Vigil) */ - unsigned char num_rio_dev; /* # of RIO I/O devices (Cyclones and Winnipegs) */ -} __attribute__((packed)); - -struct scal_detail { - unsigned char node_id; /* Scalability Node ID */ - unsigned long CBAR; /* Address of 1MB register space */ - unsigned char port0node; /* Node ID port connected to: 0xFF=None */ - unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */ - unsigned char port1node; /* Node ID port connected to: 0xFF = None */ - unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */ - unsigned char port2node; /* Node ID port connected to: 0xFF = None */ - unsigned char port2port; /* Port num port connected to: 0,1,2, or 0xFF=None */ - unsigned char chassis_num; /* 1 based Chassis number (1 = boot node) */ -} __attribute__((packed)); - -struct rio_detail { - unsigned char node_id; /* RIO Node ID */ - unsigned long BBAR; /* Address of 1MB register space */ - unsigned char type; /* Type of device */ - unsigned char owner_id; /* For WPEG: Node ID of Cyclone that owns this WPEG*/ - /* For CYC: Node ID of Twister that owns this CYC */ - unsigned char port0node; /* Node ID port connected to: 0xFF=None */ - unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */ - unsigned char port1node; /* Node ID port connected to: 0xFF=None */ - unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */ - unsigned char first_slot; /* For WPEG: Lowest slot number below this WPEG */ - /* For CYC: 0 */ - unsigned char status; /* For WPEG: Bit 0 = 1 : the XAPIC is used */ - /* = 0 : the XAPIC is not used, ie:*/ - /* ints 
fwded to another XAPIC */ - /* Bits1:7 Reserved */ - /* For CYC: Bits0:7 Reserved */ - unsigned char WP_index; /* For WPEG: WPEG instance index - lower ones have */ - /* lower slot numbers/PCI bus numbers */ - /* For CYC: No meaning */ - unsigned char chassis_num; /* 1 based Chassis number */ - /* For LookOut WPEGs this field indicates the */ - /* Expansion Chassis #, enumerated from Boot */ - /* Node WPEG external port, then Boot Node CYC */ - /* external port, then Next Vigil chassis WPEG */ - /* external port, etc. */ - /* Shared Lookouts have only 1 chassis number (the */ - /* first one assigned) */ -} __attribute__((packed)); - - -typedef enum { - CompatTwister = 0, /* Compatibility Twister */ - AltTwister = 1, /* Alternate Twister of internal 8-way */ - CompatCyclone = 2, /* Compatibility Cyclone */ - AltCyclone = 3, /* Alternate Cyclone of internal 8-way */ - CompatWPEG = 4, /* Compatibility WPEG */ - AltWPEG = 5, /* Second Planar WPEG */ - LookOutAWPEG = 6, /* LookOut WPEG */ - LookOutBWPEG = 7, /* LookOut WPEG */ -} node_type; - -static inline int is_WPEG(struct rio_detail *rio){ - return (rio->type == CompatWPEG || rio->type == AltWPEG || - rio->type == LookOutAWPEG || rio->type == LookOutBWPEG); -} - -#define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER) - -static const struct cpumask *summit_target_cpus(void) -{ - /* CPU_MASK_ALL (0xff) has undefined behaviour with - * dest_LowestPrio mode logical clustered apic interrupt routing - * Just start on cpu 0. IRQ balancing will spread load - */ - return cpumask_of(0); -} - -static unsigned long summit_check_apicid_used(physid_mask_t *map, int apicid) -{ - return 0; -} - -/* we don't use the phys_cpu_present_map to indicate apicid presence */ -static unsigned long summit_check_apicid_present(int bit) -{ - return 1; -} - -static int summit_early_logical_apicid(int cpu) -{ - int count = 0; - u8 my_id = early_per_cpu(x86_cpu_to_apicid, cpu); - u8 my_cluster = APIC_CLUSTER(my_id); -#ifdef CONFIG_SMP - u8 lid; - int i; - - /* Create logical APIC IDs by counting CPUs already in cluster. */ - for (count = 0, i = nr_cpu_ids; --i >= 0; ) { - lid = early_per_cpu(x86_cpu_to_logical_apicid, i); - if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster) - ++count; - } -#endif - /* We only have a 4 wide bitmap in cluster mode. If a deranged - * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */ - BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT); - return my_cluster | (1UL << count); -} - -static void summit_init_apic_ldr(void) -{ - int cpu = smp_processor_id(); - unsigned long id = early_per_cpu(x86_cpu_to_logical_apicid, cpu); - unsigned long val; - - apic_write(APIC_DFR, SUMMIT_APIC_DFR_VALUE); - val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; - val |= SET_APIC_LOGICAL_ID(id); - apic_write(APIC_LDR, val); -} - -static int summit_apic_id_registered(void) -{ - return 1; -} - -static void summit_setup_apic_routing(void) -{ - printk("Enabling APIC mode: Summit. 
Using %d I/O APICs\n", - nr_ioapics); -} - -static int summit_cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < nr_cpu_ids) - return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); - else - return BAD_APICID; -} - -static void summit_ioapic_phys_id_map(physid_mask_t *phys_id_map, physid_mask_t *retmap) -{ - /* For clustered we don't have a good way to do this yet - hack */ - physids_promote(0x0FL, retmap); -} - -static void summit_apicid_to_cpu_present(int apicid, physid_mask_t *retmap) -{ - physid_set_mask_of_physid(0, retmap); -} - -static int summit_check_phys_apicid_present(int physical_apicid) -{ - return 1; -} - -static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - unsigned int round = 0; - int cpu, apicid = 0; - - /* - * The cpus in the mask must all be on the apic cluster. - */ - for_each_cpu(cpu, cpumask) { - int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); - - if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { - printk("%s: Not a valid mask!\n", __func__); - return BAD_APICID; - } - apicid |= new_apicid; - round++; - } - return apicid; -} - -static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, - const struct cpumask *andmask) -{ - int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0); - cpumask_var_t cpumask; - - if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) - return apicid; - - cpumask_and(cpumask, inmask, andmask); - cpumask_and(cpumask, cpumask, cpu_online_mask); - apicid = summit_cpu_mask_to_apicid(cpumask); - - free_cpumask_var(cpumask); - - return apicid; -} - -/* - * cpuid returns the value latched in the HW at reset, not the APIC ID - * register's value. For any box whose BIOS changes APIC IDs, like - * clustered APIC systems, we must use hard_smp_processor_id. - * - * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID. - */ -static int summit_phys_pkg_id(int cpuid_apic, int index_msb) -{ - return hard_smp_processor_id() >> index_msb; -} - -static int probe_summit(void) -{ - /* probed later in mptable/ACPI hooks */ - return 0; -} - -static void summit_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - /* Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. 
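To make the clustered-xAPIC addressing used by summit_early_logical_apicid() and summit_cpu_mask_to_apicid() above easier to follow, here is a small standalone sketch (plain user-space C, not kernel code; the 0xF0 cluster / 0x0F member split and the 4-CPUs-per-cluster limit mirror the APIC_CLUSTER()/XAPIC_DEST_CPUS_SHIFT usage above, and all numeric IDs are invented):

#include <stdio.h>

#define CLUSTER(id)	((id) & 0xF0)	/* upper nibble: xAPIC cluster number */

int main(void)
{
	unsigned int cluster = 0x20;	/* an invented cluster number */
	unsigned int logical[4];
	unsigned int dest;
	int i;

	/* Each CPU gets one bit of the low nibble, as in
	 * summit_early_logical_apicid(): my_cluster | (1UL << count). */
	for (i = 0; i < 4; i++)
		logical[i] = cluster | (1u << i);

	/* Addressing CPUs 0 and 2 of the cluster ORs their logical IDs,
	 * as summit_cpu_mask_to_apicid() does; mixing clusters is rejected
	 * there as an invalid mask. */
	dest = logical[0] | logical[2];

	printf("destination = 0x%02x (cluster 0x%02x, member bits 0x%x)\n",
	       dest, CLUSTER(dest), dest & 0x0F);	/* 0x25 */
	return 0;
}

OR-ing IDs from different clusters would corrupt the cluster field, which is why summit_cpu_mask_to_apicid() above returns BAD_APICID for such masks.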
- */ - cpumask_clear(retmask); - cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} - -#ifdef CONFIG_X86_SUMMIT_NUMA -static struct rio_table_hdr *rio_table_hdr; -static struct scal_detail *scal_devs[MAX_NUMNODES]; -static struct rio_detail *rio_devs[MAX_NUMNODES*4]; - -#ifndef CONFIG_X86_NUMAQ -static int mp_bus_id_to_node[MAX_MP_BUSSES]; -#endif - -static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) -{ - int twister = 0, node = 0; - int i, bus, num_buses; - - for (i = 0; i < rio_table_hdr->num_rio_dev; i++) { - if (rio_devs[i]->node_id == rio_devs[wpeg_num]->owner_id) { - twister = rio_devs[i]->owner_id; - break; - } - } - if (i == rio_table_hdr->num_rio_dev) { - printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __func__); - return last_bus; - } - - for (i = 0; i < rio_table_hdr->num_scal_dev; i++) { - if (scal_devs[i]->node_id == twister) { - node = scal_devs[i]->node_id; - break; - } - } - if (i == rio_table_hdr->num_scal_dev) { - printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __func__); - return last_bus; - } - - switch (rio_devs[wpeg_num]->type) { - case CompatWPEG: - /* - * The Compatibility Winnipeg controls the 2 legacy buses, - * the 66MHz PCI bus [2 slots] and the 2 "extra" buses in case - * a PCI-PCI bridge card is used in either slot: total 5 buses. - */ - num_buses = 5; - break; - case AltWPEG: - /* - * The Alternate Winnipeg controls the 2 133MHz buses [1 slot - * each], their 2 "extra" buses, the 100MHz bus [2 slots] and - * the "extra" buses for each of those slots: total 7 buses. - */ - num_buses = 7; - break; - case LookOutAWPEG: - case LookOutBWPEG: - /* - * A Lookout Winnipeg controls 3 100MHz buses [2 slots each] - * & the "extra" buses for each of those slots: total 9 buses. - */ - num_buses = 9; - break; - default: - printk(KERN_INFO "%s: Unsupported Winnipeg type!\n", __func__); - return last_bus; - } - - for (bus = last_bus; bus < last_bus + num_buses; bus++) - mp_bus_id_to_node[bus] = node; - return bus; -} - -static int build_detail_arrays(void) -{ - unsigned long ptr; - int i, scal_detail_size, rio_detail_size; - - if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) { - printk(KERN_WARNING "%s: MAX_NUMNODES too low! Defined as %d, but system has %d nodes.\n", __func__, MAX_NUMNODES, rio_table_hdr->num_scal_dev); - return 0; - } - - switch (rio_table_hdr->version) { - default: - printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __func__, rio_table_hdr->version); - return 0; - case 2: - scal_detail_size = 11; - rio_detail_size = 13; - break; - case 3: - scal_detail_size = 12; - rio_detail_size = 15; - break; - } - - ptr = (unsigned long)rio_table_hdr + 3; - for (i = 0; i < rio_table_hdr->num_scal_dev; i++, ptr += scal_detail_size) - scal_devs[i] = (struct scal_detail *)ptr; - - for (i = 0; i < rio_table_hdr->num_rio_dev; i++, ptr += rio_detail_size) - rio_devs[i] = (struct rio_detail *)ptr; - - return 1; -} - -void setup_summit(void) -{ - unsigned long ptr; - unsigned short offset; - int i, next_wpeg, next_bus = 0; - - /* The pointer to the EBDA is stored in the word @ phys 0x40E(40:0E) */ - ptr = get_bios_ebda(); - ptr = (unsigned long)phys_to_virt(ptr); - - rio_table_hdr = NULL; - offset = 0x180; - while (offset) { - /* The block id is stored in the 2nd word */ - if (*((unsigned short *)(ptr + offset + 2)) == 0x4752) { - /* set the pointer past the offset & block id */ - rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4); - break; - } - /* The next offset is stored in the 1st word. 
0 means no more */ - offset = *((unsigned short *)(ptr + offset)); - } - if (!rio_table_hdr) { - printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __func__); - return; - } - - if (!build_detail_arrays()) - return; - - /* The first Winnipeg we're looking for has an index of 0 */ - next_wpeg = 0; - do { - for (i = 0; i < rio_table_hdr->num_rio_dev; i++) { - if (is_WPEG(rio_devs[i]) && rio_devs[i]->WP_index == next_wpeg) { - /* It's the Winnipeg we're looking for! */ - next_bus = setup_pci_node_map_for_wpeg(i, next_bus); - next_wpeg++; - break; - } - } - /* - * If we go through all Rio devices and don't find one with - * the next index, it means we've found all the Winnipegs, - * and thus all the PCI buses. - */ - if (i == rio_table_hdr->num_rio_dev) - next_wpeg = 0; - } while (next_wpeg != 0); -} -#endif - -static struct apic apic_summit = { - - .name = "summit", - .probe = probe_summit, - .acpi_madt_oem_check = summit_acpi_madt_oem_check, - .apic_id_valid = default_apic_id_valid, - .apic_id_registered = summit_apic_id_registered, - - .irq_delivery_mode = dest_LowestPrio, - /* logical delivery broadcast to all CPUs: */ - .irq_dest_mode = 1, - - .target_cpus = summit_target_cpus, - .disable_esr = 1, - .dest_logical = APIC_DEST_LOGICAL, - .check_apicid_used = summit_check_apicid_used, - .check_apicid_present = summit_check_apicid_present, - - .vector_allocation_domain = summit_vector_allocation_domain, - .init_apic_ldr = summit_init_apic_ldr, - - .ioapic_phys_id_map = summit_ioapic_phys_id_map, - .setup_apic_routing = summit_setup_apic_routing, - .multi_timer_check = NULL, - .cpu_present_to_apicid = summit_cpu_present_to_apicid, - .apicid_to_cpu_present = summit_apicid_to_cpu_present, - .setup_portio_remap = NULL, - .check_phys_apicid_present = summit_check_phys_apicid_present, - .enable_apic_mode = NULL, - .phys_pkg_id = summit_phys_pkg_id, - .mps_oem_check = summit_mps_oem_check, - - .get_apic_id = summit_get_apic_id, - .set_apic_id = NULL, - .apic_id_mask = 0xFF << 24, - - .cpu_mask_to_apicid = summit_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = summit_cpu_mask_to_apicid_and, - - .send_IPI_mask = summit_send_IPI_mask, - .send_IPI_mask_allbutself = NULL, - .send_IPI_allbutself = summit_send_IPI_allbutself, - .send_IPI_all = summit_send_IPI_all, - .send_IPI_self = default_send_IPI_self, - - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - - .wait_for_init_deassert = default_wait_for_init_deassert, - - .smp_callin_clear_local_apic = NULL, - .inquire_remote_apic = default_inquire_remote_apic, - - .read = native_apic_mem_read, - .write = native_apic_mem_write, - .icr_read = native_apic_icr_read, - .icr_write = native_apic_icr_write, - .wait_icr_idle = native_apic_wait_icr_idle, - .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, - - .x86_32_early_logical_apicid = summit_early_logical_apicid, -}; - -apic_driver(apic_summit); diff --git a/ANDROID_3.4.5/arch/x86/kernel/apic/x2apic_cluster.c b/ANDROID_3.4.5/arch/x86/kernel/apic/x2apic_cluster.c deleted file mode 100644 index 48f3103b..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/apic/x2apic_cluster.c +++ /dev/null @@ -1,269 +0,0 @@ -#include <linux/threads.h> -#include <linux/cpumask.h> -#include <linux/string.h> -#include <linux/kernel.h> -#include <linux/ctype.h> -#include <linux/init.h> -#include <linux/dmar.h> -#include <linux/cpu.h> - -#include <asm/smp.h> -#include <asm/x2apic.h> - -static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); -static 
DEFINE_PER_CPU(cpumask_var_t, cpus_in_cluster); -static DEFINE_PER_CPU(cpumask_var_t, ipi_mask); - -static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - return x2apic_enabled(); -} - -static inline u32 x2apic_cluster(int cpu) -{ - return per_cpu(x86_cpu_to_logical_apicid, cpu) >> 16; -} - -static void -__x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) -{ - struct cpumask *cpus_in_cluster_ptr; - struct cpumask *ipi_mask_ptr; - unsigned int cpu, this_cpu; - unsigned long flags; - u32 dest; - - x2apic_wrmsr_fence(); - - local_irq_save(flags); - - this_cpu = smp_processor_id(); - - /* - * We are to modify mask, so we need an own copy - * and be sure it's manipulated with irq off. - */ - ipi_mask_ptr = __raw_get_cpu_var(ipi_mask); - cpumask_copy(ipi_mask_ptr, mask); - - /* - * The idea is to send one IPI per cluster. - */ - for_each_cpu(cpu, ipi_mask_ptr) { - unsigned long i; - - cpus_in_cluster_ptr = per_cpu(cpus_in_cluster, cpu); - dest = 0; - - /* Collect cpus in cluster. */ - for_each_cpu_and(i, ipi_mask_ptr, cpus_in_cluster_ptr) { - if (apic_dest == APIC_DEST_ALLINC || i != this_cpu) - dest |= per_cpu(x86_cpu_to_logical_apicid, i); - } - - if (!dest) - continue; - - __x2apic_send_IPI_dest(dest, vector, apic->dest_logical); - /* - * Cluster sibling cpus should be discared now so - * we would not send IPI them second time. - */ - cpumask_andnot(ipi_mask_ptr, ipi_mask_ptr, cpus_in_cluster_ptr); - } - - local_irq_restore(flags); -} - -static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) -{ - __x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLINC); -} - -static void - x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) -{ - __x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLBUT); -} - -static void x2apic_send_IPI_allbutself(int vector) -{ - __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLBUT); -} - -static void x2apic_send_IPI_all(int vector) -{ - __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC); -} - -static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - /* - * We're using fixed IRQ delivery, can only return one logical APIC ID. - * May as well be the first. - */ - int cpu = cpumask_first(cpumask); - - if ((unsigned)cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_logical_apicid, cpu); - else - return BAD_APICID; -} - -static unsigned int -x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one logical APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) { - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - } - - return per_cpu(x86_cpu_to_logical_apicid, cpu); -} - -static void init_x2apic_ldr(void) -{ - unsigned int this_cpu = smp_processor_id(); - unsigned int cpu; - - per_cpu(x86_cpu_to_logical_apicid, this_cpu) = apic_read(APIC_LDR); - - __cpu_set(this_cpu, per_cpu(cpus_in_cluster, this_cpu)); - for_each_online_cpu(cpu) { - if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu)) - continue; - __cpu_set(this_cpu, per_cpu(cpus_in_cluster, cpu)); - __cpu_set(cpu, per_cpu(cpus_in_cluster, this_cpu)); - } -} - - /* - * At CPU state changes, update the x2apic cluster sibling info. 
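The "one IPI per cluster" idea in __x2apic_send_IPI_mask() above relies on the logical-ID layout assumed by x2apic_cluster(): bits 31:16 identify the cluster and the low 16 bits carry a per-CPU position bit, so all targets in one cluster can be OR-ed into a single destination and then removed from the pending mask. A standalone sketch of that reduction (illustrative only; the hypothetical IDs below stand in for values the kernel reads from APIC_LDR, and the real loop walks cpumasks with interrupts disabled):

#include <stdio.h>

#define X2APIC_CLUSTER(lid)	((lid) >> 16)	/* bits 31:16 of the logical ID */

int main(void)
{
	/* Hypothetical logical APIC IDs: two clusters with two CPUs each
	 * (low 16 bits are a one-hot "position in cluster" mask). */
	unsigned int logical_id[4] = { 0x00010001, 0x00010002, 0x00020001, 0x00020004 };
	unsigned int pending = 0xF;	/* CPUs still waiting for an IPI */
	int cpu, peer;

	for (cpu = 0; cpu < 4; cpu++) {
		unsigned int dest = 0;

		if (!(pending & (1u << cpu)))
			continue;	/* already covered by an earlier cluster IPI */

		/* Collect every still-pending CPU of the same cluster ... */
		for (peer = 0; peer < 4; peer++) {
			if ((pending & (1u << peer)) &&
			    X2APIC_CLUSTER(logical_id[peer]) ==
			    X2APIC_CLUSTER(logical_id[cpu])) {
				dest |= logical_id[peer];
				pending &= ~(1u << peer);	/* ... and drop it so it is not IPI'd twice */
			}
		}
		printf("one IPI to logical destination 0x%08x\n", dest);
	}
	return 0;
}

With the four IDs above this sends two IPIs, to 0x00010003 and 0x00020005, instead of four.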
- */ -static int __cpuinit -update_clusterinfo(struct notifier_block *nfb, unsigned long action, void *hcpu) -{ - unsigned int this_cpu = (unsigned long)hcpu; - unsigned int cpu; - int err = 0; - - switch (action) { - case CPU_UP_PREPARE: - if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, this_cpu), - GFP_KERNEL)) { - err = -ENOMEM; - } else if (!zalloc_cpumask_var(&per_cpu(ipi_mask, this_cpu), - GFP_KERNEL)) { - free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu)); - err = -ENOMEM; - } - break; - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - case CPU_DEAD: - for_each_online_cpu(cpu) { - if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu)) - continue; - __cpu_clear(this_cpu, per_cpu(cpus_in_cluster, cpu)); - __cpu_clear(cpu, per_cpu(cpus_in_cluster, this_cpu)); - } - free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu)); - free_cpumask_var(per_cpu(ipi_mask, this_cpu)); - break; - } - - return notifier_from_errno(err); -} - -static struct notifier_block __refdata x2apic_cpu_notifier = { - .notifier_call = update_clusterinfo, -}; - -static int x2apic_init_cpu_notifier(void) -{ - int cpu = smp_processor_id(); - - zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL); - zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL); - - BUG_ON(!per_cpu(cpus_in_cluster, cpu) || !per_cpu(ipi_mask, cpu)); - - __cpu_set(cpu, per_cpu(cpus_in_cluster, cpu)); - register_hotcpu_notifier(&x2apic_cpu_notifier); - return 1; -} - -static int x2apic_cluster_probe(void) -{ - if (x2apic_mode) - return x2apic_init_cpu_notifier(); - else - return 0; -} - -static struct apic apic_x2apic_cluster = { - - .name = "cluster x2apic", - .probe = x2apic_cluster_probe, - .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, - .apic_id_valid = x2apic_apic_id_valid, - .apic_id_registered = x2apic_apic_id_registered, - - .irq_delivery_mode = dest_LowestPrio, - .irq_dest_mode = 1, /* logical */ - - .target_cpus = x2apic_target_cpus, - .disable_esr = 0, - .dest_logical = APIC_DEST_LOGICAL, - .check_apicid_used = NULL, - .check_apicid_present = NULL, - - .vector_allocation_domain = x2apic_vector_allocation_domain, - .init_apic_ldr = init_x2apic_ldr, - - .ioapic_phys_id_map = NULL, - .setup_apic_routing = NULL, - .multi_timer_check = NULL, - .cpu_present_to_apicid = default_cpu_present_to_apicid, - .apicid_to_cpu_present = NULL, - .setup_portio_remap = NULL, - .check_phys_apicid_present = default_check_phys_apicid_present, - .enable_apic_mode = NULL, - .phys_pkg_id = x2apic_phys_pkg_id, - .mps_oem_check = NULL, - - .get_apic_id = x2apic_get_apic_id, - .set_apic_id = x2apic_set_apic_id, - .apic_id_mask = 0xFFFFFFFFu, - - .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, - - .send_IPI_mask = x2apic_send_IPI_mask, - .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, - .send_IPI_allbutself = x2apic_send_IPI_allbutself, - .send_IPI_all = x2apic_send_IPI_all, - .send_IPI_self = x2apic_send_IPI_self, - - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = NULL, - .smp_callin_clear_local_apic = NULL, - .inquire_remote_apic = NULL, - - .read = native_apic_msr_read, - .write = native_apic_msr_write, - .icr_read = native_x2apic_icr_read, - .icr_write = native_x2apic_icr_write, - .wait_icr_idle = native_x2apic_wait_icr_idle, - .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle, -}; - -apic_driver(apic_x2apic_cluster); diff --git 
a/ANDROID_3.4.5/arch/x86/kernel/apic/x2apic_phys.c b/ANDROID_3.4.5/arch/x86/kernel/apic/x2apic_phys.c deleted file mode 100644 index 991e315f..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/apic/x2apic_phys.c +++ /dev/null @@ -1,181 +0,0 @@ -#include <linux/threads.h> -#include <linux/cpumask.h> -#include <linux/string.h> -#include <linux/kernel.h> -#include <linux/ctype.h> -#include <linux/init.h> -#include <linux/dmar.h> - -#include <asm/smp.h> -#include <asm/x2apic.h> - -int x2apic_phys; - -static struct apic apic_x2apic_phys; - -static int set_x2apic_phys_mode(char *arg) -{ - x2apic_phys = 1; - return 0; -} -early_param("x2apic_phys", set_x2apic_phys_mode); - -static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - if (x2apic_phys) - return x2apic_enabled(); - else if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) && - (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) && - x2apic_enabled()) { - printk(KERN_DEBUG "System requires x2apic physical mode\n"); - return 1; - } - else - return 0; -} - -static void -__x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) -{ - unsigned long query_cpu; - unsigned long this_cpu; - unsigned long flags; - - x2apic_wrmsr_fence(); - - local_irq_save(flags); - - this_cpu = smp_processor_id(); - for_each_cpu(query_cpu, mask) { - if (apic_dest == APIC_DEST_ALLBUT && this_cpu == query_cpu) - continue; - __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), - vector, APIC_DEST_PHYSICAL); - } - local_irq_restore(flags); -} - -static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) -{ - __x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLINC); -} - -static void - x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) -{ - __x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLBUT); -} - -static void x2apic_send_IPI_allbutself(int vector) -{ - __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLBUT); -} - -static void x2apic_send_IPI_all(int vector) -{ - __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC); -} - -static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - int cpu = cpumask_first(cpumask); - - if ((unsigned)cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); - else - return BAD_APICID; -} - -static unsigned int -x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. 
- */ - for_each_cpu_and(cpu, cpumask, andmask) { - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - } - - return per_cpu(x86_cpu_to_apicid, cpu); -} - -static void init_x2apic_ldr(void) -{ -} - -static int x2apic_phys_probe(void) -{ - if (x2apic_mode && x2apic_phys) - return 1; - - return apic == &apic_x2apic_phys; -} - -static struct apic apic_x2apic_phys = { - - .name = "physical x2apic", - .probe = x2apic_phys_probe, - .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, - .apic_id_valid = x2apic_apic_id_valid, - .apic_id_registered = x2apic_apic_id_registered, - - .irq_delivery_mode = dest_Fixed, - .irq_dest_mode = 0, /* physical */ - - .target_cpus = x2apic_target_cpus, - .disable_esr = 0, - .dest_logical = 0, - .check_apicid_used = NULL, - .check_apicid_present = NULL, - - .vector_allocation_domain = x2apic_vector_allocation_domain, - .init_apic_ldr = init_x2apic_ldr, - - .ioapic_phys_id_map = NULL, - .setup_apic_routing = NULL, - .multi_timer_check = NULL, - .cpu_present_to_apicid = default_cpu_present_to_apicid, - .apicid_to_cpu_present = NULL, - .setup_portio_remap = NULL, - .check_phys_apicid_present = default_check_phys_apicid_present, - .enable_apic_mode = NULL, - .phys_pkg_id = x2apic_phys_pkg_id, - .mps_oem_check = NULL, - - .get_apic_id = x2apic_get_apic_id, - .set_apic_id = x2apic_set_apic_id, - .apic_id_mask = 0xFFFFFFFFu, - - .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, - - .send_IPI_mask = x2apic_send_IPI_mask, - .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, - .send_IPI_allbutself = x2apic_send_IPI_allbutself, - .send_IPI_all = x2apic_send_IPI_all, - .send_IPI_self = x2apic_send_IPI_self, - - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = NULL, - .smp_callin_clear_local_apic = NULL, - .inquire_remote_apic = NULL, - - .read = native_apic_msr_read, - .write = native_apic_msr_write, - .icr_read = native_x2apic_icr_read, - .icr_write = native_x2apic_icr_write, - .wait_icr_idle = native_x2apic_wait_icr_idle, - .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle, -}; - -apic_driver(apic_x2apic_phys); diff --git a/ANDROID_3.4.5/arch/x86/kernel/apic/x2apic_uv_x.c b/ANDROID_3.4.5/arch/x86/kernel/apic/x2apic_uv_x.c deleted file mode 100644 index 87bfa69e..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/apic/x2apic_uv_x.c +++ /dev/null @@ -1,895 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * SGI UV APIC functions (note: not an Intel compatible APIC) - * - * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved. 
- */ -#include <linux/cpumask.h> -#include <linux/hardirq.h> -#include <linux/proc_fs.h> -#include <linux/threads.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/string.h> -#include <linux/ctype.h> -#include <linux/sched.h> -#include <linux/timer.h> -#include <linux/slab.h> -#include <linux/cpu.h> -#include <linux/init.h> -#include <linux/io.h> -#include <linux/pci.h> -#include <linux/kdebug.h> -#include <linux/delay.h> -#include <linux/crash_dump.h> - -#include <asm/uv/uv_mmrs.h> -#include <asm/uv/uv_hub.h> -#include <asm/current.h> -#include <asm/pgtable.h> -#include <asm/uv/bios.h> -#include <asm/uv/uv.h> -#include <asm/apic.h> -#include <asm/ipi.h> -#include <asm/smp.h> -#include <asm/x86_init.h> -#include <asm/emergency-restart.h> -#include <asm/nmi.h> - -/* BMC sets a bit this MMR non-zero before sending an NMI */ -#define UVH_NMI_MMR UVH_SCRATCH5 -#define UVH_NMI_MMR_CLEAR (UVH_NMI_MMR + 8) -#define UV_NMI_PENDING_MASK (1UL << 63) -DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count); - -DEFINE_PER_CPU(int, x2apic_extra_bits); - -#define PR_DEVEL(fmt, args...) pr_devel("%s: " fmt, __func__, args) - -static enum uv_system_type uv_system_type; -static u64 gru_start_paddr, gru_end_paddr; -static union uvh_apicid uvh_apicid; -int uv_min_hub_revision_id; -EXPORT_SYMBOL_GPL(uv_min_hub_revision_id); -unsigned int uv_apicid_hibits; -EXPORT_SYMBOL_GPL(uv_apicid_hibits); -static DEFINE_SPINLOCK(uv_nmi_lock); - -static struct apic apic_x2apic_uv_x; - -static unsigned long __init uv_early_read_mmr(unsigned long addr) -{ - unsigned long val, *mmr; - - mmr = early_ioremap(UV_LOCAL_MMR_BASE | addr, sizeof(*mmr)); - val = *mmr; - early_iounmap(mmr, sizeof(*mmr)); - return val; -} - -static inline bool is_GRU_range(u64 start, u64 end) -{ - return start >= gru_start_paddr && end <= gru_end_paddr; -} - -static bool uv_is_untracked_pat_range(u64 start, u64 end) -{ - return is_ISA_range(start, end) || is_GRU_range(start, end); -} - -static int __init early_get_pnodeid(void) -{ - union uvh_node_id_u node_id; - union uvh_rh_gam_config_mmr_u m_n_config; - int pnode; - - /* Currently, all blades have same revision number */ - node_id.v = uv_early_read_mmr(UVH_NODE_ID); - m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_CONFIG_MMR); - uv_min_hub_revision_id = node_id.s.revision; - - if (node_id.s.part_number == UV2_HUB_PART_NUMBER) - uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1; - if (node_id.s.part_number == UV2_HUB_PART_NUMBER_X) - uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1; - - uv_hub_info->hub_revision = uv_min_hub_revision_id; - pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1); - return pnode; -} - -static void __init early_get_apic_pnode_shift(void) -{ - uvh_apicid.v = uv_early_read_mmr(UVH_APICID); - if (!uvh_apicid.v) - /* - * Old bios, use default value - */ - uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT; -} - -/* - * Add an extra bit as dictated by bios to the destination apicid of - * interrupts potentially passing through the UV HUB. This prevents - * a deadlock between interrupts and IO port operations. 
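The hibit fix-up set up by uv_set_apicid_hibit() just below reduces to a mask-and-OR: the BIOS advertises which extra bits it wants set, and the driver ORs them into every destination APIC ID it emits (uv_cpu_mask_to_apicid() and uv_wakeup_secondary() later in this file). A toy illustration with invented register values:

#include <stdio.h>

int main(void)
{
	/* Invented register contents: the BIOS reports which high bits must be
	 * set (on real UV1 hubs this comes from an MMR, masked with
	 * UV_APICID_HIBIT_MASK). */
	unsigned int bit_enables = 0x00c0;
	unsigned int hibit_mask  = 0x00c0;
	unsigned int uv_apicid_hibits = bit_enables & hibit_mask;

	/* Every destination APIC ID the driver emits gets those bits OR-ed in. */
	unsigned int cpu_apicid = 0x12;
	printf("IPI destination = 0x%04x\n", cpu_apicid | uv_apicid_hibits);	/* 0x00d2 */
	return 0;
}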
- */ -static void __init uv_set_apicid_hibit(void) -{ - union uv1h_lb_target_physical_apic_id_mask_u apicid_mask; - - if (is_uv1_hub()) { - apicid_mask.v = - uv_early_read_mmr(UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK); - uv_apicid_hibits = - apicid_mask.s1.bit_enables & UV_APICID_HIBIT_MASK; - } -} - -static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - int pnodeid, is_uv1, is_uv2; - - is_uv1 = !strcmp(oem_id, "SGI"); - is_uv2 = !strcmp(oem_id, "SGI2"); - if (is_uv1 || is_uv2) { - uv_hub_info->hub_revision = - is_uv1 ? UV1_HUB_REVISION_BASE : UV2_HUB_REVISION_BASE; - pnodeid = early_get_pnodeid(); - early_get_apic_pnode_shift(); - x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; - x86_platform.nmi_init = uv_nmi_init; - if (!strcmp(oem_table_id, "UVL")) - uv_system_type = UV_LEGACY_APIC; - else if (!strcmp(oem_table_id, "UVX")) - uv_system_type = UV_X2APIC; - else if (!strcmp(oem_table_id, "UVH")) { - __this_cpu_write(x2apic_extra_bits, - pnodeid << uvh_apicid.s.pnode_shift); - uv_system_type = UV_NON_UNIQUE_APIC; - uv_set_apicid_hibit(); - return 1; - } - } - return 0; -} - -enum uv_system_type get_uv_system_type(void) -{ - return uv_system_type; -} - -int is_uv_system(void) -{ - return uv_system_type != UV_NONE; -} -EXPORT_SYMBOL_GPL(is_uv_system); - -DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); -EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); - -struct uv_blade_info *uv_blade_info; -EXPORT_SYMBOL_GPL(uv_blade_info); - -short *uv_node_to_blade; -EXPORT_SYMBOL_GPL(uv_node_to_blade); - -short *uv_cpu_to_blade; -EXPORT_SYMBOL_GPL(uv_cpu_to_blade); - -short uv_possible_blades; -EXPORT_SYMBOL_GPL(uv_possible_blades); - -unsigned long sn_rtc_cycles_per_second; -EXPORT_SYMBOL(sn_rtc_cycles_per_second); - -static const struct cpumask *uv_target_cpus(void) -{ - return cpu_online_mask; -} - -static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); -} - -static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) -{ -#ifdef CONFIG_SMP - unsigned long val; - int pnode; - - pnode = uv_apicid_to_pnode(phys_apicid); - phys_apicid |= uv_apicid_hibits; - val = (1UL << UVH_IPI_INT_SEND_SHFT) | - (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | - ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | - APIC_DM_INIT; - uv_write_global_mmr64(pnode, UVH_IPI_INT, val); - - val = (1UL << UVH_IPI_INT_SEND_SHFT) | - (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | - ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | - APIC_DM_STARTUP; - uv_write_global_mmr64(pnode, UVH_IPI_INT, val); - - atomic_set(&init_deasserted, 1); -#endif - return 0; -} - -static void uv_send_IPI_one(int cpu, int vector) -{ - unsigned long apicid; - int pnode; - - apicid = per_cpu(x86_cpu_to_apicid, cpu); - pnode = uv_apicid_to_pnode(apicid); - uv_hub_send_ipi(pnode, apicid, vector); -} - -static void uv_send_IPI_mask(const struct cpumask *mask, int vector) -{ - unsigned int cpu; - - for_each_cpu(cpu, mask) - uv_send_IPI_one(cpu, vector); -} - -static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) -{ - unsigned int this_cpu = smp_processor_id(); - unsigned int cpu; - - for_each_cpu(cpu, mask) { - if (cpu != this_cpu) - uv_send_IPI_one(cpu, vector); - } -} - -static void uv_send_IPI_allbutself(int vector) -{ - unsigned int this_cpu = smp_processor_id(); - unsigned int cpu; - - for_each_online_cpu(cpu) { - if (cpu != this_cpu) - uv_send_IPI_one(cpu, vector); - } -} - -static void 
uv_send_IPI_all(int vector) -{ - uv_send_IPI_mask(cpu_online_mask, vector); -} - -static int uv_apic_id_valid(int apicid) -{ - return 1; -} - -static int uv_apic_id_registered(void) -{ - return 1; -} - -static void uv_init_apic_ldr(void) -{ -} - -static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - int cpu = cpumask_first(cpumask); - - if ((unsigned)cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; - else - return BAD_APICID; -} - -static unsigned int -uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) { - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - } - return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; -} - -static unsigned int x2apic_get_apic_id(unsigned long x) -{ - unsigned int id; - - WARN_ON(preemptible() && num_online_cpus() > 1); - id = x | __this_cpu_read(x2apic_extra_bits); - - return id; -} - -static unsigned long set_apic_id(unsigned int id) -{ - unsigned long x; - - /* maskout x2apic_extra_bits ? */ - x = id; - return x; -} - -static unsigned int uv_read_apic_id(void) -{ - - return x2apic_get_apic_id(apic_read(APIC_ID)); -} - -static int uv_phys_pkg_id(int initial_apicid, int index_msb) -{ - return uv_read_apic_id() >> index_msb; -} - -static void uv_send_IPI_self(int vector) -{ - apic_write(APIC_SELF_IPI, vector); -} - -static int uv_probe(void) -{ - return apic == &apic_x2apic_uv_x; -} - -static struct apic __refdata apic_x2apic_uv_x = { - - .name = "UV large system", - .probe = uv_probe, - .acpi_madt_oem_check = uv_acpi_madt_oem_check, - .apic_id_valid = uv_apic_id_valid, - .apic_id_registered = uv_apic_id_registered, - - .irq_delivery_mode = dest_Fixed, - .irq_dest_mode = 0, /* physical */ - - .target_cpus = uv_target_cpus, - .disable_esr = 0, - .dest_logical = APIC_DEST_LOGICAL, - .check_apicid_used = NULL, - .check_apicid_present = NULL, - - .vector_allocation_domain = uv_vector_allocation_domain, - .init_apic_ldr = uv_init_apic_ldr, - - .ioapic_phys_id_map = NULL, - .setup_apic_routing = NULL, - .multi_timer_check = NULL, - .cpu_present_to_apicid = default_cpu_present_to_apicid, - .apicid_to_cpu_present = NULL, - .setup_portio_remap = NULL, - .check_phys_apicid_present = default_check_phys_apicid_present, - .enable_apic_mode = NULL, - .phys_pkg_id = uv_phys_pkg_id, - .mps_oem_check = NULL, - - .get_apic_id = x2apic_get_apic_id, - .set_apic_id = set_apic_id, - .apic_id_mask = 0xFFFFFFFFu, - - .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, - - .send_IPI_mask = uv_send_IPI_mask, - .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself, - .send_IPI_allbutself = uv_send_IPI_allbutself, - .send_IPI_all = uv_send_IPI_all, - .send_IPI_self = uv_send_IPI_self, - - .wakeup_secondary_cpu = uv_wakeup_secondary, - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = NULL, - .smp_callin_clear_local_apic = NULL, - .inquire_remote_apic = NULL, - - .read = native_apic_msr_read, - .write = native_apic_msr_write, - .icr_read = native_x2apic_icr_read, - .icr_write = native_x2apic_icr_write, - .wait_icr_idle = native_x2apic_wait_icr_idle, - .safe_wait_icr_idle = 
native_safe_x2apic_wait_icr_idle, -}; - -static __cpuinit void set_x2apic_extra_bits(int pnode) -{ - __this_cpu_write(x2apic_extra_bits, pnode << uvh_apicid.s.pnode_shift); -} - -/* - * Called on boot cpu. - */ -static __init int boot_pnode_to_blade(int pnode) -{ - int blade; - - for (blade = 0; blade < uv_num_possible_blades(); blade++) - if (pnode == uv_blade_info[blade].pnode) - return blade; - BUG(); -} - -struct redir_addr { - unsigned long redirect; - unsigned long alias; -}; - -#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT - -static __initdata struct redir_addr redir_addrs[] = { - {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR}, - {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR}, - {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR}, -}; - -static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) -{ - union uvh_rh_gam_alias210_overlay_config_2_mmr_u alias; - union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect; - int i; - - for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) { - alias.v = uv_read_local_mmr(redir_addrs[i].alias); - if (alias.s.enable && alias.s.base == 0) { - *size = (1UL << alias.s.m_alias); - redirect.v = uv_read_local_mmr(redir_addrs[i].redirect); - *base = (unsigned long)redirect.s.dest_base << DEST_SHIFT; - return; - } - } - *base = *size = 0; -} - -enum map_type {map_wb, map_uc}; - -static __init void map_high(char *id, unsigned long base, int pshift, - int bshift, int max_pnode, enum map_type map_type) -{ - unsigned long bytes, paddr; - - paddr = base << pshift; - bytes = (1UL << bshift) * (max_pnode + 1); - printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, - paddr + bytes); - if (map_type == map_uc) - init_extra_mapping_uc(paddr, bytes); - else - init_extra_mapping_wb(paddr, bytes); - -} -static __init void map_gru_high(int max_pnode) -{ - union uvh_rh_gam_gru_overlay_config_mmr_u gru; - int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT; - - gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); - if (gru.s.enable) { - map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb); - gru_start_paddr = ((u64)gru.s.base << shift); - gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1); - - } -} - -static __init void map_mmr_high(int max_pnode) -{ - union uvh_rh_gam_mmr_overlay_config_mmr_u mmr; - int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT; - - mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); - if (mmr.s.enable) - map_high("MMR", mmr.s.base, shift, shift, max_pnode, map_uc); -} - -static __init void map_mmioh_high(int max_pnode) -{ - union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; - int shift; - - mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); - if (is_uv1_hub() && mmioh.s1.enable) { - shift = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT; - map_high("MMIOH", mmioh.s1.base, shift, mmioh.s1.m_io, - max_pnode, map_uc); - } - if (is_uv2_hub() && mmioh.s2.enable) { - shift = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT; - map_high("MMIOH", mmioh.s2.base, shift, mmioh.s2.m_io, - max_pnode, map_uc); - } -} - -static __init void map_low_mmrs(void) -{ - init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE); - init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE); -} - -static __init void uv_rtc_init(void) -{ - long status; - u64 ticks_per_sec; - - status = uv_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, - 
&ticks_per_sec); - if (status != BIOS_STATUS_SUCCESS || ticks_per_sec < 100000) { - printk(KERN_WARNING - "unable to determine platform RTC clock frequency, " - "guessing.\n"); - /* BIOS gives wrong value for clock freq. so guess */ - sn_rtc_cycles_per_second = 1000000000000UL / 30000UL; - } else - sn_rtc_cycles_per_second = ticks_per_sec; -} - -/* - * percpu heartbeat timer - */ -static void uv_heartbeat(unsigned long ignored) -{ - struct timer_list *timer = &uv_hub_info->scir.timer; - unsigned char bits = uv_hub_info->scir.state; - - /* flip heartbeat bit */ - bits ^= SCIR_CPU_HEARTBEAT; - - /* is this cpu idle? */ - if (idle_cpu(raw_smp_processor_id())) - bits &= ~SCIR_CPU_ACTIVITY; - else - bits |= SCIR_CPU_ACTIVITY; - - /* update system controller interface reg */ - uv_set_scir_bits(bits); - - /* enable next timer period */ - mod_timer_pinned(timer, jiffies + SCIR_CPU_HB_INTERVAL); -} - -static void __cpuinit uv_heartbeat_enable(int cpu) -{ - while (!uv_cpu_hub_info(cpu)->scir.enabled) { - struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer; - - uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY); - setup_timer(timer, uv_heartbeat, cpu); - timer->expires = jiffies + SCIR_CPU_HB_INTERVAL; - add_timer_on(timer, cpu); - uv_cpu_hub_info(cpu)->scir.enabled = 1; - - /* also ensure that boot cpu is enabled */ - cpu = 0; - } -} - -#ifdef CONFIG_HOTPLUG_CPU -static void __cpuinit uv_heartbeat_disable(int cpu) -{ - if (uv_cpu_hub_info(cpu)->scir.enabled) { - uv_cpu_hub_info(cpu)->scir.enabled = 0; - del_timer(&uv_cpu_hub_info(cpu)->scir.timer); - } - uv_set_cpu_scir_bits(cpu, 0xff); -} - -/* - * cpu hotplug notifier - */ -static __cpuinit int uv_scir_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - long cpu = (long)hcpu; - - switch (action) { - case CPU_ONLINE: - uv_heartbeat_enable(cpu); - break; - case CPU_DOWN_PREPARE: - uv_heartbeat_disable(cpu); - break; - default: - break; - } - return NOTIFY_OK; -} - -static __init void uv_scir_register_cpu_notifier(void) -{ - hotcpu_notifier(uv_scir_cpu_notify, 0); -} - -#else /* !CONFIG_HOTPLUG_CPU */ - -static __init void uv_scir_register_cpu_notifier(void) -{ -} - -static __init int uv_init_heartbeat(void) -{ - int cpu; - - if (is_uv_system()) - for_each_online_cpu(cpu) - uv_heartbeat_enable(cpu); - return 0; -} - -late_initcall(uv_init_heartbeat); - -#endif /* !CONFIG_HOTPLUG_CPU */ - -/* Direct Legacy VGA I/O traffic to designated IOH */ -int uv_set_vga_state(struct pci_dev *pdev, bool decode, - unsigned int command_bits, u32 flags) -{ - int domain, bus, rc; - - PR_DEVEL("devfn %x decode %d cmd %x flags %d\n", - pdev->devfn, decode, command_bits, flags); - - if (!(flags & PCI_VGA_STATE_CHANGE_BRIDGE)) - return 0; - - if ((command_bits & PCI_COMMAND_IO) == 0) - return 0; - - domain = pci_domain_nr(pdev->bus); - bus = pdev->bus->number; - - rc = uv_bios_set_legacy_vga_target(decode, domain, bus); - PR_DEVEL("vga decode %d %x:%x, rc: %d\n", decode, domain, bus, rc); - - return rc; -} - -/* - * Called on each cpu to initialize the per_cpu UV data area. - * FIXME: hotplug not supported yet - */ -void __cpuinit uv_cpu_init(void) -{ - /* CPU 0 initilization will be done via uv_system_init. */ - if (!uv_blade_info) - return; - - uv_blade_info[uv_numa_blade_id()].nr_online_cpus++; - - if (get_uv_system_type() == UV_NON_UNIQUE_APIC) - set_x2apic_extra_bits(uv_hub_info->pnode); -} - -/* - * When NMI is received, print a stack trace. 
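The NMI bookkeeping in uv_handle_nmi() just below behaves like a per-blade generation counter: whichever CPU first observes the pending bit clears it and bumps the blade-wide count, and each CPU then treats a blade count that differs from its private copy as a new NMI. A minimal single-threaded sketch of that pattern (illustrative only; the real handler serializes the MMR read-modify-write with a spinlock and returns NMI_DONE/NMI_HANDLED):

#include <stdio.h>

/* Hypothetical per-blade state, standing in for uv_blade_info[].nmi_count and
 * the UVH_NMI_MMR pending bit; cpu_last mirrors a CPU's cpu_last_nmi_count. */
struct blade { unsigned long nmi_count; int mmr_pending; };

static int handle_nmi(struct blade *b, unsigned long *cpu_last)
{
	/* One CPU on the blade notices the pending bit, clears it and advances
	 * the blade-wide count (done under a spinlock in the real handler). */
	if (b->mmr_pending) {
		b->mmr_pending = 0;
		b->nmi_count++;
	}

	/* A count equal to our private copy means nothing new for this CPU. */
	if (*cpu_last == b->nmi_count)
		return 0;			/* analogous to NMI_DONE */

	*cpu_last = b->nmi_count;
	printf("NMI generation %lu: dump this CPU's stack\n", b->nmi_count);
	return 1;				/* analogous to NMI_HANDLED */
}

int main(void)
{
	struct blade b = { 0, 1 };
	unsigned long cpu_a = 0, cpu_b = 0;

	handle_nmi(&b, &cpu_a);	/* clears the pending bit and handles the NMI */
	handle_nmi(&b, &cpu_b);	/* sees the new generation and handles it too */
	handle_nmi(&b, &cpu_a);	/* nothing new: ignored */
	return 0;
}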
- */ -int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) -{ - unsigned long real_uv_nmi; - int bid; - - /* - * Each blade has an MMR that indicates when an NMI has been sent - * to cpus on the blade. If an NMI is detected, atomically - * clear the MMR and update a per-blade NMI count used to - * cause each cpu on the blade to notice a new NMI. - */ - bid = uv_numa_blade_id(); - real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK); - - if (unlikely(real_uv_nmi)) { - spin_lock(&uv_blade_info[bid].nmi_lock); - real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK); - if (real_uv_nmi) { - uv_blade_info[bid].nmi_count++; - uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK); - } - spin_unlock(&uv_blade_info[bid].nmi_lock); - } - - if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count)) - return NMI_DONE; - - __get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count; - - /* - * Use a lock so only one cpu prints at a time. - * This prevents intermixed output. - */ - spin_lock(&uv_nmi_lock); - pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id()); - dump_stack(); - spin_unlock(&uv_nmi_lock); - - return NMI_HANDLED; -} - -void uv_register_nmi_notifier(void) -{ - if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv")) - printk(KERN_WARNING "UV NMI handler failed to register\n"); -} - -void uv_nmi_init(void) -{ - unsigned int value; - - /* - * Unmask NMI on all cpus - */ - value = apic_read(APIC_LVT1) | APIC_DM_NMI; - value &= ~APIC_LVT_MASKED; - apic_write(APIC_LVT1, value); -} - -void __init uv_system_init(void) -{ - union uvh_rh_gam_config_mmr_u m_n_config; - union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; - union uvh_node_id_u node_id; - unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; - int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val, n_io; - int gnode_extra, max_pnode = 0; - unsigned long mmr_base, present, paddr; - unsigned short pnode_mask, pnode_io_mask; - - printk(KERN_INFO "UV: Found %s hub\n", is_uv1_hub() ? "UV1" : "UV2"); - map_low_mmrs(); - - m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR ); - m_val = m_n_config.s.m_skt; - n_val = m_n_config.s.n_skt; - mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); - n_io = is_uv1_hub() ? mmioh.s1.n_io : mmioh.s2.n_io; - mmr_base = - uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & - ~UV_MMR_ENABLE; - pnode_mask = (1 << n_val) - 1; - pnode_io_mask = (1 << n_io) - 1; - - node_id.v = uv_read_local_mmr(UVH_NODE_ID); - gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1; - gnode_upper = ((unsigned long)gnode_extra << m_val); - printk(KERN_INFO "UV: N %d, M %d, N_IO: %d, gnode_upper 0x%lx, gnode_extra 0x%x, pnode_mask 0x%x, pnode_io_mask 0x%x\n", - n_val, m_val, n_io, gnode_upper, gnode_extra, pnode_mask, pnode_io_mask); - - printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base); - - for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) - uv_possible_blades += - hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8)); - - /* uv_num_possible_blades() is really the hub count */ - printk(KERN_INFO "UV: Found %d blades, %d hubs\n", - is_uv1_hub() ? 
uv_num_possible_blades() : - (uv_num_possible_blades() + 1) / 2, - uv_num_possible_blades()); - - bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); - uv_blade_info = kzalloc(bytes, GFP_KERNEL); - BUG_ON(!uv_blade_info); - - for (blade = 0; blade < uv_num_possible_blades(); blade++) - uv_blade_info[blade].memory_nid = -1; - - get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size); - - bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes(); - uv_node_to_blade = kmalloc(bytes, GFP_KERNEL); - BUG_ON(!uv_node_to_blade); - memset(uv_node_to_blade, 255, bytes); - - bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus(); - uv_cpu_to_blade = kmalloc(bytes, GFP_KERNEL); - BUG_ON(!uv_cpu_to_blade); - memset(uv_cpu_to_blade, 255, bytes); - - blade = 0; - for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) { - present = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8); - for (j = 0; j < 64; j++) { - if (!test_bit(j, &present)) - continue; - pnode = (i * 64 + j) & pnode_mask; - uv_blade_info[blade].pnode = pnode; - uv_blade_info[blade].nr_possible_cpus = 0; - uv_blade_info[blade].nr_online_cpus = 0; - spin_lock_init(&uv_blade_info[blade].nmi_lock); - max_pnode = max(pnode, max_pnode); - blade++; - } - } - - uv_bios_init(); - uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, &sn_coherency_id, - &sn_region_size, &system_serial_number); - uv_rtc_init(); - - for_each_present_cpu(cpu) { - int apicid = per_cpu(x86_cpu_to_apicid, cpu); - - nid = cpu_to_node(cpu); - /* - * apic_pnode_shift must be set before calling uv_apicid_to_pnode(); - */ - uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask; - uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift; - uv_cpu_hub_info(cpu)->hub_revision = uv_hub_info->hub_revision; - - uv_cpu_hub_info(cpu)->m_shift = 64 - m_val; - uv_cpu_hub_info(cpu)->n_lshift = is_uv2_1_hub() ? - (m_val == 40 ? 40 : 39) : m_val; - - pnode = uv_apicid_to_pnode(apicid); - blade = boot_pnode_to_blade(pnode); - lcpu = uv_blade_info[blade].nr_possible_cpus; - uv_blade_info[blade].nr_possible_cpus++; - - /* Any node on the blade, else will contain -1. 
*/ - uv_blade_info[blade].memory_nid = nid; - - uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; - uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size; - uv_cpu_hub_info(cpu)->m_val = m_val; - uv_cpu_hub_info(cpu)->n_val = n_val; - uv_cpu_hub_info(cpu)->numa_blade_id = blade; - uv_cpu_hub_info(cpu)->blade_processor_id = lcpu; - uv_cpu_hub_info(cpu)->pnode = pnode; - uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1; - uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; - uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; - uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; - uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id; - uv_cpu_hub_info(cpu)->scir.offset = uv_scir_offset(apicid); - uv_node_to_blade[nid] = blade; - uv_cpu_to_blade[cpu] = blade; - } - - /* Add blade/pnode info for nodes without cpus */ - for_each_online_node(nid) { - if (uv_node_to_blade[nid] >= 0) - continue; - paddr = node_start_pfn(nid) << PAGE_SHIFT; - pnode = uv_gpa_to_pnode(uv_soc_phys_ram_to_gpa(paddr)); - blade = boot_pnode_to_blade(pnode); - uv_node_to_blade[nid] = blade; - } - - map_gru_high(max_pnode); - map_mmr_high(max_pnode); - map_mmioh_high(max_pnode & pnode_io_mask); - - uv_cpu_init(); - uv_scir_register_cpu_notifier(); - uv_register_nmi_notifier(); - proc_mkdir("sgi_uv", NULL); - - /* register Legacy VGA I/O redirection handler */ - pci_register_set_vga_state(uv_set_vga_state); - - /* - * For a kdump kernel the reset must be BOOT_ACPI, not BOOT_EFI, as - * EFI is not enabled in the kdump kernel. - */ - if (is_kdump_kernel()) - reboot_type = BOOT_ACPI; -} - -apic_driver(apic_x2apic_uv_x); diff --git a/ANDROID_3.4.5/arch/x86/kernel/apm_32.c b/ANDROID_3.4.5/arch/x86/kernel/apm_32.c deleted file mode 100644 index 459e78cb..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/apm_32.c +++ /dev/null @@ -1,2451 +0,0 @@ -/* -*- linux-c -*- - * APM BIOS driver for Linux - * Copyright 1994-2001 Stephen Rothwell (sfr@canb.auug.org.au) - * - * Initial development of this driver was funded by NEC Australia P/L - * and NEC Corporation - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2, or (at your option) any - * later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * October 1995, Rik Faith (faith@cs.unc.edu): - * Minor enhancements and updates (to the patch set) for 1.3.x - * Documentation - * January 1996, Rik Faith (faith@cs.unc.edu): - * Make /proc/apm easy to format (bump driver version) - * March 1996, Rik Faith (faith@cs.unc.edu): - * Prohibit APM BIOS calls unless apm_enabled. 
- * (Thanks to Ulrich Windl <Ulrich.Windl@rz.uni-regensburg.de>) - * April 1996, Stephen Rothwell (sfr@canb.auug.org.au) - * Version 1.0 and 1.1 - * May 1996, Version 1.2 - * Feb 1998, Version 1.3 - * Feb 1998, Version 1.4 - * Aug 1998, Version 1.5 - * Sep 1998, Version 1.6 - * Nov 1998, Version 1.7 - * Jan 1999, Version 1.8 - * Jan 1999, Version 1.9 - * Oct 1999, Version 1.10 - * Nov 1999, Version 1.11 - * Jan 2000, Version 1.12 - * Feb 2000, Version 1.13 - * Nov 2000, Version 1.14 - * Oct 2001, Version 1.15 - * Jan 2002, Version 1.16 - * Oct 2002, Version 1.16ac - * - * History: - * 0.6b: first version in official kernel, Linux 1.3.46 - * 0.7: changed /proc/apm format, Linux 1.3.58 - * 0.8: fixed gcc 2.7.[12] compilation problems, Linux 1.3.59 - * 0.9: only call bios if bios is present, Linux 1.3.72 - * 1.0: use fixed device number, consolidate /proc/apm into this file, - * Linux 1.3.85 - * 1.1: support user-space standby and suspend, power off after system - * halted, Linux 1.3.98 - * 1.2: When resetting RTC after resume, take care so that the time - * is only incorrect by 30-60mS (vs. 1S previously) (Gabor J. Toth - * <jtoth@princeton.edu>); improve interaction between - * screen-blanking and gpm (Stephen Rothwell); Linux 1.99.4 - * 1.2a:Simple change to stop mysterious bug reports with SMP also added - * levels to the printk calls. APM is not defined for SMP machines. - * The new replacement for it is, but Linux doesn't yet support this. - * Alan Cox Linux 2.1.55 - * 1.3: Set up a valid data descriptor 0x40 for buggy BIOS's - * 1.4: Upgraded to support APM 1.2. Integrated ThinkPad suspend patch by - * Dean Gaudet <dgaudet@arctic.org>. - * C. Scott Ananian <cananian@alumni.princeton.edu> Linux 2.1.87 - * 1.5: Fix segment register reloading (in case of bad segments saved - * across BIOS call). - * Stephen Rothwell - * 1.6: Cope with compiler/assembler differences. - * Only try to turn off the first display device. - * Fix OOPS at power off with no APM BIOS by Jan Echternach - * <echter@informatik.uni-rostock.de> - * Stephen Rothwell - * 1.7: Modify driver's cached copy of the disabled/disengaged flags - * to reflect current state of APM BIOS. - * Chris Rankin <rankinc@bellsouth.net> - * Reset interrupt 0 timer to 100Hz after suspend - * Chad Miller <cmiller@surfsouth.com> - * Add CONFIG_APM_IGNORE_SUSPEND_BOUNCE - * Richard Gooch <rgooch@atnf.csiro.au> - * Allow boot time disabling of APM - * Make boot messages far less verbose by default - * Make asm safer - * Stephen Rothwell - * 1.8: Add CONFIG_APM_RTC_IS_GMT - * Richard Gooch <rgooch@atnf.csiro.au> - * change APM_NOINTS to CONFIG_APM_ALLOW_INTS - * remove dependency on CONFIG_PROC_FS - * Stephen Rothwell - * 1.9: Fix small typo. <laslo@wodip.opole.pl> - * Try to cope with BIOS's that need to have all display - * devices blanked and not just the first one. - * Ross Paterson <ross@soi.city.ac.uk> - * Fix segment limit setting it has always been wrong as - * the segments needed to have byte granularity. - * Mark a few things __init. - * Add hack to allow power off of SMP systems by popular request. - * Use CONFIG_SMP instead of __SMP__ - * Ignore BOUNCES for three seconds. - * Stephen Rothwell - * 1.10: Fix for Thinkpad return code. - * Merge 2.2 and 2.3 drivers. - * Remove APM dependencies in arch/i386/kernel/process.c - * Remove APM dependencies in drivers/char/sysrq.c - * Reset time across standby. - * Allow more inititialisation on SMP. - * Remove CONFIG_APM_POWER_OFF and make it boot time - * configurable (default on). 
- * Make debug only a boot time parameter (remove APM_DEBUG). - * Try to blank all devices on any error. - * 1.11: Remove APM dependencies in drivers/char/console.c - * Check nr_running to detect if we are idle (from - * Borislav Deianov <borislav@lix.polytechnique.fr>) - * Fix for bioses that don't zero the top part of the - * entrypoint offset (Mario Sitta <sitta@al.unipmn.it>) - * (reported by Panos Katsaloulis <teras@writeme.com>). - * Real mode power off patch (Walter Hofmann - * <Walter.Hofmann@physik.stud.uni-erlangen.de>). - * 1.12: Remove CONFIG_SMP as the compiler will optimize - * the code away anyway (smp_num_cpus == 1 in UP) - * noted by Artur Skawina <skawina@geocities.com>. - * Make power off under SMP work again. - * Fix thinko with initial engaging of BIOS. - * Make sure power off only happens on CPU 0 - * (Paul "Rusty" Russell <rusty@rustcorp.com.au>). - * Do error notification to user mode if BIOS calls fail. - * Move entrypoint offset fix to ...boot/setup.S - * where it belongs (Cosmos <gis88564@cis.nctu.edu.tw>). - * Remove smp-power-off. SMP users must now specify - * "apm=power-off" on the kernel command line. Suggested - * by Jim Avera <jima@hal.com>, modified by Alan Cox - * <alan@lxorguk.ukuu.org.uk>. - * Register the /proc/apm entry even on SMP so that - * scripts that check for it before doing power off - * work (Jim Avera <jima@hal.com>). - * 1.13: Changes for new pm_ interfaces (Andy Henroid - * <andy_henroid@yahoo.com>). - * Modularize the code. - * Fix the Thinkpad (again) :-( (CONFIG_APM_IGNORE_MULTIPLE_SUSPENDS - * is now the way life works). - * Fix thinko in suspend() (wrong return). - * Notify drivers on critical suspend. - * Make kapmd absorb more idle time (Pavel Machek <pavel@ucw.cz> - * modified by sfr). - * Disable interrupts while we are suspended (Andy Henroid - * <andy_henroid@yahoo.com> fixed by sfr). - * Make power off work on SMP again (Tony Hoyle - * <tmh@magenta-logic.com> and <zlatko@iskon.hr>) modified by sfr. - * Remove CONFIG_APM_SUSPEND_BOUNCE. The bounce ignore - * interval is now configurable. - * 1.14: Make connection version persist across module unload/load. - * Enable and engage power management earlier. - * Disengage power management on module unload. - * Changed to use the sysrq-register hack for registering the - * power off function called by magic sysrq based upon discussions - * in irc://irc.openprojects.net/#kernelnewbies - * (Crutcher Dunnavant <crutcher+kernel@datastacks.com>). - * Make CONFIG_APM_REAL_MODE_POWER_OFF run time configurable. - * (Arjan van de Ven <arjanv@redhat.com>) modified by sfr. - * Work around byte swap bug in one of the Vaio's BIOS's - * (Marc Boucher <marc@mbsi.ca>). - * Exposed the disable flag to dmi so that we can handle known - * broken APM (Alan Cox <alan@lxorguk.ukuu.org.uk>). - * 1.14ac: If the BIOS says "I slowed the CPU down" then don't spin - * calling it - instead idle. (Alan Cox <alan@lxorguk.ukuu.org.uk>) - * If an APM idle fails log it and idle sensibly - * 1.15: Don't queue events to clients who open the device O_WRONLY. - * Don't expect replies from clients who open the device O_RDONLY. - * (Idea from Thomas Hood) - * Minor waitqueue cleanups. (John Fremlin <chief@bandits.org>) - * 1.16: Fix idle calling. (Andreas Steinmetz <ast@domdv.de> et al.) - * Notify listeners of standby or suspend events before notifying - * drivers. Return EBUSY to ioctl() if suspend is rejected. 
- * (Russell King <rmk@arm.linux.org.uk> and Thomas Hood) - * Ignore first resume after we generate our own resume event - * after a suspend (Thomas Hood) - * Daemonize now gets rid of our controlling terminal (sfr). - * CONFIG_APM_CPU_IDLE now just affects the default value of - * idle_threshold (sfr). - * Change name of kernel apm daemon (as it no longer idles) (sfr). - * 1.16ac: Fix up SMP support somewhat. You can now force SMP on and we - * make _all_ APM calls on the CPU#0. Fix unsafe sign bug. - * TODO: determine if its "boot CPU" or "CPU0" we want to lock to. - * - * APM 1.1 Reference: - * - * Intel Corporation, Microsoft Corporation. Advanced Power Management - * (APM) BIOS Interface Specification, Revision 1.1, September 1993. - * Intel Order Number 241704-001. Microsoft Part Number 781-110-X01. - * - * [This document is available free from Intel by calling 800.628.8686 (fax - * 916.356.6100) or 800.548.4725; or from - * http://www.microsoft.com/whdc/archive/amp_12.mspx It is also - * available from Microsoft by calling 206.882.8080.] - * - * APM 1.2 Reference: - * Intel Corporation, Microsoft Corporation. Advanced Power Management - * (APM) BIOS Interface Specification, Revision 1.2, February 1996. - * - * [This document is available from Microsoft at: - * http://www.microsoft.com/whdc/archive/amp_12.mspx] - */ - -#include <linux/module.h> - -#include <linux/poll.h> -#include <linux/types.h> -#include <linux/stddef.h> -#include <linux/timer.h> -#include <linux/fcntl.h> -#include <linux/slab.h> -#include <linux/stat.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/miscdevice.h> -#include <linux/apm_bios.h> -#include <linux/init.h> -#include <linux/time.h> -#include <linux/sched.h> -#include <linux/pm.h> -#include <linux/capability.h> -#include <linux/device.h> -#include <linux/kernel.h> -#include <linux/freezer.h> -#include <linux/smp.h> -#include <linux/dmi.h> -#include <linux/suspend.h> -#include <linux/kthread.h> -#include <linux/jiffies.h> -#include <linux/acpi.h> -#include <linux/syscore_ops.h> -#include <linux/i8253.h> - -#include <asm/uaccess.h> -#include <asm/desc.h> -#include <asm/olpc.h> -#include <asm/paravirt.h> -#include <asm/reboot.h> - -#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) -extern int (*console_blank_hook)(int); -#endif - -/* - * The apm_bios device is one of the misc char devices. - * This is its minor number. - */ -#define APM_MINOR_DEV 134 - -/* - * Various options can be changed at boot time as follows: - * (We allow underscores for compatibility with the modules code) - * apm=on/off enable/disable APM - * [no-]allow[-_]ints allow interrupts during BIOS calls - * [no-]broken[-_]psr BIOS has a broken GetPowerStatus call - * [no-]realmode[-_]power[-_]off switch to real mode before - * powering off - * [no-]debug log some debugging messages - * [no-]power[-_]off power off on shutdown - * [no-]smp Use apm even on an SMP box - * bounce[-_]interval=<n> number of ticks to ignore suspend - * bounces - * idle[-_]threshold=<n> System idle percentage above which to - * make APM BIOS idle calls. Set it to - * 100 to disable. - * idle[-_]period=<n> Period (in 1/100s of a second) over - * which the idle percentage is - * calculated. - */ - -/* KNOWN PROBLEM MACHINES: - * - * U: TI 4000M TravelMate: BIOS is *NOT* APM compliant - * [Confirmed by TI representative] - * ?: ACER 486DX4/75: uses dseg 0040, in violation of APM specification - * [Confirmed by BIOS disassembly] - * [This may work now ...] 
- * P: Toshiba 1950S: battery life information only gets updated after resume - * P: Midwest Micro Soundbook Elite DX2/66 monochrome: screen blanking - * broken in BIOS [Reported by Garst R. Reese <reese@isn.net>] - * ?: AcerNote-950: oops on reading /proc/apm - workaround is a WIP - * Neale Banks <neale@lowendale.com.au> December 2000 - * - * Legend: U = unusable with APM patches - * P = partially usable with APM patches - */ - -/* - * Define as 1 to make the driver always call the APM BIOS busy - * routine even if the clock was not reported as slowed by the - * idle routine. Otherwise, define as 0. - */ -#define ALWAYS_CALL_BUSY 1 - -/* - * Define to make the APM BIOS calls zero all data segment registers (so - * that an incorrect BIOS implementation will cause a kernel panic if it - * tries to write to arbitrary memory). - */ -#define APM_ZERO_SEGS - -#include <asm/apm.h> - -/* - * Define to re-initialize the interrupt 0 timer to 100 Hz after a suspend. - * This patched by Chad Miller <cmiller@surfsouth.com>, original code by - * David Chen <chen@ctpa04.mit.edu> - */ -#undef INIT_TIMER_AFTER_SUSPEND - -#ifdef INIT_TIMER_AFTER_SUSPEND -#include <linux/timex.h> -#include <asm/io.h> -#include <linux/delay.h> -#endif - -/* - * Need to poll the APM BIOS every second - */ -#define APM_CHECK_TIMEOUT (HZ) - -/* - * Ignore suspend events for this amount of time after a resume - */ -#define DEFAULT_BOUNCE_INTERVAL (3 * HZ) - -/* - * Maximum number of events stored - */ -#define APM_MAX_EVENTS 20 - -/* - * The per-file APM data - */ -struct apm_user { - int magic; - struct apm_user *next; - unsigned int suser: 1; - unsigned int writer: 1; - unsigned int reader: 1; - unsigned int suspend_wait: 1; - int suspend_result; - int suspends_pending; - int standbys_pending; - int suspends_read; - int standbys_read; - int event_head; - int event_tail; - apm_event_t events[APM_MAX_EVENTS]; -}; - -/* - * The magic number in apm_user - */ -#define APM_BIOS_MAGIC 0x4101 - -/* - * idle percentage above which bios idle calls are done - */ -#ifdef CONFIG_APM_CPU_IDLE -#warning deprecated CONFIG_APM_CPU_IDLE will be deleted in 2012 -#define DEFAULT_IDLE_THRESHOLD 95 -#else -#define DEFAULT_IDLE_THRESHOLD 100 -#endif -#define DEFAULT_IDLE_PERIOD (100 / 3) - -/* - * Local variables - */ -static struct { - unsigned long offset; - unsigned short segment; -} apm_bios_entry; -static int clock_slowed; -static int idle_threshold __read_mostly = DEFAULT_IDLE_THRESHOLD; -static int idle_period __read_mostly = DEFAULT_IDLE_PERIOD; -static int set_pm_idle; -static int suspends_pending; -static int standbys_pending; -static int ignore_sys_suspend; -static int ignore_normal_resume; -static int bounce_interval __read_mostly = DEFAULT_BOUNCE_INTERVAL; - -static bool debug __read_mostly; -static bool smp __read_mostly; -static int apm_disabled = -1; -#ifdef CONFIG_SMP -static bool power_off; -#else -static bool power_off = 1; -#endif -static bool realmode_power_off; -#ifdef CONFIG_APM_ALLOW_INTS -static bool allow_ints = 1; -#else -static bool allow_ints; -#endif -static bool broken_psr; - -static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue); -static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); -static struct apm_user *user_list; -static DEFINE_SPINLOCK(user_list_lock); -static DEFINE_MUTEX(apm_mutex); - -/* - * Set up a segment that references the real mode segment 0x40 - * that extends up to the end of page zero (that we have reserved). 
- * This is for buggy BIOS's that refer to (real mode) segment 0x40 - * even though they are called in protected mode. - */ -static struct desc_struct bad_bios_desc = GDT_ENTRY_INIT(0x4092, - (unsigned long)__va(0x400UL), PAGE_SIZE - 0x400 - 1); - -static const char driver_version[] = "1.16ac"; /* no spaces */ - -static struct task_struct *kapmd_task; - -/* - * APM event names taken from the APM 1.2 specification. These are - * the message codes that the BIOS uses to tell us about events - */ -static const char * const apm_event_name[] = { - "system standby", - "system suspend", - "normal resume", - "critical resume", - "low battery", - "power status change", - "update time", - "critical suspend", - "user standby", - "user suspend", - "system standby resume", - "capabilities change" -}; -#define NR_APM_EVENT_NAME ARRAY_SIZE(apm_event_name) - -typedef struct lookup_t { - int key; - char *msg; -} lookup_t; - -/* - * The BIOS returns a set of standard error codes in AX when the - * carry flag is set. - */ - -static const lookup_t error_table[] = { -/* N/A { APM_SUCCESS, "Operation succeeded" }, */ - { APM_DISABLED, "Power management disabled" }, - { APM_CONNECTED, "Real mode interface already connected" }, - { APM_NOT_CONNECTED, "Interface not connected" }, - { APM_16_CONNECTED, "16 bit interface already connected" }, -/* N/A { APM_16_UNSUPPORTED, "16 bit interface not supported" }, */ - { APM_32_CONNECTED, "32 bit interface already connected" }, - { APM_32_UNSUPPORTED, "32 bit interface not supported" }, - { APM_BAD_DEVICE, "Unrecognized device ID" }, - { APM_BAD_PARAM, "Parameter out of range" }, - { APM_NOT_ENGAGED, "Interface not engaged" }, - { APM_BAD_FUNCTION, "Function not supported" }, - { APM_RESUME_DISABLED, "Resume timer disabled" }, - { APM_BAD_STATE, "Unable to enter requested state" }, -/* N/A { APM_NO_EVENTS, "No events pending" }, */ - { APM_NO_ERROR, "BIOS did not set a return code" }, - { APM_NOT_PRESENT, "No APM present" } -}; -#define ERROR_COUNT ARRAY_SIZE(error_table) - -/** - * apm_error - display an APM error - * @str: information string - * @err: APM BIOS return code - * - * Write a meaningful log entry to the kernel log in the event of - * an APM error. Note that this also handles (negative) kernel errors. - */ - -static void apm_error(char *str, int err) -{ - int i; - - for (i = 0; i < ERROR_COUNT; i++) - if (error_table[i].key == err) - break; - if (i < ERROR_COUNT) - printk(KERN_NOTICE "apm: %s: %s\n", str, error_table[i].msg); - else if (err < 0) - printk(KERN_NOTICE "apm: %s: linux error code %i\n", str, err); - else - printk(KERN_NOTICE "apm: %s: unknown error code %#2.2x\n", - str, err); -} - -/* - * These are the actual BIOS calls. Depending on APM_ZERO_SEGS and - * apm_info.allow_ints, we are being really paranoid here! Not only - * are interrupts disabled, but all the segment registers (except SS) - * are saved and zeroed this means that if the BIOS tries to reference - * any data without explicitly loading the segment registers, the kernel - * will fault immediately rather than have some unforeseen circumstances - * for the rest of the kernel. And it will be very obvious! :-) Doing - * this depends on CS referring to the same physical memory as DS so that - * DS can be zeroed before the call. Unfortunately, we can't do anything - * about the stack segment/pointer. Also, we tell the compiler that - * everything could change. 
- * - * Also, we KNOW that for the non error case of apm_bios_call, there - * is no useful data returned in the low order 8 bits of eax. - */ - -static inline unsigned long __apm_irq_save(void) -{ - unsigned long flags; - local_save_flags(flags); - if (apm_info.allow_ints) { - if (irqs_disabled_flags(flags)) - local_irq_enable(); - } else - local_irq_disable(); - - return flags; -} - -#define apm_irq_save(flags) \ - do { flags = __apm_irq_save(); } while (0) - -static inline void apm_irq_restore(unsigned long flags) -{ - if (irqs_disabled_flags(flags)) - local_irq_disable(); - else if (irqs_disabled()) - local_irq_enable(); -} - -#ifdef APM_ZERO_SEGS -# define APM_DECL_SEGS \ - unsigned int saved_fs; unsigned int saved_gs; -# define APM_DO_SAVE_SEGS \ - savesegment(fs, saved_fs); savesegment(gs, saved_gs) -# define APM_DO_RESTORE_SEGS \ - loadsegment(fs, saved_fs); loadsegment(gs, saved_gs) -#else -# define APM_DECL_SEGS -# define APM_DO_SAVE_SEGS -# define APM_DO_RESTORE_SEGS -#endif - -struct apm_bios_call { - u32 func; - /* In and out */ - u32 ebx; - u32 ecx; - /* Out only */ - u32 eax; - u32 edx; - u32 esi; - - /* Error: -ENOMEM, or bits 8-15 of eax */ - int err; -}; - -/** - * __apm_bios_call - Make an APM BIOS 32bit call - * @_call: pointer to struct apm_bios_call. - * - * Make an APM call using the 32bit protected mode interface. The - * caller is responsible for knowing if APM BIOS is configured and - * enabled. This call can disable interrupts for a long period of - * time on some laptops. The return value is in AH and the carry - * flag is loaded into AL. If there is an error, then the error - * code is returned in AH (bits 8-15 of eax) and this function - * returns non-zero. - * - * Note: this makes the call on the current CPU. - */ -static long __apm_bios_call(void *_call) -{ - APM_DECL_SEGS - unsigned long flags; - int cpu; - struct desc_struct save_desc_40; - struct desc_struct *gdt; - struct apm_bios_call *call = _call; - - cpu = get_cpu(); - BUG_ON(cpu != 0); - gdt = get_cpu_gdt_table(cpu); - save_desc_40 = gdt[0x40 / 8]; - gdt[0x40 / 8] = bad_bios_desc; - - apm_irq_save(flags); - APM_DO_SAVE_SEGS; - apm_bios_call_asm(call->func, call->ebx, call->ecx, - &call->eax, &call->ebx, &call->ecx, &call->edx, - &call->esi); - APM_DO_RESTORE_SEGS; - apm_irq_restore(flags); - gdt[0x40 / 8] = save_desc_40; - put_cpu(); - - return call->eax & 0xff; -} - -/* Run __apm_bios_call or __apm_bios_call_simple on CPU 0 */ -static int on_cpu0(long (*fn)(void *), struct apm_bios_call *call) -{ - int ret; - - /* Don't bother with work_on_cpu in the common case, so we don't - * have to worry about OOM or overhead. */ - if (get_cpu() == 0) { - ret = fn(call); - put_cpu(); - } else { - put_cpu(); - ret = work_on_cpu(0, fn, call); - } - - /* work_on_cpu can fail with -ENOMEM */ - if (ret < 0) - call->err = ret; - else - call->err = (call->eax >> 8) & 0xff; - - return ret; -} - -/** - * apm_bios_call - Make an APM BIOS 32bit call (on CPU 0) - * @call: the apm_bios_call registers. - * - * If there is an error, it is returned in @call.err. - */ -static int apm_bios_call(struct apm_bios_call *call) -{ - return on_cpu0(__apm_bios_call, call); -} - -/** - * __apm_bios_call_simple - Make an APM BIOS 32bit call (on CPU 0) - * @_call: pointer to struct apm_bios_call. - * - * Make a BIOS call that returns one value only, or just status. - * If there is an error, then the error code is returned in AH - * (bits 8-15 of eax) and this function returns non-zero (it can - * also return -ENOMEM). 
This is used for simpler BIOS operations. - * This call may hold interrupts off for a long time on some laptops. - * - * Note: this makes the call on the current CPU. - */ -static long __apm_bios_call_simple(void *_call) -{ - u8 error; - APM_DECL_SEGS - unsigned long flags; - int cpu; - struct desc_struct save_desc_40; - struct desc_struct *gdt; - struct apm_bios_call *call = _call; - - cpu = get_cpu(); - BUG_ON(cpu != 0); - gdt = get_cpu_gdt_table(cpu); - save_desc_40 = gdt[0x40 / 8]; - gdt[0x40 / 8] = bad_bios_desc; - - apm_irq_save(flags); - APM_DO_SAVE_SEGS; - error = apm_bios_call_simple_asm(call->func, call->ebx, call->ecx, - &call->eax); - APM_DO_RESTORE_SEGS; - apm_irq_restore(flags); - gdt[0x40 / 8] = save_desc_40; - put_cpu(); - return error; -} - -/** - * apm_bios_call_simple - make a simple APM BIOS 32bit call - * @func: APM function to invoke - * @ebx_in: EBX register value for BIOS call - * @ecx_in: ECX register value for BIOS call - * @eax: EAX register on return from the BIOS call - * @err: APM error code (bits 8-15 of eax), or -ENOMEM - * - * Make a BIOS call that returns one value only, or just status. - * If there is an error, then the error code is returned in @err - * and this function returns non-zero. This is used for simpler - * BIOS operations. This call may hold interrupts off for a long - * time on some laptops. - */ -static int apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax, - int *err) -{ - struct apm_bios_call call; - int ret; - - call.func = func; - call.ebx = ebx_in; - call.ecx = ecx_in; - - ret = on_cpu0(__apm_bios_call_simple, &call); - *eax = call.eax; - *err = call.err; - return ret; -} - -/** - * apm_driver_version - APM driver version - * @val: loaded with the APM version on return - * - * Retrieve the APM version supported by the BIOS. This is only - * supported for APM 1.1 or higher. An error indicates APM 1.0 is - * probably present. - * - * On entry val should point to a value indicating the APM driver - * version with the high byte being the major and the low byte the - * minor number, both in BCD. - * - * On return it will hold the BIOS revision supported in the - * same format. - */ - -static int apm_driver_version(u_short *val) -{ - u32 eax; - int err; - - if (apm_bios_call_simple(APM_FUNC_VERSION, 0, *val, &eax, &err)) - return err; - *val = eax; - return APM_SUCCESS; -} - -/** - * apm_get_event - get an APM event from the BIOS - * @event: pointer to the event - * @info: pointer to the event information - * - * The APM BIOS provides polled information for event - * reporting. The BIOS expects to be polled at least every second - * when events are pending. When a message is found the caller should - * poll until no more messages are present. However, this causes - * problems on some laptops where a suspend event notification is - * not cleared until it is acknowledged. - * - * Additional information is returned in the info pointer, provided - * that APM 1.2 is in use. If no messages are pending the value 0x80 - * is returned (No power management events pending).
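- *
- * In this driver that polling is done from the kapmd thread: get_event()
- * and check_events() below drain the event queue roughly once a second
- * (APM_CHECK_TIMEOUT jiffies between passes of apm_mainloop()).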
- */ -static int apm_get_event(apm_event_t *event, apm_eventinfo_t *info) -{ - struct apm_bios_call call; - - call.func = APM_FUNC_GET_EVENT; - call.ebx = call.ecx = 0; - - if (apm_bios_call(&call)) - return call.err; - - *event = call.ebx; - if (apm_info.connection_version < 0x0102) - *info = ~0; /* indicate info not valid */ - else - *info = call.ecx; - return APM_SUCCESS; -} - -/** - * set_power_state - set the power management state - * @what: which items to transition - * @state: state to transition to - * - * Request an APM change of state for one or more system devices. The - * processor state must be transitioned last of all. what holds the - * class of device in the upper byte and the device number (0xFF for - * all) for the object to be transitioned. - * - * The state holds the state to transition to, which may in fact - * be an acceptance of a BIOS requested state change. - */ - -static int set_power_state(u_short what, u_short state) -{ - u32 eax; - int err; - - if (apm_bios_call_simple(APM_FUNC_SET_STATE, what, state, &eax, &err)) - return err; - return APM_SUCCESS; -} - -/** - * set_system_power_state - set system wide power state - * @state: which state to enter - * - * Transition the entire system into a new APM power state. - */ - -static int set_system_power_state(u_short state) -{ - return set_power_state(APM_DEVICE_ALL, state); -} - -/** - * apm_do_idle - perform power saving - * - * This function notifies the BIOS that the processor is (in the view - * of the OS) idle. It returns -1 in the event that the BIOS refuses - * to handle the idle request. On a success the function returns 1 - * if the BIOS did clock slowing or 0 otherwise. - */ - -static int apm_do_idle(void) -{ - u32 eax; - u8 ret = 0; - int idled = 0; - int polling; - int err = 0; - - polling = !!(current_thread_info()->status & TS_POLLING); - if (polling) { - current_thread_info()->status &= ~TS_POLLING; - /* - * TS_POLLING-cleared state must be visible before we - * test NEED_RESCHED: - */ - smp_mb(); - } - if (!need_resched()) { - idled = 1; - ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax, &err); - } - if (polling) - current_thread_info()->status |= TS_POLLING; - - if (!idled) - return 0; - - if (ret) { - static unsigned long t; - - /* This always fails on some SMP boards running UP kernels. - * Only report the failure the first 5 times. - */ - if (++t < 5) { - printk(KERN_DEBUG "apm_do_idle failed (%d)\n", err); - t = jiffies; - } - return -1; - } - clock_slowed = (apm_info.bios.flags & APM_IDLE_SLOWS_CLOCK) != 0; - return clock_slowed; -} - -/** - * apm_do_busy - inform the BIOS the CPU is busy - * - * Request that the BIOS brings the CPU back to full performance. - */ - -static void apm_do_busy(void) -{ - u32 dummy; - int err; - - if (clock_slowed || ALWAYS_CALL_BUSY) { - (void)apm_bios_call_simple(APM_FUNC_BUSY, 0, 0, &dummy, &err); - clock_slowed = 0; - } -} - -/* - * If no process has really been interested in - * the CPU for some time, we want to call BIOS - * power management - we probably want - * to conserve power. - */ -#define IDLE_CALC_LIMIT (HZ * 100) -#define IDLE_LEAKY_MAX 16 - -static void (*original_pm_idle)(void) __read_mostly; - -/** - * apm_cpu_idle - cpu idling for APM capable Linux - * - * This is the idling function the kernel executes when APM is available. It - * tries to do BIOS powermanagement based on the average system idle time. - * Furthermore it calls the system default idle routine. 
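- *
- * A small leaky bucket (IDLE_LEAKY_MAX) lets the loop keep issuing BIOS
- * idle calls within the same tick even when they return without the clock
- * having been slowed; once the bucket drains, the default idle routine is
- * used for that pass through the loop.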
- */ - -static void apm_cpu_idle(void) -{ - static int use_apm_idle; /* = 0 */ - static unsigned int last_jiffies; /* = 0 */ - static unsigned int last_stime; /* = 0 */ - - int apm_idle_done = 0; - unsigned int jiffies_since_last_check = jiffies - last_jiffies; - unsigned int bucket; - - WARN_ONCE(1, "deprecated apm_cpu_idle will be deleted in 2012"); -recalc: - if (jiffies_since_last_check > IDLE_CALC_LIMIT) { - use_apm_idle = 0; - last_jiffies = jiffies; - last_stime = current->stime; - } else if (jiffies_since_last_check > idle_period) { - unsigned int idle_percentage; - - idle_percentage = current->stime - last_stime; - idle_percentage *= 100; - idle_percentage /= jiffies_since_last_check; - use_apm_idle = (idle_percentage > idle_threshold); - if (apm_info.forbid_idle) - use_apm_idle = 0; - last_jiffies = jiffies; - last_stime = current->stime; - } - - bucket = IDLE_LEAKY_MAX; - - while (!need_resched()) { - if (use_apm_idle) { - unsigned int t; - - t = jiffies; - switch (apm_do_idle()) { - case 0: - apm_idle_done = 1; - if (t != jiffies) { - if (bucket) { - bucket = IDLE_LEAKY_MAX; - continue; - } - } else if (bucket) { - bucket--; - continue; - } - break; - case 1: - apm_idle_done = 1; - break; - default: /* BIOS refused */ - break; - } - } - if (original_pm_idle) - original_pm_idle(); - else - default_idle(); - local_irq_disable(); - jiffies_since_last_check = jiffies - last_jiffies; - if (jiffies_since_last_check > idle_period) - goto recalc; - } - - if (apm_idle_done) - apm_do_busy(); - - local_irq_enable(); -} - -/** - * apm_power_off - ask the BIOS to power off - * - * Handle the power off sequence. This is the one piece of code we - * will execute even on SMP machines. In order to deal with BIOS - * bugs we support real mode APM BIOS power off calls. We also make - * the SMP call on CPU0 as some systems will only honour this call - * on their first cpu. - */ - -static void apm_power_off(void) -{ - /* Some bioses don't like being called from CPU != 0 */ - if (apm_info.realmode_power_off) { - set_cpus_allowed_ptr(current, cpumask_of(0)); - machine_real_restart(MRR_APM); - } else { - (void)set_system_power_state(APM_STATE_OFF); - } -} - -#ifdef CONFIG_APM_DO_ENABLE - -/** - * apm_enable_power_management - enable BIOS APM power management - * @enable: enable yes/no - * - * Enable or disable the APM BIOS power services. - */ - -static int apm_enable_power_management(int enable) -{ - u32 eax; - int err; - - if ((enable == 0) && (apm_info.bios.flags & APM_BIOS_DISENGAGED)) - return APM_NOT_ENGAGED; - if (apm_bios_call_simple(APM_FUNC_ENABLE_PM, APM_DEVICE_BALL, - enable, &eax, &err)) - return err; - if (enable) - apm_info.bios.flags &= ~APM_BIOS_DISABLED; - else - apm_info.bios.flags |= APM_BIOS_DISABLED; - return APM_SUCCESS; -} -#endif - -/** - * apm_get_power_status - get current power state - * @status: returned status - * @bat: battery info - * @life: estimated life - * - * Obtain the current power status from the APM BIOS. We return a - * status which gives the rough battery status, and current power - * source. The bat value returned gives an estimate as a percentage - * of life and a status value for the battery. The estimated life, - * if reported, is a lifetime in seconds/minutes at current power - * consumption.
- */ - -static int apm_get_power_status(u_short *status, u_short *bat, u_short *life) -{ - struct apm_bios_call call; - - call.func = APM_FUNC_GET_STATUS; - call.ebx = APM_DEVICE_ALL; - call.ecx = 0; - - if (apm_info.get_power_status_broken) - return APM_32_UNSUPPORTED; - if (apm_bios_call(&call)) - return call.err; - *status = call.ebx; - *bat = call.ecx; - if (apm_info.get_power_status_swabinminutes) { - *life = swab16((u16)call.edx); - *life |= 0x8000; - } else - *life = call.edx; - return APM_SUCCESS; -} - -#if 0 -static int apm_get_battery_status(u_short which, u_short *status, - u_short *bat, u_short *life, u_short *nbat) -{ - u32 eax; - u32 ebx; - u32 ecx; - u32 edx; - u32 esi; - - if (apm_info.connection_version < 0x0102) { - /* pretend we only have one battery. */ - if (which != 1) - return APM_BAD_DEVICE; - *nbat = 1; - return apm_get_power_status(status, bat, life); - } - - if (apm_bios_call(APM_FUNC_GET_STATUS, (0x8000 | (which)), 0, &eax, - &ebx, &ecx, &edx, &esi)) - return (eax >> 8) & 0xff; - *status = ebx; - *bat = ecx; - *life = edx; - *nbat = esi; - return APM_SUCCESS; -} -#endif - -/** - * apm_engage_power_management - enable PM on a device - * @device: identity of device - * @enable: on/off - * - * Activate or deactive power management on either a specific device - * or the entire system (%APM_DEVICE_ALL). - */ - -static int apm_engage_power_management(u_short device, int enable) -{ - u32 eax; - int err; - - if ((enable == 0) && (device == APM_DEVICE_ALL) - && (apm_info.bios.flags & APM_BIOS_DISABLED)) - return APM_DISABLED; - if (apm_bios_call_simple(APM_FUNC_ENGAGE_PM, device, enable, - &eax, &err)) - return err; - if (device == APM_DEVICE_ALL) { - if (enable) - apm_info.bios.flags &= ~APM_BIOS_DISENGAGED; - else - apm_info.bios.flags |= APM_BIOS_DISENGAGED; - } - return APM_SUCCESS; -} - -#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) - -/** - * apm_console_blank - blank the display - * @blank: on/off - * - * Attempt to blank the console, firstly by blanking just video device - * zero, and if that fails (some BIOSes don't support it) then it blanks - * all video devices. Typically the BIOS will do laptop backlight and - * monitor powerdown for us. - */ - -static int apm_console_blank(int blank) -{ - int error = APM_NOT_ENGAGED; /* silence gcc */ - int i; - u_short state; - static const u_short dev[3] = { 0x100, 0x1FF, 0x101 }; - - state = blank ? 
APM_STATE_STANDBY : APM_STATE_READY; - - for (i = 0; i < ARRAY_SIZE(dev); i++) { - error = set_power_state(dev[i], state); - - if ((error == APM_SUCCESS) || (error == APM_NO_ERROR)) - return 1; - - if (error == APM_NOT_ENGAGED) - break; - } - - if (error == APM_NOT_ENGAGED) { - static int tried; - int eng_error; - if (tried++ == 0) { - eng_error = apm_engage_power_management(APM_DEVICE_ALL, 1); - if (eng_error) { - apm_error("set display", error); - apm_error("engage interface", eng_error); - return 0; - } else - return apm_console_blank(blank); - } - } - apm_error("set display", error); - return 0; -} -#endif - -static int queue_empty(struct apm_user *as) -{ - return as->event_head == as->event_tail; -} - -static apm_event_t get_queued_event(struct apm_user *as) -{ - if (++as->event_tail >= APM_MAX_EVENTS) - as->event_tail = 0; - return as->events[as->event_tail]; -} - -static void queue_event(apm_event_t event, struct apm_user *sender) -{ - struct apm_user *as; - - spin_lock(&user_list_lock); - if (user_list == NULL) - goto out; - for (as = user_list; as != NULL; as = as->next) { - if ((as == sender) || (!as->reader)) - continue; - if (++as->event_head >= APM_MAX_EVENTS) - as->event_head = 0; - - if (as->event_head == as->event_tail) { - static int notified; - - if (notified++ == 0) - printk(KERN_ERR "apm: an event queue overflowed\n"); - if (++as->event_tail >= APM_MAX_EVENTS) - as->event_tail = 0; - } - as->events[as->event_head] = event; - if (!as->suser || !as->writer) - continue; - switch (event) { - case APM_SYS_SUSPEND: - case APM_USER_SUSPEND: - as->suspends_pending++; - suspends_pending++; - break; - - case APM_SYS_STANDBY: - case APM_USER_STANDBY: - as->standbys_pending++; - standbys_pending++; - break; - } - } - wake_up_interruptible(&apm_waitqueue); -out: - spin_unlock(&user_list_lock); -} - -static void reinit_timer(void) -{ -#ifdef INIT_TIMER_AFTER_SUSPEND - unsigned long flags; - - raw_spin_lock_irqsave(&i8253_lock, flags); - /* set the clock to HZ */ - outb_p(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ - udelay(10); - outb_p(LATCH & 0xff, PIT_CH0); /* LSB */ - udelay(10); - outb_p(LATCH >> 8, PIT_CH0); /* MSB */ - udelay(10); - raw_spin_unlock_irqrestore(&i8253_lock, flags); -#endif -} - -static int suspend(int vetoable) -{ - int err; - struct apm_user *as; - - dpm_suspend_start(PMSG_SUSPEND); - dpm_suspend_end(PMSG_SUSPEND); - - local_irq_disable(); - syscore_suspend(); - - local_irq_enable(); - - save_processor_state(); - err = set_system_power_state(APM_STATE_SUSPEND); - ignore_normal_resume = 1; - restore_processor_state(); - - local_irq_disable(); - reinit_timer(); - - if (err == APM_NO_ERROR) - err = APM_SUCCESS; - if (err != APM_SUCCESS) - apm_error("suspend", err); - err = (err == APM_SUCCESS) ? 
0 : -EIO; - - syscore_resume(); - local_irq_enable(); - - dpm_resume_start(PMSG_RESUME); - dpm_resume_end(PMSG_RESUME); - - queue_event(APM_NORMAL_RESUME, NULL); - spin_lock(&user_list_lock); - for (as = user_list; as != NULL; as = as->next) { - as->suspend_wait = 0; - as->suspend_result = err; - } - spin_unlock(&user_list_lock); - wake_up_interruptible(&apm_suspend_waitqueue); - return err; -} - -static void standby(void) -{ - int err; - - dpm_suspend_end(PMSG_SUSPEND); - - local_irq_disable(); - syscore_suspend(); - local_irq_enable(); - - err = set_system_power_state(APM_STATE_STANDBY); - if ((err != APM_SUCCESS) && (err != APM_NO_ERROR)) - apm_error("standby", err); - - local_irq_disable(); - syscore_resume(); - local_irq_enable(); - - dpm_resume_start(PMSG_RESUME); -} - -static apm_event_t get_event(void) -{ - int error; - apm_event_t event = APM_NO_EVENTS; /* silence gcc */ - apm_eventinfo_t info; - - static int notified; - - /* we don't use the eventinfo */ - error = apm_get_event(&event, &info); - if (error == APM_SUCCESS) - return event; - - if ((error != APM_NO_EVENTS) && (notified++ == 0)) - apm_error("get_event", error); - - return 0; -} - -static void check_events(void) -{ - apm_event_t event; - static unsigned long last_resume; - static int ignore_bounce; - - while ((event = get_event()) != 0) { - if (debug) { - if (event <= NR_APM_EVENT_NAME) - printk(KERN_DEBUG "apm: received %s notify\n", - apm_event_name[event - 1]); - else - printk(KERN_DEBUG "apm: received unknown " - "event 0x%02x\n", event); - } - if (ignore_bounce - && (time_after(jiffies, last_resume + bounce_interval))) - ignore_bounce = 0; - - switch (event) { - case APM_SYS_STANDBY: - case APM_USER_STANDBY: - queue_event(event, NULL); - if (standbys_pending <= 0) - standby(); - break; - - case APM_USER_SUSPEND: -#ifdef CONFIG_APM_IGNORE_USER_SUSPEND - if (apm_info.connection_version > 0x100) - set_system_power_state(APM_STATE_REJECT); - break; -#endif - case APM_SYS_SUSPEND: - if (ignore_bounce) { - if (apm_info.connection_version > 0x100) - set_system_power_state(APM_STATE_REJECT); - break; - } - /* - * If we are already processing a SUSPEND, - * then further SUSPEND events from the BIOS - * will be ignored. We also return here to - * cope with the fact that the Thinkpads keep - * sending a SUSPEND event until something else - * happens! - */ - if (ignore_sys_suspend) - return; - ignore_sys_suspend = 1; - queue_event(event, NULL); - if (suspends_pending <= 0) - (void) suspend(1); - break; - - case APM_NORMAL_RESUME: - case APM_CRITICAL_RESUME: - case APM_STANDBY_RESUME: - ignore_sys_suspend = 0; - last_resume = jiffies; - ignore_bounce = 1; - if ((event != APM_NORMAL_RESUME) - || (ignore_normal_resume == 0)) { - dpm_resume_end(PMSG_RESUME); - queue_event(event, NULL); - } - ignore_normal_resume = 0; - break; - - case APM_CAPABILITY_CHANGE: - case APM_LOW_BATTERY: - case APM_POWER_STATUS_CHANGE: - queue_event(event, NULL); - /* If needed, notify drivers here */ - break; - - case APM_UPDATE_TIME: - break; - - case APM_CRITICAL_SUSPEND: - /* - * We are not allowed to reject a critical suspend. 
- */ - (void)suspend(0); - break; - } - } -} - -static void apm_event_handler(void) -{ - static int pending_count = 4; - int err; - - if ((standbys_pending > 0) || (suspends_pending > 0)) { - if ((apm_info.connection_version > 0x100) && - (pending_count-- <= 0)) { - pending_count = 4; - if (debug) - printk(KERN_DEBUG "apm: setting state busy\n"); - err = set_system_power_state(APM_STATE_BUSY); - if (err) - apm_error("busy", err); - } - } else - pending_count = 4; - check_events(); -} - -/* - * This is the APM thread main loop. - */ - -static void apm_mainloop(void) -{ - DECLARE_WAITQUEUE(wait, current); - - add_wait_queue(&apm_waitqueue, &wait); - set_current_state(TASK_INTERRUPTIBLE); - for (;;) { - schedule_timeout(APM_CHECK_TIMEOUT); - if (kthread_should_stop()) - break; - /* - * Ok, check all events, check for idle (and mark us sleeping - * so as not to count towards the load average).. - */ - set_current_state(TASK_INTERRUPTIBLE); - apm_event_handler(); - } - remove_wait_queue(&apm_waitqueue, &wait); -} - -static int check_apm_user(struct apm_user *as, const char *func) -{ - if (as == NULL || as->magic != APM_BIOS_MAGIC) { - printk(KERN_ERR "apm: %s passed bad filp\n", func); - return 1; - } - return 0; -} - -static ssize_t do_read(struct file *fp, char __user *buf, size_t count, loff_t *ppos) -{ - struct apm_user *as; - int i; - apm_event_t event; - - as = fp->private_data; - if (check_apm_user(as, "read")) - return -EIO; - if ((int)count < sizeof(apm_event_t)) - return -EINVAL; - if ((queue_empty(as)) && (fp->f_flags & O_NONBLOCK)) - return -EAGAIN; - wait_event_interruptible(apm_waitqueue, !queue_empty(as)); - i = count; - while ((i >= sizeof(event)) && !queue_empty(as)) { - event = get_queued_event(as); - if (copy_to_user(buf, &event, sizeof(event))) { - if (i < count) - break; - return -EFAULT; - } - switch (event) { - case APM_SYS_SUSPEND: - case APM_USER_SUSPEND: - as->suspends_read++; - break; - - case APM_SYS_STANDBY: - case APM_USER_STANDBY: - as->standbys_read++; - break; - } - buf += sizeof(event); - i -= sizeof(event); - } - if (i < count) - return count - i; - if (signal_pending(current)) - return -ERESTARTSYS; - return 0; -} - -static unsigned int do_poll(struct file *fp, poll_table *wait) -{ - struct apm_user *as; - - as = fp->private_data; - if (check_apm_user(as, "poll")) - return 0; - poll_wait(fp, &apm_waitqueue, wait); - if (!queue_empty(as)) - return POLLIN | POLLRDNORM; - return 0; -} - -static long do_ioctl(struct file *filp, u_int cmd, u_long arg) -{ - struct apm_user *as; - int ret; - - as = filp->private_data; - if (check_apm_user(as, "ioctl")) - return -EIO; - if (!as->suser || !as->writer) - return -EPERM; - switch (cmd) { - case APM_IOC_STANDBY: - mutex_lock(&apm_mutex); - if (as->standbys_read > 0) { - as->standbys_read--; - as->standbys_pending--; - standbys_pending--; - } else - queue_event(APM_USER_STANDBY, as); - if (standbys_pending <= 0) - standby(); - mutex_unlock(&apm_mutex); - break; - case APM_IOC_SUSPEND: - mutex_lock(&apm_mutex); - if (as->suspends_read > 0) { - as->suspends_read--; - as->suspends_pending--; - suspends_pending--; - } else - queue_event(APM_USER_SUSPEND, as); - if (suspends_pending <= 0) { - ret = suspend(1); - mutex_unlock(&apm_mutex); - } else { - as->suspend_wait = 1; - mutex_unlock(&apm_mutex); - wait_event_interruptible(apm_suspend_waitqueue, - as->suspend_wait == 0); - ret = as->suspend_result; - } - return ret; - default: - return -ENOTTY; - } - return 0; -} - -static int do_release(struct inode *inode, struct file 
*filp) -{ - struct apm_user *as; - - as = filp->private_data; - if (check_apm_user(as, "release")) - return 0; - filp->private_data = NULL; - if (as->standbys_pending > 0) { - standbys_pending -= as->standbys_pending; - if (standbys_pending <= 0) - standby(); - } - if (as->suspends_pending > 0) { - suspends_pending -= as->suspends_pending; - if (suspends_pending <= 0) - (void) suspend(1); - } - spin_lock(&user_list_lock); - if (user_list == as) - user_list = as->next; - else { - struct apm_user *as1; - - for (as1 = user_list; - (as1 != NULL) && (as1->next != as); - as1 = as1->next) - ; - if (as1 == NULL) - printk(KERN_ERR "apm: filp not in user list\n"); - else - as1->next = as->next; - } - spin_unlock(&user_list_lock); - kfree(as); - return 0; -} - -static int do_open(struct inode *inode, struct file *filp) -{ - struct apm_user *as; - - as = kmalloc(sizeof(*as), GFP_KERNEL); - if (as == NULL) { - printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n", - sizeof(*as)); - return -ENOMEM; - } - as->magic = APM_BIOS_MAGIC; - as->event_tail = as->event_head = 0; - as->suspends_pending = as->standbys_pending = 0; - as->suspends_read = as->standbys_read = 0; - /* - * XXX - this is a tiny bit broken, when we consider BSD - * process accounting. If the device is opened by root, we - * instantly flag that we used superuser privs. Who knows, - * we might close the device immediately without doing a - * privileged operation -- cevans - */ - as->suser = capable(CAP_SYS_ADMIN); - as->writer = (filp->f_mode & FMODE_WRITE) == FMODE_WRITE; - as->reader = (filp->f_mode & FMODE_READ) == FMODE_READ; - spin_lock(&user_list_lock); - as->next = user_list; - user_list = as; - spin_unlock(&user_list_lock); - filp->private_data = as; - return 0; -} - -static int proc_apm_show(struct seq_file *m, void *v) -{ - unsigned short bx; - unsigned short cx; - unsigned short dx; - int error; - unsigned short ac_line_status = 0xff; - unsigned short battery_status = 0xff; - unsigned short battery_flag = 0xff; - int percentage = -1; - int time_units = -1; - char *units = "?"; - - if ((num_online_cpus() == 1) && - !(error = apm_get_power_status(&bx, &cx, &dx))) { - ac_line_status = (bx >> 8) & 0xff; - battery_status = bx & 0xff; - if ((cx & 0xff) != 0xff) - percentage = cx & 0xff; - - if (apm_info.connection_version > 0x100) { - battery_flag = (cx >> 8) & 0xff; - if (dx != 0xffff) { - units = (dx & 0x8000) ? "min" : "sec"; - time_units = dx & 0x7fff; - } - } - } - /* Arguments, with symbols from linux/apm_bios.h. Information is - from the Get Power Status (0x0a) call unless otherwise noted. - - 0) Linux driver version (this will change if format changes) - 1) APM BIOS Version. Usually 1.0, 1.1 or 1.2. 
- 2) APM flags from APM Installation Check (0x00): - bit 0: APM_16_BIT_SUPPORT - bit 1: APM_32_BIT_SUPPORT - bit 2: APM_IDLE_SLOWS_CLOCK - bit 3: APM_BIOS_DISABLED - bit 4: APM_BIOS_DISENGAGED - 3) AC line status - 0x00: Off-line - 0x01: On-line - 0x02: On backup power (BIOS >= 1.1 only) - 0xff: Unknown - 4) Battery status - 0x00: High - 0x01: Low - 0x02: Critical - 0x03: Charging - 0x04: Selected battery not present (BIOS >= 1.2 only) - 0xff: Unknown - 5) Battery flag - bit 0: High - bit 1: Low - bit 2: Critical - bit 3: Charging - bit 7: No system battery - 0xff: Unknown - 6) Remaining battery life (percentage of charge): - 0-100: valid - -1: Unknown - 7) Remaining battery life (time units): - Number of remaining minutes or seconds - -1: Unknown - 8) min = minutes; sec = seconds */ - - seq_printf(m, "%s %d.%d 0x%02x 0x%02x 0x%02x 0x%02x %d%% %d %s\n", - driver_version, - (apm_info.bios.version >> 8) & 0xff, - apm_info.bios.version & 0xff, - apm_info.bios.flags, - ac_line_status, - battery_status, - battery_flag, - percentage, - time_units, - units); - return 0; -} - -static int proc_apm_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_apm_show, NULL); -} - -static const struct file_operations apm_file_ops = { - .owner = THIS_MODULE, - .open = proc_apm_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int apm(void *unused) -{ - unsigned short bx; - unsigned short cx; - unsigned short dx; - int error; - char *power_stat; - char *bat_stat; - - /* 2002/08/01 - WT - * This is to avoid random crashes at boot time during initialization - * on SMP systems in case of "apm=power-off" mode. Seen on ASUS A7M266D. - * Some bioses don't like being called from CPU != 0. - * Method suggested by Ingo Molnar. - */ - set_cpus_allowed_ptr(current, cpumask_of(0)); - BUG_ON(smp_processor_id() != 0); - - if (apm_info.connection_version == 0) { - apm_info.connection_version = apm_info.bios.version; - if (apm_info.connection_version > 0x100) { - /* - * We only support BIOSs up to version 1.2 - */ - if (apm_info.connection_version > 0x0102) - apm_info.connection_version = 0x0102; - error = apm_driver_version(&apm_info.connection_version); - if (error != APM_SUCCESS) { - apm_error("driver version", error); - /* Fall back to an APM 1.0 connection. */ - apm_info.connection_version = 0x100; - } - } - } - - if (debug) - printk(KERN_INFO "apm: Connection version %d.%d\n", - (apm_info.connection_version >> 8) & 0xff, - apm_info.connection_version & 0xff); - -#ifdef CONFIG_APM_DO_ENABLE - if (apm_info.bios.flags & APM_BIOS_DISABLED) { - /* - * This call causes my NEC UltraLite Versa 33/C to hang if it - * is booted with PM disabled but not in the docking station. - * Unfortunate ... 
- */ - error = apm_enable_power_management(1); - if (error) { - apm_error("enable power management", error); - return -1; - } - } -#endif - - if ((apm_info.bios.flags & APM_BIOS_DISENGAGED) - && (apm_info.connection_version > 0x0100)) { - error = apm_engage_power_management(APM_DEVICE_ALL, 1); - if (error) { - apm_error("engage power management", error); - return -1; - } - } - - if (debug && (num_online_cpus() == 1 || smp)) { - error = apm_get_power_status(&bx, &cx, &dx); - if (error) - printk(KERN_INFO "apm: power status not available\n"); - else { - switch ((bx >> 8) & 0xff) { - case 0: - power_stat = "off line"; - break; - case 1: - power_stat = "on line"; - break; - case 2: - power_stat = "on backup power"; - break; - default: - power_stat = "unknown"; - break; - } - switch (bx & 0xff) { - case 0: - bat_stat = "high"; - break; - case 1: - bat_stat = "low"; - break; - case 2: - bat_stat = "critical"; - break; - case 3: - bat_stat = "charging"; - break; - default: - bat_stat = "unknown"; - break; - } - printk(KERN_INFO - "apm: AC %s, battery status %s, battery life ", - power_stat, bat_stat); - if ((cx & 0xff) == 0xff) - printk("unknown\n"); - else - printk("%d%%\n", cx & 0xff); - if (apm_info.connection_version > 0x100) { - printk(KERN_INFO - "apm: battery flag 0x%02x, battery life ", - (cx >> 8) & 0xff); - if (dx == 0xffff) - printk("unknown\n"); - else - printk("%d %s\n", dx & 0x7fff, - (dx & 0x8000) ? - "minutes" : "seconds"); - } - } - } - - /* Install our power off handler.. */ - if (power_off) - pm_power_off = apm_power_off; - - if (num_online_cpus() == 1 || smp) { -#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) - console_blank_hook = apm_console_blank; -#endif - apm_mainloop(); -#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) - console_blank_hook = NULL; -#endif - } - - return 0; -} - -#ifndef MODULE -static int __init apm_setup(char *str) -{ - int invert; - - while ((str != NULL) && (*str != '\0')) { - if (strncmp(str, "off", 3) == 0) - apm_disabled = 1; - if (strncmp(str, "on", 2) == 0) - apm_disabled = 0; - if ((strncmp(str, "bounce-interval=", 16) == 0) || - (strncmp(str, "bounce_interval=", 16) == 0)) - bounce_interval = simple_strtol(str + 16, NULL, 0); - if ((strncmp(str, "idle-threshold=", 15) == 0) || - (strncmp(str, "idle_threshold=", 15) == 0)) - idle_threshold = simple_strtol(str + 15, NULL, 0); - if ((strncmp(str, "idle-period=", 12) == 0) || - (strncmp(str, "idle_period=", 12) == 0)) - idle_period = simple_strtol(str + 12, NULL, 0); - invert = (strncmp(str, "no-", 3) == 0) || - (strncmp(str, "no_", 3) == 0); - if (invert) - str += 3; - if (strncmp(str, "debug", 5) == 0) - debug = !invert; - if ((strncmp(str, "power-off", 9) == 0) || - (strncmp(str, "power_off", 9) == 0)) - power_off = !invert; - if (strncmp(str, "smp", 3) == 0) { - smp = !invert; - idle_threshold = 100; - } - if ((strncmp(str, "allow-ints", 10) == 0) || - (strncmp(str, "allow_ints", 10) == 0)) - apm_info.allow_ints = !invert; - if ((strncmp(str, "broken-psr", 10) == 0) || - (strncmp(str, "broken_psr", 10) == 0)) - apm_info.get_power_status_broken = !invert; - if ((strncmp(str, "realmode-power-off", 18) == 0) || - (strncmp(str, "realmode_power_off", 18) == 0)) - apm_info.realmode_power_off = !invert; - str = strchr(str, ','); - if (str != NULL) - str += strspn(str, ", \t"); - } - return 1; -} - -__setup("apm=", apm_setup); -#endif - -static const struct file_operations apm_bios_fops = { - .owner = THIS_MODULE, - .read = do_read, - .poll = do_poll, - .unlocked_ioctl = 
do_ioctl, - .open = do_open, - .release = do_release, - .llseek = noop_llseek, -}; - -static struct miscdevice apm_device = { - APM_MINOR_DEV, - "apm_bios", - &apm_bios_fops -}; - - -/* Simple "print if true" callback */ -static int __init print_if_true(const struct dmi_system_id *d) -{ - printk("%s\n", d->ident); - return 0; -} - -/* - * Some Bioses enable the PS/2 mouse (touchpad) at resume, even if it was - * disabled before the suspend. Linux used to get terribly confused by that. - */ -static int __init broken_ps2_resume(const struct dmi_system_id *d) -{ - printk(KERN_INFO "%s machine detected. Mousepad Resume Bug " - "workaround hopefully not needed.\n", d->ident); - return 0; -} - -/* Some bioses have a broken protected mode poweroff and need to use realmode */ -static int __init set_realmode_power_off(const struct dmi_system_id *d) -{ - if (apm_info.realmode_power_off == 0) { - apm_info.realmode_power_off = 1; - printk(KERN_INFO "%s bios detected. " - "Using realmode poweroff only.\n", d->ident); - } - return 0; -} - -/* Some laptops require interrupts to be enabled during APM calls */ -static int __init set_apm_ints(const struct dmi_system_id *d) -{ - if (apm_info.allow_ints == 0) { - apm_info.allow_ints = 1; - printk(KERN_INFO "%s machine detected. " - "Enabling interrupts during APM calls.\n", d->ident); - } - return 0; -} - -/* Some APM bioses corrupt memory or just plain do not work */ -static int __init apm_is_horked(const struct dmi_system_id *d) -{ - if (apm_info.disabled == 0) { - apm_info.disabled = 1; - printk(KERN_INFO "%s machine detected. " - "Disabling APM.\n", d->ident); - } - return 0; -} - -static int __init apm_is_horked_d850md(const struct dmi_system_id *d) -{ - if (apm_info.disabled == 0) { - apm_info.disabled = 1; - printk(KERN_INFO "%s machine detected. " - "Disabling APM.\n", d->ident); - printk(KERN_INFO "This bug is fixed in bios P15 which is available for\n"); - printk(KERN_INFO "download from support.intel.com\n"); - } - return 0; -} - -/* Some APM bioses hang on APM idle calls */ -static int __init apm_likes_to_melt(const struct dmi_system_id *d) -{ - if (apm_info.forbid_idle == 0) { - apm_info.forbid_idle = 1; - printk(KERN_INFO "%s machine detected. " - "Disabling APM idle calls.\n", d->ident); - } - return 0; -} - -/* - * Check for clue free BIOS implementations who use - * the following QA technique - * - * [ Write BIOS Code ]<------ - * | ^ - * < Does it Compile >----N-- - * |Y ^ - * < Does it Boot Win98 >-N-- - * |Y - * [Ship It] - * - * Phoenix A04 08/24/2000 is known bad (Dell Inspiron 5000e) - * Phoenix A07 09/29/2000 is known good (Dell Inspiron 5000) - */ -static int __init broken_apm_power(const struct dmi_system_id *d) -{ - apm_info.get_power_status_broken = 1; - printk(KERN_WARNING "BIOS strings suggest APM bugs, " - "disabling power status reporting.\n"); - return 0; -} - -/* - * This bios swaps the APM minute reporting bytes over (Many sony laptops - * have this problem). 
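- * The workaround (get_power_status_swabinminutes) byte-swaps the value
- * reported by the BIOS and sets bit 15 so that the remaining battery life
- * is interpreted as minutes; see apm_get_power_status().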
- */ -static int __init swab_apm_power_in_minutes(const struct dmi_system_id *d) -{ - apm_info.get_power_status_swabinminutes = 1; - printk(KERN_WARNING "BIOS strings suggest APM reports battery life " - "in minutes and wrong byte order.\n"); - return 0; -} - -static struct dmi_system_id __initdata apm_dmi_table[] = { - { - print_if_true, - KERN_WARNING "IBM T23 - BIOS 1.03b+ and controller firmware 1.02+ may be needed for Linux APM.", - { DMI_MATCH(DMI_SYS_VENDOR, "IBM"), - DMI_MATCH(DMI_BIOS_VERSION, "1AET38WW (1.01b)"), }, - }, - { /* Handle problems with APM on the C600 */ - broken_ps2_resume, "Dell Latitude C600", - { DMI_MATCH(DMI_SYS_VENDOR, "Dell"), - DMI_MATCH(DMI_PRODUCT_NAME, "Latitude C600"), }, - }, - { /* Allow interrupts during suspend on Dell Latitude laptops*/ - set_apm_ints, "Dell Latitude", - { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "Latitude C510"), } - }, - { /* APM crashes */ - apm_is_horked, "Dell Inspiron 2500", - { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 2500"), - DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "A11"), }, - }, - { /* Allow interrupts during suspend on Dell Inspiron laptops*/ - set_apm_ints, "Dell Inspiron", { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 4000"), }, - }, - { /* Handle problems with APM on Inspiron 5000e */ - broken_apm_power, "Dell Inspiron 5000e", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "A04"), - DMI_MATCH(DMI_BIOS_DATE, "08/24/2000"), }, - }, - { /* Handle problems with APM on Inspiron 2500 */ - broken_apm_power, "Dell Inspiron 2500", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "A12"), - DMI_MATCH(DMI_BIOS_DATE, "02/04/2002"), }, - }, - { /* APM crashes */ - apm_is_horked, "Dell Dimension 4100", - { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "XPS-Z"), - DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."), - DMI_MATCH(DMI_BIOS_VERSION, "A11"), }, - }, - { /* Allow interrupts during suspend on Compaq Laptops*/ - set_apm_ints, "Compaq 12XL125", - { DMI_MATCH(DMI_SYS_VENDOR, "Compaq"), - DMI_MATCH(DMI_PRODUCT_NAME, "Compaq PC"), - DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "4.06"), }, - }, - { /* Allow interrupts during APM or the clock goes slow */ - set_apm_ints, "ASUSTeK", - { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "L8400K series Notebook PC"), }, - }, - { /* APM blows on shutdown */ - apm_is_horked, "ABIT KX7-333[R]", - { DMI_MATCH(DMI_BOARD_VENDOR, "ABIT"), - DMI_MATCH(DMI_BOARD_NAME, "VT8367-8233A (KX7-333[R])"), }, - }, - { /* APM crashes */ - apm_is_horked, "Trigem Delhi3", - { DMI_MATCH(DMI_SYS_VENDOR, "TriGem Computer, Inc"), - DMI_MATCH(DMI_PRODUCT_NAME, "Delhi3"), }, - }, - { /* APM crashes */ - apm_is_horked, "Fujitsu-Siemens", - { DMI_MATCH(DMI_BIOS_VENDOR, "hoenix/FUJITSU SIEMENS"), - DMI_MATCH(DMI_BIOS_VERSION, "Version1.01"), }, - }, - { /* APM crashes */ - apm_is_horked_d850md, "Intel D850MD", - { DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."), - DMI_MATCH(DMI_BIOS_VERSION, "MV85010A.86A.0016.P07.0201251536"), }, - }, - { /* APM crashes */ - apm_is_horked, "Intel D810EMO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."), - DMI_MATCH(DMI_BIOS_VERSION, "MO81010A.86A.0008.P04.0004170800"), }, - }, - { /* APM crashes */ - 
apm_is_horked, "Dell XPS-Z", - { DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."), - DMI_MATCH(DMI_BIOS_VERSION, "A11"), - DMI_MATCH(DMI_PRODUCT_NAME, "XPS-Z"), }, - }, - { /* APM crashes */ - apm_is_horked, "Sharp PC-PJ/AX", - { DMI_MATCH(DMI_SYS_VENDOR, "SHARP"), - DMI_MATCH(DMI_PRODUCT_NAME, "PC-PJ/AX"), - DMI_MATCH(DMI_BIOS_VENDOR, "SystemSoft"), - DMI_MATCH(DMI_BIOS_VERSION, "Version R2.08"), }, - }, - { /* APM crashes */ - apm_is_horked, "Dell Inspiron 2500", - { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 2500"), - DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "A11"), }, - }, - { /* APM idle hangs */ - apm_likes_to_melt, "Jabil AMD", - { DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), - DMI_MATCH(DMI_BIOS_VERSION, "0AASNP06"), }, - }, - { /* APM idle hangs */ - apm_likes_to_melt, "AMI Bios", - { DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), - DMI_MATCH(DMI_BIOS_VERSION, "0AASNP05"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-N505X(DE) */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "R0206H"), - DMI_MATCH(DMI_BIOS_DATE, "08/23/99"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-N505VX */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "W2K06H0"), - DMI_MATCH(DMI_BIOS_DATE, "02/03/00"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-XG29 */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "R0117A0"), - DMI_MATCH(DMI_BIOS_DATE, "04/25/00"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-Z600NE */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "R0121Z1"), - DMI_MATCH(DMI_BIOS_DATE, "05/11/00"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-Z600NE */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "WME01Z1"), - DMI_MATCH(DMI_BIOS_DATE, "08/11/00"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-Z600LEK(DE) */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "R0206Z3"), - DMI_MATCH(DMI_BIOS_DATE, "12/25/00"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-Z505LS */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "R0203D0"), - DMI_MATCH(DMI_BIOS_DATE, "05/12/00"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-Z505LS */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "R0203Z3"), - DMI_MATCH(DMI_BIOS_DATE, "08/25/00"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-Z505LS (with updated BIOS) */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "R0209Z3"), - DMI_MATCH(DMI_BIOS_DATE, "05/12/01"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-F104K */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "R0204K2"), - DMI_MATCH(DMI_BIOS_DATE, "08/28/00"), }, - }, - - { 
/* Handle problems with APM on Sony Vaio PCG-C1VN/C1VE */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "R0208P1"), - DMI_MATCH(DMI_BIOS_DATE, "11/09/00"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-C1VE */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "R0204P1"), - DMI_MATCH(DMI_BIOS_DATE, "09/12/00"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-C1VE */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "WXPO1Z3"), - DMI_MATCH(DMI_BIOS_DATE, "10/26/01"), }, - }, - { /* broken PM poweroff bios */ - set_realmode_power_off, "Award Software v4.60 PGMA", - { DMI_MATCH(DMI_BIOS_VENDOR, "Award Software International, Inc."), - DMI_MATCH(DMI_BIOS_VERSION, "4.60 PGMA"), - DMI_MATCH(DMI_BIOS_DATE, "134526184"), }, - }, - - /* Generic per vendor APM settings */ - - { /* Allow interrupts during suspend on IBM laptops */ - set_apm_ints, "IBM", - { DMI_MATCH(DMI_SYS_VENDOR, "IBM"), }, - }, - - { } -}; - -/* - * Just start the APM thread. We do NOT want to do APM BIOS - * calls from anything but the APM thread, if for no other reason - * than the fact that we don't trust the APM BIOS. This way, - * most common APM BIOS problems that lead to protection errors - * etc will have at least some level of being contained... - * - * In short, if something bad happens, at least we have a choice - * of just killing the apm thread.. - */ -static int __init apm_init(void) -{ - struct desc_struct *gdt; - int err; - - dmi_check_system(apm_dmi_table); - - if (apm_info.bios.version == 0 || paravirt_enabled() || machine_is_olpc()) { - printk(KERN_INFO "apm: BIOS not found.\n"); - return -ENODEV; - } - printk(KERN_INFO - "apm: BIOS version %d.%d Flags 0x%02x (Driver version %s)\n", - ((apm_info.bios.version >> 8) & 0xff), - (apm_info.bios.version & 0xff), - apm_info.bios.flags, - driver_version); - if ((apm_info.bios.flags & APM_32_BIT_SUPPORT) == 0) { - printk(KERN_INFO "apm: no 32 bit BIOS support\n"); - return -ENODEV; - } - - if (allow_ints) - apm_info.allow_ints = 1; - if (broken_psr) - apm_info.get_power_status_broken = 1; - if (realmode_power_off) - apm_info.realmode_power_off = 1; - /* User can override, but default is to trust DMI */ - if (apm_disabled != -1) - apm_info.disabled = apm_disabled; - - /* - * Fix for the Compaq Contura 3/25c which reports BIOS version 0.1 - * but is reportedly a 1.0 BIOS. 
- */ - if (apm_info.bios.version == 0x001) - apm_info.bios.version = 0x100; - - /* BIOS < 1.2 doesn't set cseg_16_len */ - if (apm_info.bios.version < 0x102) - apm_info.bios.cseg_16_len = 0; /* 64k */ - - if (debug) { - printk(KERN_INFO "apm: entry %x:%x cseg16 %x dseg %x", - apm_info.bios.cseg, apm_info.bios.offset, - apm_info.bios.cseg_16, apm_info.bios.dseg); - if (apm_info.bios.version > 0x100) - printk(" cseg len %x, dseg len %x", - apm_info.bios.cseg_len, - apm_info.bios.dseg_len); - if (apm_info.bios.version > 0x101) - printk(" cseg16 len %x", apm_info.bios.cseg_16_len); - printk("\n"); - } - - if (apm_info.disabled) { - printk(KERN_NOTICE "apm: disabled on user request.\n"); - return -ENODEV; - } - if ((num_online_cpus() > 1) && !power_off && !smp) { - printk(KERN_NOTICE "apm: disabled - APM is not SMP safe.\n"); - apm_info.disabled = 1; - return -ENODEV; - } - if (!acpi_disabled) { - printk(KERN_NOTICE "apm: overridden by ACPI.\n"); - apm_info.disabled = 1; - return -ENODEV; - } - - /* - * Set up the long jump entry point to the APM BIOS, which is called - * from inline assembly. - */ - apm_bios_entry.offset = apm_info.bios.offset; - apm_bios_entry.segment = APM_CS; - - /* - * The APM 1.1 BIOS is supposed to provide limit information that it - * recognizes. Many machines do this correctly, but many others do - * not restrict themselves to their claimed limit. When this happens, - * they will cause a segmentation violation in the kernel at boot time. - * Most BIOS's, however, will respect a 64k limit, so we use that. - * - * Note we only set APM segments on CPU zero, since we pin the APM - * code to that CPU. - */ - gdt = get_cpu_gdt_table(0); - set_desc_base(&gdt[APM_CS >> 3], - (unsigned long)__va((unsigned long)apm_info.bios.cseg << 4)); - set_desc_base(&gdt[APM_CS_16 >> 3], - (unsigned long)__va((unsigned long)apm_info.bios.cseg_16 << 4)); - set_desc_base(&gdt[APM_DS >> 3], - (unsigned long)__va((unsigned long)apm_info.bios.dseg << 4)); - - proc_create("apm", 0, NULL, &apm_file_ops); - - kapmd_task = kthread_create(apm, NULL, "kapmd"); - if (IS_ERR(kapmd_task)) { - printk(KERN_ERR "apm: disabled - Unable to start kernel " - "thread.\n"); - err = PTR_ERR(kapmd_task); - kapmd_task = NULL; - remove_proc_entry("apm", NULL); - return err; - } - wake_up_process(kapmd_task); - - if (num_online_cpus() > 1 && !smp) { - printk(KERN_NOTICE - "apm: disabled - APM is not SMP safe (power off active).\n"); - return 0; - } - - /* - * Note we don't actually care if the misc_device cannot be registered. - * this driver can do its job without it, even if userspace can't - * control it. just log the error - */ - if (misc_register(&apm_device)) - printk(KERN_WARNING "apm: Could not register misc device.\n"); - - if (HZ != 100) - idle_period = (idle_period * HZ) / 100; - if (idle_threshold < 100) { - original_pm_idle = pm_idle; - pm_idle = apm_cpu_idle; - set_pm_idle = 1; - } - - return 0; -} - -static void __exit apm_exit(void) -{ - int error; - - if (set_pm_idle) { - pm_idle = original_pm_idle; - /* - * We are about to unload the current idle thread pm callback - * (pm_idle), Wait for all processors to update cached/local - * copies of pm_idle before proceeding. 
- */ - cpu_idle_wait(); - } - if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0) - && (apm_info.connection_version > 0x0100)) { - error = apm_engage_power_management(APM_DEVICE_ALL, 0); - if (error) - apm_error("disengage power management", error); - } - misc_deregister(&apm_device); - remove_proc_entry("apm", NULL); - if (power_off) - pm_power_off = NULL; - if (kapmd_task) { - kthread_stop(kapmd_task); - kapmd_task = NULL; - } -} - -module_init(apm_init); -module_exit(apm_exit); - -MODULE_AUTHOR("Stephen Rothwell"); -MODULE_DESCRIPTION("Advanced Power Management"); -MODULE_LICENSE("GPL"); -module_param(debug, bool, 0644); -MODULE_PARM_DESC(debug, "Enable debug mode"); -module_param(power_off, bool, 0444); -MODULE_PARM_DESC(power_off, "Enable power off"); -module_param(bounce_interval, int, 0444); -MODULE_PARM_DESC(bounce_interval, - "Set the number of ticks to ignore suspend bounces"); -module_param(allow_ints, bool, 0444); -MODULE_PARM_DESC(allow_ints, "Allow interrupts during BIOS calls"); -module_param(broken_psr, bool, 0444); -MODULE_PARM_DESC(broken_psr, "BIOS has a broken GetPowerStatus call"); -module_param(realmode_power_off, bool, 0444); -MODULE_PARM_DESC(realmode_power_off, - "Switch to real mode before powering off"); -module_param(idle_threshold, int, 0444); -MODULE_PARM_DESC(idle_threshold, - "System idle percentage above which to make APM BIOS idle calls"); -module_param(idle_period, int, 0444); -MODULE_PARM_DESC(idle_period, - "Period (in sec/100) over which to caculate the idle percentage"); -module_param(smp, bool, 0444); -MODULE_PARM_DESC(smp, - "Set this to enable APM use on an SMP platform. Use with caution on older systems"); -MODULE_ALIAS_MISCDEV(APM_MINOR_DEV); diff --git a/ANDROID_3.4.5/arch/x86/kernel/asm-offsets.c b/ANDROID_3.4.5/arch/x86/kernel/asm-offsets.c deleted file mode 100644 index 68de2dc9..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/asm-offsets.c +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Generate definitions needed by assembly language modules. - * This code generates raw asm output which is post-processed to extract - * and format the required data. 
- */ -#define COMPILE_OFFSETS - -#include <linux/crypto.h> -#include <linux/sched.h> -#include <linux/stddef.h> -#include <linux/hardirq.h> -#include <linux/suspend.h> -#include <linux/kbuild.h> -#include <asm/processor.h> -#include <asm/thread_info.h> -#include <asm/sigframe.h> -#include <asm/bootparam.h> -#include <asm/suspend.h> - -#ifdef CONFIG_XEN -#include <xen/interface/xen.h> -#endif - -#ifdef CONFIG_X86_32 -# include "asm-offsets_32.c" -#else -# include "asm-offsets_64.c" -#endif - -void common(void) { - BLANK(); - OFFSET(TI_flags, thread_info, flags); - OFFSET(TI_status, thread_info, status); - OFFSET(TI_addr_limit, thread_info, addr_limit); - OFFSET(TI_preempt_count, thread_info, preempt_count); - - BLANK(); - OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); - - BLANK(); - OFFSET(pbe_address, pbe, address); - OFFSET(pbe_orig_address, pbe, orig_address); - OFFSET(pbe_next, pbe, next); - -#ifdef CONFIG_PARAVIRT - BLANK(); - OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); - OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops); - OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops); - OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); - OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); - OFFSET(PV_CPU_iret, pv_cpu_ops, iret); - OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); - OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); - OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2); -#endif - -#ifdef CONFIG_XEN - BLANK(); - OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask); - OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending); -#endif - - BLANK(); - OFFSET(BP_scratch, boot_params, scratch); - OFFSET(BP_loadflags, boot_params, hdr.loadflags); - OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); - OFFSET(BP_version, boot_params, hdr.version); - OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment); - OFFSET(BP_pref_address, boot_params, hdr.pref_address); - OFFSET(BP_code32_start, boot_params, hdr.code32_start); -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/asm-offsets_32.c b/ANDROID_3.4.5/arch/x86/kernel/asm-offsets_32.c deleted file mode 100644 index 85d98ab1..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/asm-offsets_32.c +++ /dev/null @@ -1,87 +0,0 @@ -#include <asm/ucontext.h> - -#include <linux/lguest.h> -#include "../../../drivers/lguest/lg.h" - -#define __SYSCALL_I386(nr, sym, compat) [nr] = 1, -static char syscalls[] = { -#include <asm/syscalls_32.h> -}; - -/* workaround for a warning with -Wmissing-prototypes */ -void foo(void); - -void foo(void) -{ - OFFSET(IA32_SIGCONTEXT_ax, sigcontext, ax); - OFFSET(IA32_SIGCONTEXT_bx, sigcontext, bx); - OFFSET(IA32_SIGCONTEXT_cx, sigcontext, cx); - OFFSET(IA32_SIGCONTEXT_dx, sigcontext, dx); - OFFSET(IA32_SIGCONTEXT_si, sigcontext, si); - OFFSET(IA32_SIGCONTEXT_di, sigcontext, di); - OFFSET(IA32_SIGCONTEXT_bp, sigcontext, bp); - OFFSET(IA32_SIGCONTEXT_sp, sigcontext, sp); - OFFSET(IA32_SIGCONTEXT_ip, sigcontext, ip); - BLANK(); - - OFFSET(CPUINFO_x86, cpuinfo_x86, x86); - OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor); - OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model); - OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask); - OFFSET(CPUINFO_hard_math, cpuinfo_x86, hard_math); - OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level); - OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability); - OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); - BLANK(); - - OFFSET(TI_sysenter_return, thread_info, sysenter_return); - 
OFFSET(TI_cpu, thread_info, cpu); - BLANK(); - - OFFSET(PT_EBX, pt_regs, bx); - OFFSET(PT_ECX, pt_regs, cx); - OFFSET(PT_EDX, pt_regs, dx); - OFFSET(PT_ESI, pt_regs, si); - OFFSET(PT_EDI, pt_regs, di); - OFFSET(PT_EBP, pt_regs, bp); - OFFSET(PT_EAX, pt_regs, ax); - OFFSET(PT_DS, pt_regs, ds); - OFFSET(PT_ES, pt_regs, es); - OFFSET(PT_FS, pt_regs, fs); - OFFSET(PT_GS, pt_regs, gs); - OFFSET(PT_ORIG_EAX, pt_regs, orig_ax); - OFFSET(PT_EIP, pt_regs, ip); - OFFSET(PT_CS, pt_regs, cs); - OFFSET(PT_EFLAGS, pt_regs, flags); - OFFSET(PT_OLDESP, pt_regs, sp); - OFFSET(PT_OLDSS, pt_regs, ss); - BLANK(); - - OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext); - BLANK(); - - /* Offset from the sysenter stack to tss.sp0 */ - DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - - sizeof(struct tss_struct)); - -#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE) - BLANK(); - OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); - OFFSET(LGUEST_DATA_irq_pending, lguest_data, irq_pending); - - BLANK(); - OFFSET(LGUEST_PAGES_host_gdt_desc, lguest_pages, state.host_gdt_desc); - OFFSET(LGUEST_PAGES_host_idt_desc, lguest_pages, state.host_idt_desc); - OFFSET(LGUEST_PAGES_host_cr3, lguest_pages, state.host_cr3); - OFFSET(LGUEST_PAGES_host_sp, lguest_pages, state.host_sp); - OFFSET(LGUEST_PAGES_guest_gdt_desc, lguest_pages,state.guest_gdt_desc); - OFFSET(LGUEST_PAGES_guest_idt_desc, lguest_pages,state.guest_idt_desc); - OFFSET(LGUEST_PAGES_guest_gdt, lguest_pages, state.guest_gdt); - OFFSET(LGUEST_PAGES_regs_trapnum, lguest_pages, regs.trapnum); - OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode); - OFFSET(LGUEST_PAGES_regs, lguest_pages, regs); -#endif - BLANK(); - DEFINE(__NR_syscall_max, sizeof(syscalls) - 1); - DEFINE(NR_syscalls, sizeof(syscalls)); -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/asm-offsets_64.c b/ANDROID_3.4.5/arch/x86/kernel/asm-offsets_64.c deleted file mode 100644 index 1b4754f8..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/asm-offsets_64.c +++ /dev/null @@ -1,89 +0,0 @@ -#include <asm/ia32.h> - -#define __SYSCALL_64(nr, sym, compat) [nr] = 1, -#define __SYSCALL_COMMON(nr, sym, compat) [nr] = 1, -#ifdef CONFIG_X86_X32_ABI -# define __SYSCALL_X32(nr, sym, compat) [nr] = 1, -#else -# define __SYSCALL_X32(nr, sym, compat) /* nothing */ -#endif -static char syscalls_64[] = { -#include <asm/syscalls_64.h> -}; -#define __SYSCALL_I386(nr, sym, compat) [nr] = 1, -static char syscalls_ia32[] = { -#include <asm/syscalls_32.h> -}; - -int main(void) -{ -#ifdef CONFIG_PARAVIRT - OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame); - OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32); - OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64); - OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs); - BLANK(); -#endif - -#ifdef CONFIG_IA32_EMULATION - OFFSET(TI_sysenter_return, thread_info, sysenter_return); - BLANK(); - -#define ENTRY(entry) OFFSET(IA32_SIGCONTEXT_ ## entry, sigcontext_ia32, entry) - ENTRY(ax); - ENTRY(bx); - ENTRY(cx); - ENTRY(dx); - ENTRY(si); - ENTRY(di); - ENTRY(bp); - ENTRY(sp); - ENTRY(ip); - BLANK(); -#undef ENTRY - - OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext); - BLANK(); -#endif - -#define ENTRY(entry) OFFSET(pt_regs_ ## entry, pt_regs, entry) - ENTRY(bx); - ENTRY(bx); - ENTRY(cx); - ENTRY(dx); - ENTRY(sp); - ENTRY(bp); - ENTRY(si); - ENTRY(di); - ENTRY(r8); - ENTRY(r9); - ENTRY(r10); - ENTRY(r11); - ENTRY(r12); - ENTRY(r13); - ENTRY(r14); - 
ENTRY(r15); - ENTRY(flags); - BLANK(); -#undef ENTRY - -#define ENTRY(entry) OFFSET(saved_context_ ## entry, saved_context, entry) - ENTRY(cr0); - ENTRY(cr2); - ENTRY(cr3); - ENTRY(cr4); - ENTRY(cr8); - BLANK(); -#undef ENTRY - - OFFSET(TSS_ist, tss_struct, x86_tss.ist); - BLANK(); - - DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1); - DEFINE(NR_syscalls, sizeof(syscalls_64)); - - DEFINE(__NR_ia32_syscall_max, sizeof(syscalls_ia32) - 1); - DEFINE(IA32_NR_syscalls, sizeof(syscalls_ia32)); - - return 0; -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/audit_64.c b/ANDROID_3.4.5/arch/x86/kernel/audit_64.c deleted file mode 100644 index 06d3e5a1..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/audit_64.c +++ /dev/null @@ -1,81 +0,0 @@ -#include <linux/init.h> -#include <linux/types.h> -#include <linux/audit.h> -#include <asm/unistd.h> - -static unsigned dir_class[] = { -#include <asm-generic/audit_dir_write.h> -~0U -}; - -static unsigned read_class[] = { -#include <asm-generic/audit_read.h> -~0U -}; - -static unsigned write_class[] = { -#include <asm-generic/audit_write.h> -~0U -}; - -static unsigned chattr_class[] = { -#include <asm-generic/audit_change_attr.h> -~0U -}; - -static unsigned signal_class[] = { -#include <asm-generic/audit_signal.h> -~0U -}; - -int audit_classify_arch(int arch) -{ -#ifdef CONFIG_IA32_EMULATION - if (arch == AUDIT_ARCH_I386) - return 1; -#endif - return 0; -} - -int audit_classify_syscall(int abi, unsigned syscall) -{ -#ifdef CONFIG_IA32_EMULATION - extern int ia32_classify_syscall(unsigned); - if (abi == AUDIT_ARCH_I386) - return ia32_classify_syscall(syscall); -#endif - switch(syscall) { - case __NR_open: - return 2; - case __NR_openat: - return 3; - case __NR_execve: - return 5; - default: - return 0; - } -} - -static int __init audit_classes_init(void) -{ -#ifdef CONFIG_IA32_EMULATION - extern __u32 ia32_dir_class[]; - extern __u32 ia32_write_class[]; - extern __u32 ia32_read_class[]; - extern __u32 ia32_chattr_class[]; - extern __u32 ia32_signal_class[]; - audit_register_class(AUDIT_CLASS_WRITE_32, ia32_write_class); - audit_register_class(AUDIT_CLASS_READ_32, ia32_read_class); - audit_register_class(AUDIT_CLASS_DIR_WRITE_32, ia32_dir_class); - audit_register_class(AUDIT_CLASS_CHATTR_32, ia32_chattr_class); - audit_register_class(AUDIT_CLASS_SIGNAL_32, ia32_signal_class); -#endif - audit_register_class(AUDIT_CLASS_WRITE, write_class); - audit_register_class(AUDIT_CLASS_READ, read_class); - audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class); - audit_register_class(AUDIT_CLASS_CHATTR, chattr_class); - audit_register_class(AUDIT_CLASS_SIGNAL, signal_class); - return 0; -} - -__initcall(audit_classes_init); diff --git a/ANDROID_3.4.5/arch/x86/kernel/bootflag.c b/ANDROID_3.4.5/arch/x86/kernel/bootflag.c deleted file mode 100644 index 5de7f4c5..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/bootflag.c +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Implement 'Simple Boot Flag Specification 2.0' - */ -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/string.h> -#include <linux/spinlock.h> -#include <linux/acpi.h> -#include <asm/io.h> - -#include <linux/mc146818rtc.h> - -#define SBF_RESERVED (0x78) -#define SBF_PNPOS (1<<0) -#define SBF_BOOTING (1<<1) -#define SBF_DIAG (1<<2) -#define SBF_PARITY (1<<7) - -int sbf_port __initdata = -1; /* set via acpi_boot_init() */ - -static int __init parity(u8 v) -{ - int x = 0; - int i; - - for (i = 0; i < 8; i++) { - x ^= (v & 1); - v >>= 1; - } - - return x; -} - -static void __init sbf_write(u8 
v) -{ - unsigned long flags; - - if (sbf_port != -1) { - v &= ~SBF_PARITY; - if (!parity(v)) - v |= SBF_PARITY; - - printk(KERN_INFO "Simple Boot Flag at 0x%x set to 0x%x\n", - sbf_port, v); - - spin_lock_irqsave(&rtc_lock, flags); - CMOS_WRITE(v, sbf_port); - spin_unlock_irqrestore(&rtc_lock, flags); - } -} - -static u8 __init sbf_read(void) -{ - unsigned long flags; - u8 v; - - if (sbf_port == -1) - return 0; - - spin_lock_irqsave(&rtc_lock, flags); - v = CMOS_READ(sbf_port); - spin_unlock_irqrestore(&rtc_lock, flags); - - return v; -} - -static int __init sbf_value_valid(u8 v) -{ - if (v & SBF_RESERVED) /* Reserved bits */ - return 0; - if (!parity(v)) - return 0; - - return 1; -} - -static int __init sbf_init(void) -{ - u8 v; - - if (sbf_port == -1) - return 0; - - v = sbf_read(); - if (!sbf_value_valid(v)) { - printk(KERN_WARNING "Simple Boot Flag value 0x%x read from " - "CMOS RAM was invalid\n", v); - } - - v &= ~SBF_RESERVED; - v &= ~SBF_BOOTING; - v &= ~SBF_DIAG; -#if defined(CONFIG_ISAPNP) - v |= SBF_PNPOS; -#endif - sbf_write(v); - - return 0; -} -module_init(sbf_init); diff --git a/ANDROID_3.4.5/arch/x86/kernel/check.c b/ANDROID_3.4.5/arch/x86/kernel/check.c deleted file mode 100644 index 5da1269e..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/check.c +++ /dev/null @@ -1,159 +0,0 @@ -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/kthread.h> -#include <linux/workqueue.h> -#include <linux/memblock.h> - -#include <asm/proto.h> - -/* - * Some BIOSes seem to corrupt the low 64k of memory during events - * like suspend/resume and unplugging an HDMI cable. Reserve all - * remaining free memory in that area and fill it with a distinct - * pattern. - */ -#define MAX_SCAN_AREAS 8 - -static int __read_mostly memory_corruption_check = -1; - -static unsigned __read_mostly corruption_check_size = 64*1024; -static unsigned __read_mostly corruption_check_period = 60; /* seconds */ - -static struct scan_area { - u64 addr; - u64 size; -} scan_areas[MAX_SCAN_AREAS]; -static int num_scan_areas; - -static __init int set_corruption_check(char *arg) -{ - char *end; - - memory_corruption_check = simple_strtol(arg, &end, 10); - - return (*end == 0) ? 0 : -EINVAL; -} -early_param("memory_corruption_check", set_corruption_check); - -static __init int set_corruption_check_period(char *arg) -{ - char *end; - - corruption_check_period = simple_strtoul(arg, &end, 10); - - return (*end == 0) ? 0 : -EINVAL; -} -early_param("memory_corruption_check_period", set_corruption_check_period); - -static __init int set_corruption_check_size(char *arg) -{ - char *end; - unsigned size; - - size = memparse(arg, &end); - - if (*end == '\0') - corruption_check_size = size; - - return (size == corruption_check_size) ? 
0 : -EINVAL; -} -early_param("memory_corruption_check_size", set_corruption_check_size); - - -void __init setup_bios_corruption_check(void) -{ - phys_addr_t start, end; - u64 i; - - if (memory_corruption_check == -1) { - memory_corruption_check = -#ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK - 1 -#else - 0 -#endif - ; - } - - if (corruption_check_size == 0) - memory_corruption_check = 0; - - if (!memory_corruption_check) - return; - - corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); - - for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) { - start = clamp_t(phys_addr_t, round_up(start, PAGE_SIZE), - PAGE_SIZE, corruption_check_size); - end = clamp_t(phys_addr_t, round_down(end, PAGE_SIZE), - PAGE_SIZE, corruption_check_size); - if (start >= end) - continue; - - memblock_reserve(start, end - start); - scan_areas[num_scan_areas].addr = start; - scan_areas[num_scan_areas].size = end - start; - - /* Assume we've already mapped this early memory */ - memset(__va(start), 0, end - start); - - if (++num_scan_areas >= MAX_SCAN_AREAS) - break; - } - - if (num_scan_areas) - printk(KERN_INFO "Scanning %d areas for low memory corruption\n", num_scan_areas); -} - - -void check_for_bios_corruption(void) -{ - int i; - int corruption = 0; - - if (!memory_corruption_check) - return; - - for (i = 0; i < num_scan_areas; i++) { - unsigned long *addr = __va(scan_areas[i].addr); - unsigned long size = scan_areas[i].size; - - for (; size; addr++, size -= sizeof(unsigned long)) { - if (!*addr) - continue; - printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n", - addr, __pa(addr), *addr); - corruption = 1; - *addr = 0; - } - } - - WARN_ONCE(corruption, KERN_ERR "Memory corruption detected in low memory\n"); -} - -static void check_corruption(struct work_struct *dummy); -static DECLARE_DELAYED_WORK(bios_check_work, check_corruption); - -static void check_corruption(struct work_struct *dummy) -{ - check_for_bios_corruption(); - schedule_delayed_work(&bios_check_work, - round_jiffies_relative(corruption_check_period*HZ)); -} - -static int start_periodic_check_for_corruption(void) -{ - if (!num_scan_areas || !memory_corruption_check || corruption_check_period == 0) - return 0; - - printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n", - corruption_check_period); - - /* First time we run the checks right away */ - schedule_delayed_work(&bios_check_work, 0); - return 0; -} - -module_init(start_periodic_check_for_corruption); - diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/Makefile b/ANDROID_3.4.5/arch/x86/kernel/cpu/Makefile deleted file mode 100644 index 6ab6aa2f..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/Makefile +++ /dev/null @@ -1,50 +0,0 @@ -# -# Makefile for x86-compatible CPU details, features and quirks -# - -# Don't trace early stages of a secondary CPU boot -ifdef CONFIG_FUNCTION_TRACER -CFLAGS_REMOVE_common.o = -pg -CFLAGS_REMOVE_perf_event.o = -pg -endif - -# Make sure load_percpu_segment has no stackprotector -nostackp := $(call cc-option, -fno-stack-protector) -CFLAGS_common.o := $(nostackp) - -obj-y := intel_cacheinfo.o scattered.o topology.o -obj-y += proc.o capflags.o powerflags.o common.o -obj-y += vmware.o hypervisor.o sched.o mshyperv.o -obj-y += rdrand.o -obj-y += match.o - -obj-$(CONFIG_X86_32) += bugs.o -obj-$(CONFIG_X86_64) += bugs_64.o - -obj-$(CONFIG_CPU_SUP_INTEL) += intel.o -obj-$(CONFIG_CPU_SUP_AMD) += amd.o -obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o -obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o 
-obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o -obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o - -obj-$(CONFIG_PERF_EVENTS) += perf_event.o - -ifdef CONFIG_PERF_EVENTS -obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_p4.o perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o -endif - -obj-$(CONFIG_X86_MCE) += mcheck/ -obj-$(CONFIG_MTRR) += mtrr/ - -obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o - -quiet_cmd_mkcapflags = MKCAP $@ - cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ - -cpufeature = $(src)/../../include/asm/cpufeature.h - -targets += capflags.c -$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.pl FORCE - $(call if_changed,mkcapflags) diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/amd.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/amd.c deleted file mode 100644 index 146bb621..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/amd.c +++ /dev/null @@ -1,802 +0,0 @@ -#include <linux/export.h> -#include <linux/init.h> -#include <linux/bitops.h> -#include <linux/elf.h> -#include <linux/mm.h> - -#include <linux/io.h> -#include <linux/sched.h> -#include <asm/processor.h> -#include <asm/apic.h> -#include <asm/cpu.h> -#include <asm/pci-direct.h> - -#ifdef CONFIG_X86_64 -# include <asm/numa_64.h> -# include <asm/mmconfig.h> -# include <asm/cacheflush.h> -#endif - -#include "cpu.h" - -#ifdef CONFIG_X86_32 -/* - * B step AMD K6 before B 9730xxxx have hardware bugs that can cause - * misexecution of code under Linux. Owners of such processors should - * contact AMD for precise details and a CPU swap. - * - * See http://www.multimania.com/poulot/k6bug.html - * and section 2.6.2 of "AMD-K6 Processor Revision Guide - Model 6" - * (Publication # 21266 Issue Date: August 1998) - * - * The following test is erm.. interesting. AMD neglected to up - * the chip setting when fixing the bug but they also tweaked some - * performance at the same time.. - */ - -extern void vide(void); -__asm__(".align 4\nvide: ret"); - -static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c) -{ -/* - * General Systems BIOSen alias the cpu frequency registers - * of the Elan at 0x000df000. Unfortuantly, one of the Linux - * drivers subsequently pokes it, and changes the CPU speed. - * Workaround : Remove the unneeded alias. - */ -#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ -#define CBAR_ENB (0x80000000) -#define CBAR_KEY (0X000000CB) - if (c->x86_model == 9 || c->x86_model == 10) { - if (inl(CBAR) & CBAR_ENB) - outl(0 | CBAR_KEY, CBAR); - } -} - - -static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) -{ - u32 l, h; - int mbytes = num_physpages >> (20-PAGE_SHIFT); - - if (c->x86_model < 6) { - /* Based on AMD doc 20734R - June 2000 */ - if (c->x86_model == 0) { - clear_cpu_cap(c, X86_FEATURE_APIC); - set_cpu_cap(c, X86_FEATURE_PGE); - } - return; - } - - if (c->x86_model == 6 && c->x86_mask == 1) { - const int K6_BUG_LOOP = 1000000; - int n; - void (*f_vide)(void); - unsigned long d, d2; - - printk(KERN_INFO "AMD K6 stepping B detected - "); - - /* - * It looks like AMD fixed the 2.6.2 bug and improved indirect - * calls at the same time. 
- */ - - n = K6_BUG_LOOP; - f_vide = vide; - rdtscl(d); - while (n--) - f_vide(); - rdtscl(d2); - d = d2-d; - - if (d > 20*K6_BUG_LOOP) - printk(KERN_CONT - "system stability may be impaired when more than 32 MB are used.\n"); - else - printk(KERN_CONT "probably OK (after B9730xxxx).\n"); - } - - /* K6 with old style WHCR */ - if (c->x86_model < 8 || - (c->x86_model == 8 && c->x86_mask < 8)) { - /* We can only write allocate on the low 508Mb */ - if (mbytes > 508) - mbytes = 508; - - rdmsr(MSR_K6_WHCR, l, h); - if ((l&0x0000FFFF) == 0) { - unsigned long flags; - l = (1<<0)|((mbytes/4)<<1); - local_irq_save(flags); - wbinvd(); - wrmsr(MSR_K6_WHCR, l, h); - local_irq_restore(flags); - printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", - mbytes); - } - return; - } - - if ((c->x86_model == 8 && c->x86_mask > 7) || - c->x86_model == 9 || c->x86_model == 13) { - /* The more serious chips .. */ - - if (mbytes > 4092) - mbytes = 4092; - - rdmsr(MSR_K6_WHCR, l, h); - if ((l&0xFFFF0000) == 0) { - unsigned long flags; - l = ((mbytes>>2)<<22)|(1<<16); - local_irq_save(flags); - wbinvd(); - wrmsr(MSR_K6_WHCR, l, h); - local_irq_restore(flags); - printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", - mbytes); - } - - return; - } - - if (c->x86_model == 10) { - /* AMD Geode LX is model 10 */ - /* placeholder for any needed mods */ - return; - } -} - -static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c) -{ - /* calling is from identify_secondary_cpu() ? */ - if (!c->cpu_index) - return; - - /* - * Certain Athlons might work (for various values of 'work') in SMP - * but they are not certified as MP capable. - */ - /* Athlon 660/661 is valid. */ - if ((c->x86_model == 6) && ((c->x86_mask == 0) || - (c->x86_mask == 1))) - goto valid_k7; - - /* Duron 670 is valid */ - if ((c->x86_model == 7) && (c->x86_mask == 0)) - goto valid_k7; - - /* - * Athlon 662, Duron 671, and Athlon >model 7 have capability - * bit. It's worth noting that the A5 stepping (662) of some - * Athlon XP's have the MP bit set. - * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for - * more. - */ - if (((c->x86_model == 6) && (c->x86_mask >= 2)) || - ((c->x86_model == 7) && (c->x86_mask >= 1)) || - (c->x86_model > 7)) - if (cpu_has_mp) - goto valid_k7; - - /* If we get here, not a certified SMP capable AMD system. */ - - /* - * Don't taint if we are running SMP kernel on a single non-MP - * approved Athlon - */ - WARN_ONCE(1, "WARNING: This combination of AMD" - " processors is not suitable for SMP.\n"); - if (!test_taint(TAINT_UNSAFE_SMP)) - add_taint(TAINT_UNSAFE_SMP); - -valid_k7: - ; -} - -static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) -{ - u32 l, h; - - /* - * Bit 15 of Athlon specific MSR 15, needs to be 0 - * to enable SSE on Palomino/Morgan/Barton CPU's. - * If the BIOS didn't enable it already, enable it here. - */ - if (c->x86_model >= 6 && c->x86_model <= 10) { - if (!cpu_has(c, X86_FEATURE_XMM)) { - printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); - rdmsr(MSR_K7_HWCR, l, h); - l &= ~0x00008000; - wrmsr(MSR_K7_HWCR, l, h); - set_cpu_cap(c, X86_FEATURE_XMM); - } - } - - /* - * It's been determined by AMD that Athlons since model 8 stepping 1 - * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx - * As per AMD technical note 27212 0.2 - */ - if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { - rdmsr(MSR_K7_CLK_CTL, l, h); - if ((l & 0xfff00000) != 0x20000000) { - printk(KERN_INFO - "CPU: CLK_CTL MSR was %x. 
Reprogramming to %x\n", - l, ((l & 0x000fffff)|0x20000000)); - wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); - } - } - - set_cpu_cap(c, X86_FEATURE_K7); - - amd_k7_smp_check(c); -} -#endif - -#ifdef CONFIG_NUMA -/* - * To workaround broken NUMA config. Read the comment in - * srat_detect_node(). - */ -static int __cpuinit nearby_node(int apicid) -{ - int i, node; - - for (i = apicid - 1; i >= 0; i--) { - node = __apicid_to_node[i]; - if (node != NUMA_NO_NODE && node_online(node)) - return node; - } - for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { - node = __apicid_to_node[i]; - if (node != NUMA_NO_NODE && node_online(node)) - return node; - } - return first_node(node_online_map); /* Shouldn't happen */ -} -#endif - -/* - * Fixup core topology information for - * (1) AMD multi-node processors - * Assumption: Number of cores in each internal node is the same. - * (2) AMD processors supporting compute units - */ -#ifdef CONFIG_X86_HT -static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c) -{ - u32 nodes, cores_per_cu = 1; - u8 node_id; - int cpu = smp_processor_id(); - - /* get information required for multi-node processors */ - if (cpu_has(c, X86_FEATURE_TOPOEXT)) { - u32 eax, ebx, ecx, edx; - - cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); - nodes = ((ecx >> 8) & 7) + 1; - node_id = ecx & 7; - - /* get compute unit information */ - smp_num_siblings = ((ebx >> 8) & 3) + 1; - c->compute_unit_id = ebx & 0xff; - cores_per_cu += ((ebx >> 8) & 3); - } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { - u64 value; - - rdmsrl(MSR_FAM10H_NODE_ID, value); - nodes = ((value >> 3) & 7) + 1; - node_id = value & 7; - } else - return; - - /* fixup multi-node processor information */ - if (nodes > 1) { - u32 cores_per_node; - u32 cus_per_node; - - set_cpu_cap(c, X86_FEATURE_AMD_DCM); - cores_per_node = c->x86_max_cores / nodes; - cus_per_node = cores_per_node / cores_per_cu; - - /* store NodeID, use llc_shared_map to store sibling info */ - per_cpu(cpu_llc_id, cpu) = node_id; - - /* core id has to be in the [0 .. cores_per_node - 1] range */ - c->cpu_core_id %= cores_per_node; - c->compute_unit_id %= cus_per_node; - } -} -#endif - -/* - * On a AMD dual core setup the lower bits of the APIC id distingush the cores. - * Assumes number of cores is a power of two. - */ -static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_HT - unsigned bits; - int cpu = smp_processor_id(); - - bits = c->x86_coreid_bits; - /* Low order bits define the core id (index of core in socket) */ - c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); - /* Convert the initial APIC ID into the socket ID */ - c->phys_proc_id = c->initial_apicid >> bits; - /* use socket ID also for last level cache */ - per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; - amd_get_topology(c); -#endif -} - -int amd_get_nb_id(int cpu) -{ - int id = 0; -#ifdef CONFIG_SMP - id = per_cpu(cpu_llc_id, cpu); -#endif - return id; -} -EXPORT_SYMBOL_GPL(amd_get_nb_id); - -static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_NUMA - int cpu = smp_processor_id(); - int node; - unsigned apicid = c->apicid; - - node = numa_cpu_node(cpu); - if (node == NUMA_NO_NODE) - node = per_cpu(cpu_llc_id, cpu); - - /* - * On multi-fabric platform (e.g. Numascale NumaChip) a - * platform-specific handler needs to be called to fixup some - * IDs of the CPU. 
- */ - if (x86_cpuinit.fixup_cpu_id) - x86_cpuinit.fixup_cpu_id(c, node); - - if (!node_online(node)) { - /* - * Two possibilities here: - * - * - The CPU is missing memory and no node was created. In - * that case try picking one from a nearby CPU. - * - * - The APIC IDs differ from the HyperTransport node IDs - * which the K8 northbridge parsing fills in. Assume - * they are all increased by a constant offset, but in - * the same order as the HT nodeids. If that doesn't - * result in a usable node fall back to the path for the - * previous case. - * - * This workaround operates directly on the mapping between - * APIC ID and NUMA node, assuming certain relationship - * between APIC ID, HT node ID and NUMA topology. As going - * through CPU mapping may alter the outcome, directly - * access __apicid_to_node[]. - */ - int ht_nodeid = c->initial_apicid; - - if (ht_nodeid >= 0 && - __apicid_to_node[ht_nodeid] != NUMA_NO_NODE) - node = __apicid_to_node[ht_nodeid]; - /* Pick a nearby node */ - if (!node_online(node)) - node = nearby_node(apicid); - } - numa_set_node(cpu, node); -#endif -} - -static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_HT - unsigned bits, ecx; - - /* Multi core CPU? */ - if (c->extended_cpuid_level < 0x80000008) - return; - - ecx = cpuid_ecx(0x80000008); - - c->x86_max_cores = (ecx & 0xff) + 1; - - /* CPU telling us the core id bits shift? */ - bits = (ecx >> 12) & 0xF; - - /* Otherwise recompute */ - if (bits == 0) { - while ((1 << bits) < c->x86_max_cores) - bits++; - } - - c->x86_coreid_bits = bits; -#endif -} - -static void __cpuinit bsp_init_amd(struct cpuinfo_x86 *c) -{ - if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { - - if (c->x86 > 0x10 || - (c->x86 == 0x10 && c->x86_model >= 0x2)) { - u64 val; - - rdmsrl(MSR_K7_HWCR, val); - if (!(val & BIT(24))) - printk(KERN_WARNING FW_BUG "TSC doesn't count " - "with P0 frequency!\n"); - } - } - - if (c->x86 == 0x15) { - unsigned long upperbit; - u32 cpuid, assoc; - - cpuid = cpuid_edx(0x80000005); - assoc = cpuid >> 16 & 0xff; - upperbit = ((cpuid >> 24) << 10) / assoc; - - va_align.mask = (upperbit - 1) & PAGE_MASK; - va_align.flags = ALIGN_VA_32 | ALIGN_VA_64; - } -} - -static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) -{ - early_init_amd_mc(c); - - /* - * c->x86_power is 8000_0007 edx. 
Bit 8 is TSC runs at constant rate - * with P/T states and does not stop in deep C-states - */ - if (c->x86_power & (1 << 8)) { - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); - set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - if (!check_tsc_unstable()) - sched_clock_stable = 1; - } - -#ifdef CONFIG_X86_64 - set_cpu_cap(c, X86_FEATURE_SYSCALL32); -#else - /* Set MTRR capability flag if appropriate */ - if (c->x86 == 5) - if (c->x86_model == 13 || c->x86_model == 9 || - (c->x86_model == 8 && c->x86_mask >= 8)) - set_cpu_cap(c, X86_FEATURE_K6_MTRR); -#endif -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI) - /* check CPU config space for extended APIC ID */ - if (cpu_has_apic && c->x86 >= 0xf) { - unsigned int val; - val = read_pci_config(0, 24, 0, 0x68); - if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18))) - set_cpu_cap(c, X86_FEATURE_EXTD_APICID); - } -#endif -} - -static void __cpuinit init_amd(struct cpuinfo_x86 *c) -{ - u32 dummy; - -#ifdef CONFIG_SMP - unsigned long long value; - - /* - * Disable TLB flush filter by setting HWCR.FFDIS on K8 - * bit 6 of msr C001_0015 - * - * Errata 63 for SH-B3 steppings - * Errata 122 for all steppings (F+ have it disabled by default) - */ - if (c->x86 == 0xf) { - rdmsrl(MSR_K7_HWCR, value); - value |= 1 << 6; - wrmsrl(MSR_K7_HWCR, value); - } -#endif - - early_init_amd(c); - - /* - * Bit 31 in normal CPUID used for nonstandard 3DNow ID; - * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway - */ - clear_cpu_cap(c, 0*32+31); - -#ifdef CONFIG_X86_64 - /* On C+ stepping K8 rep microcode works well for copy/memset */ - if (c->x86 == 0xf) { - u32 level; - - level = cpuid_eax(1); - if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); - - /* - * Some BIOSes incorrectly force this feature, but only K8 - * revision D (model = 0x14) and later actually support it. - * (AMD Erratum #110, docId: 25759). - */ - if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) { - u64 val; - - clear_cpu_cap(c, X86_FEATURE_LAHF_LM); - if (!rdmsrl_amd_safe(0xc001100d, &val)) { - val &= ~(1ULL << 32); - wrmsrl_amd_safe(0xc001100d, val); - } - } - - } - if (c->x86 >= 0x10) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); - - /* get apicid instead of initial apic id from cpuid */ - c->apicid = hard_smp_processor_id(); -#else - - /* - * FIXME: We should handle the K5 here. Set up the write - * range and also turn on MSR 83 bits 4 and 31 (write alloc, - * no bus pipeline) - */ - - switch (c->x86) { - case 4: - init_amd_k5(c); - break; - case 5: - init_amd_k6(c); - break; - case 6: /* An Athlon/Duron */ - init_amd_k7(c); - break; - } - - /* K6s reports MCEs but don't actually have all the MSRs */ - if (c->x86 < 6) - clear_cpu_cap(c, X86_FEATURE_MCE); -#endif - - /* Enable workaround for FXSAVE leak */ - if (c->x86 >= 6) - set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); - - if (!c->x86_model_id[0]) { - switch (c->x86) { - case 0xf: - /* Should distinguish Models here, but this is only - a fallback anyways. 
*/ - strcpy(c->x86_model_id, "Hammer"); - break; - } - } - - /* re-enable TopologyExtensions if switched off by BIOS */ - if ((c->x86 == 0x15) && - (c->x86_model >= 0x10) && (c->x86_model <= 0x1f) && - !cpu_has(c, X86_FEATURE_TOPOEXT)) { - u64 val; - - if (!rdmsrl_amd_safe(0xc0011005, &val)) { - val |= 1ULL << 54; - wrmsrl_amd_safe(0xc0011005, val); - rdmsrl(0xc0011005, val); - if (val & (1ULL << 54)) { - set_cpu_cap(c, X86_FEATURE_TOPOEXT); - printk(KERN_INFO FW_INFO "CPU: Re-enabling " - "disabled Topology Extensions Support\n"); - } - } - } - - cpu_detect_cache_sizes(c); - - /* Multi core CPU? */ - if (c->extended_cpuid_level >= 0x80000008) { - amd_detect_cmp(c); - srat_detect_node(c); - } - -#ifdef CONFIG_X86_32 - detect_ht(c); -#endif - - if (c->extended_cpuid_level >= 0x80000006) { - if (cpuid_edx(0x80000006) & 0xf000) - num_cache_leaves = 4; - else - num_cache_leaves = 3; - } - - if (c->x86 >= 0xf) - set_cpu_cap(c, X86_FEATURE_K8); - - if (cpu_has_xmm2) { - /* MFENCE stops RDTSC speculation */ - set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); - } - -#ifdef CONFIG_X86_64 - if (c->x86 == 0x10) { - /* do this for boot cpu */ - if (c == &boot_cpu_data) - check_enable_amd_mmconf_dmi(); - - fam10h_check_enable_mmcfg(); - } - - if (c == &boot_cpu_data && c->x86 >= 0xf) { - unsigned long long tseg; - - /* - * Split up direct mapping around the TSEG SMM area. - * Don't do it for gbpages because there seems very little - * benefit in doing so. - */ - if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { - printk(KERN_DEBUG "tseg: %010llx\n", tseg); - if ((tseg>>PMD_SHIFT) < - (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || - ((tseg>>PMD_SHIFT) < - (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) && - (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) - set_memory_4k((unsigned long)__va(tseg), 1); - } - } -#endif - - /* - * Family 0x12 and above processors have APIC timer - * running in deep C states. - */ - if (c->x86 > 0x11) - set_cpu_cap(c, X86_FEATURE_ARAT); - - /* - * Disable GART TLB Walk Errors on Fam10h. We do this here - * because this is always needed when GART is enabled, even in a - * kernel which has no MCE support built in. - */ - if (c->x86 == 0x10) { - /* - * BIOS should disable GartTlbWlk Errors themself. If - * it doesn't do it here as suggested by the BKDG. 
- * - * Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=33012 - */ - u64 mask; - int err; - - err = rdmsrl_safe(MSR_AMD64_MCx_MASK(4), &mask); - if (err == 0) { - mask |= (1 << 10); - checking_wrmsrl(MSR_AMD64_MCx_MASK(4), mask); - } - } - - rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy); -} - -#ifdef CONFIG_X86_32 -static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, - unsigned int size) -{ - /* AMD errata T13 (order #21922) */ - if ((c->x86 == 6)) { - /* Duron Rev A0 */ - if (c->x86_model == 3 && c->x86_mask == 0) - size = 64; - /* Tbird rev A1/A2 */ - if (c->x86_model == 4 && - (c->x86_mask == 0 || c->x86_mask == 1)) - size = 256; - } - return size; -} -#endif - -static const struct cpu_dev __cpuinitconst amd_cpu_dev = { - .c_vendor = "AMD", - .c_ident = { "AuthenticAMD" }, -#ifdef CONFIG_X86_32 - .c_models = { - { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = - { - [3] = "486 DX/2", - [7] = "486 DX/2-WB", - [8] = "486 DX/4", - [9] = "486 DX/4-WB", - [14] = "Am5x86-WT", - [15] = "Am5x86-WB" - } - }, - }, - .c_size_cache = amd_size_cache, -#endif - .c_early_init = early_init_amd, - .c_bsp_init = bsp_init_amd, - .c_init = init_amd, - .c_x86_vendor = X86_VENDOR_AMD, -}; - -cpu_dev_register(amd_cpu_dev); - -/* - * AMD errata checking - * - * Errata are defined as arrays of ints using the AMD_LEGACY_ERRATUM() or - * AMD_OSVW_ERRATUM() macros. The latter is intended for newer errata that - * have an OSVW id assigned, which it takes as first argument. Both take a - * variable number of family-specific model-stepping ranges created by - * AMD_MODEL_RANGE(). Each erratum also has to be declared as extern const - * int[] in arch/x86/include/asm/processor.h. - * - * Example: - * - * const int amd_erratum_319[] = - * AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2), - * AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0), - * AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0)); - */ - -const int amd_erratum_400[] = - AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf), - AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf)); -EXPORT_SYMBOL_GPL(amd_erratum_400); - -const int amd_erratum_383[] = - AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf)); -EXPORT_SYMBOL_GPL(amd_erratum_383); - -bool cpu_has_amd_erratum(const int *erratum) -{ - struct cpuinfo_x86 *cpu = __this_cpu_ptr(&cpu_info); - int osvw_id = *erratum++; - u32 range; - u32 ms; - - /* - * If called early enough that current_cpu_data hasn't been initialized - * yet, fall back to boot_cpu_data. 
- */ - if (cpu->x86 == 0) - cpu = &boot_cpu_data; - - if (cpu->x86_vendor != X86_VENDOR_AMD) - return false; - - if (osvw_id >= 0 && osvw_id < 65536 && - cpu_has(cpu, X86_FEATURE_OSVW)) { - u64 osvw_len; - - rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, osvw_len); - if (osvw_id < osvw_len) { - u64 osvw_bits; - - rdmsrl(MSR_AMD64_OSVW_STATUS + (osvw_id >> 6), - osvw_bits); - return osvw_bits & (1ULL << (osvw_id & 0x3f)); - } - } - - /* OSVW unavailable or ID unknown, match family-model-stepping range */ - ms = (cpu->x86_model << 4) | cpu->x86_mask; - while ((range = *erratum++)) - if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) && - (ms >= AMD_MODEL_RANGE_START(range)) && - (ms <= AMD_MODEL_RANGE_END(range))) - return true; - - return false; -} - -EXPORT_SYMBOL_GPL(cpu_has_amd_erratum); diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/bugs.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/bugs.c deleted file mode 100644 index 46674fbb..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/bugs.c +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Copyright (C) 1994 Linus Torvalds - * - * Cyrix stuff, June 1998 by: - * - Rafael R. Reilova (moved everything from head.S), - * <rreilova@ececs.uc.edu> - * - Channing Corn (tests & fixes), - * - Andrew D. Balsa (code cleanup). - */ -#include <linux/init.h> -#include <linux/utsname.h> -#include <asm/bugs.h> -#include <asm/processor.h> -#include <asm/processor-flags.h> -#include <asm/i387.h> -#include <asm/msr.h> -#include <asm/paravirt.h> -#include <asm/alternative.h> - -static int __init no_halt(char *s) -{ - WARN_ONCE(1, "\"no-hlt\" is deprecated, please use \"idle=poll\"\n"); - boot_cpu_data.hlt_works_ok = 0; - return 1; -} - -__setup("no-hlt", no_halt); - -static int __init no_387(char *s) -{ - boot_cpu_data.hard_math = 0; - write_cr0(X86_CR0_TS | X86_CR0_EM | X86_CR0_MP | read_cr0()); - return 1; -} - -__setup("no387", no_387); - -static double __initdata x = 4195835.0; -static double __initdata y = 3145727.0; - -/* - * This used to check for exceptions.. - * However, it turns out that to support that, - * the XMM trap handlers basically had to - * be buggy. So let's have a correct XMM trap - * handler, and forget about printing out - * some status at boot. - * - * We should really only care about bugs here - * anyway. Not features. - */ -static void __init check_fpu(void) -{ - s32 fdiv_bug; - - if (!boot_cpu_data.hard_math) { -#ifndef CONFIG_MATH_EMULATION - printk(KERN_EMERG "No coprocessor found and no math emulation present.\n"); - printk(KERN_EMERG "Giving up.\n"); - for (;;) ; -#endif - return; - } - - kernel_fpu_begin(); - - /* - * trap_init() enabled FXSR and company _before_ testing for FP - * problems here. - * - * Test for the divl bug.. - */ - __asm__("fninit\n\t" - "fldl %1\n\t" - "fdivl %2\n\t" - "fmull %2\n\t" - "fldl %1\n\t" - "fsubp %%st,%%st(1)\n\t" - "fistpl %0\n\t" - "fwait\n\t" - "fninit" - : "=m" (*&fdiv_bug) - : "m" (*&x), "m" (*&y)); - - kernel_fpu_end(); - - boot_cpu_data.fdiv_bug = fdiv_bug; - if (boot_cpu_data.fdiv_bug) - printk(KERN_WARNING "Hmm, FPU with FDIV bug.\n"); -} - -static void __init check_hlt(void) -{ - if (boot_cpu_data.x86 >= 5 || paravirt_enabled()) - return; - - printk(KERN_INFO "Checking 'hlt' instruction... "); - if (!boot_cpu_data.hlt_works_ok) { - printk("disabled\n"); - return; - } - halt(); - halt(); - halt(); - halt(); - printk(KERN_CONT "OK.\n"); -} - -/* - * Most 386 processors have a bug where a POPAD can lock the - * machine even from user space. 
- */ - -static void __init check_popad(void) -{ -#ifndef CONFIG_X86_POPAD_OK - int res, inp = (int) &res; - - printk(KERN_INFO "Checking for popad bug... "); - __asm__ __volatile__( - "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx " - : "=&a" (res) - : "d" (inp) - : "ecx", "edi"); - /* - * If this fails, it means that any user program may lock the - * CPU hard. Too bad. - */ - if (res != 12345678) - printk(KERN_CONT "Buggy.\n"); - else - printk(KERN_CONT "OK.\n"); -#endif -} - -/* - * Check whether we are able to run this kernel safely on SMP. - * - * - In order to run on a i386, we need to be compiled for i386 - * (for due to lack of "invlpg" and working WP on a i386) - * - In order to run on anything without a TSC, we need to be - * compiled for a i486. - */ - -static void __init check_config(void) -{ -/* - * We'd better not be a i386 if we're configured to use some - * i486+ only features! (WP works in supervisor mode and the - * new "invlpg" and "bswap" instructions) - */ -#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || \ - defined(CONFIG_X86_BSWAP) - if (boot_cpu_data.x86 == 3) - panic("Kernel requires i486+ for 'invlpg' and other features"); -#endif -} - - -void __init check_bugs(void) -{ - identify_boot_cpu(); -#ifndef CONFIG_SMP - printk(KERN_INFO "CPU: "); - print_cpu_info(&boot_cpu_data); -#endif - check_config(); - check_fpu(); - check_hlt(); - check_popad(); - init_utsname()->machine[1] = - '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); - alternative_instructions(); -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/bugs_64.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/bugs_64.c deleted file mode 100644 index 04f0fe5a..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/bugs_64.c +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (C) 1994 Linus Torvalds - * Copyright (C) 2000 SuSE - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <asm/alternative.h> -#include <asm/bugs.h> -#include <asm/processor.h> -#include <asm/mtrr.h> -#include <asm/cacheflush.h> - -void __init check_bugs(void) -{ - identify_boot_cpu(); -#if !defined(CONFIG_SMP) - printk(KERN_INFO "CPU: "); - print_cpu_info(&boot_cpu_data); -#endif - alternative_instructions(); - - /* - * Make sure the first 2MB area is not mapped by huge pages - * There are typically fixed size MTRRs in there and overlapping - * MTRRs into large pages causes slow downs. - * - * Right now we don't do that with gbpages because there seems - * very little benefit for that case. 
- */ - if (!direct_gbpages) - set_memory_4k((unsigned long)__va(0), 1); -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/centaur.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/centaur.c deleted file mode 100644 index 159103c0..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/centaur.c +++ /dev/null @@ -1,500 +0,0 @@ -#include <linux/bitops.h> -#include <linux/kernel.h> -#include <linux/init.h> - -#include <asm/processor.h> -#include <asm/e820.h> -#include <asm/mtrr.h> -#include <asm/msr.h> - -#include "cpu.h" - -#ifdef CONFIG_X86_OOSTORE - -static u32 __cpuinit power2(u32 x) -{ - u32 s = 1; - - while (s <= x) - s <<= 1; - - return s >>= 1; -} - - -/* - * Set up an actual MCR - */ -static void __cpuinit centaur_mcr_insert(int reg, u32 base, u32 size, int key) -{ - u32 lo, hi; - - hi = base & ~0xFFF; - lo = ~(size-1); /* Size is a power of 2 so this makes a mask */ - lo &= ~0xFFF; /* Remove the ctrl value bits */ - lo |= key; /* Attribute we wish to set */ - wrmsr(reg+MSR_IDT_MCR0, lo, hi); - mtrr_centaur_report_mcr(reg, lo, hi); /* Tell the mtrr driver */ -} - -/* - * Figure what we can cover with MCR's - * - * Shortcut: We know you can't put 4Gig of RAM on a winchip - */ -static u32 __cpuinit ramtop(void) -{ - u32 clip = 0xFFFFFFFFUL; - u32 top = 0; - int i; - - for (i = 0; i < e820.nr_map; i++) { - unsigned long start, end; - - if (e820.map[i].addr > 0xFFFFFFFFUL) - continue; - /* - * Don't MCR over reserved space. Ignore the ISA hole - * we frob around that catastrophe already - */ - if (e820.map[i].type == E820_RESERVED) { - if (e820.map[i].addr >= 0x100000UL && - e820.map[i].addr < clip) - clip = e820.map[i].addr; - continue; - } - start = e820.map[i].addr; - end = e820.map[i].addr + e820.map[i].size; - if (start >= end) - continue; - if (end > top) - top = end; - } - /* - * Everything below 'top' should be RAM except for the ISA hole. - * Because of the limited MCR's we want to map NV/ACPI into our - * MCR range for gunk in RAM - * - * Clip might cause us to MCR insufficient RAM but that is an - * acceptable failure mode and should only bite obscure boxes with - * a VESA hole at 15Mb - * - * The second case Clip sometimes kicks in is when the EBDA is marked - * as reserved. Again we fail safe with reasonable results - */ - if (top > clip) - top = clip; - - return top; -} - -/* - * Compute a set of MCR's to give maximum coverage - */ -static int __cpuinit centaur_mcr_compute(int nr, int key) -{ - u32 mem = ramtop(); - u32 root = power2(mem); - u32 base = root; - u32 top = root; - u32 floor = 0; - int ct = 0; - - while (ct < nr) { - u32 fspace = 0; - u32 high; - u32 low; - - /* - * Find the largest block we will fill going upwards - */ - high = power2(mem-top); - - /* - * Find the largest block we will fill going downwards - */ - low = base/2; - - /* - * Don't fill below 1Mb going downwards as there - * is an ISA hole in the way. - */ - if (base <= 1024*1024) - low = 0; - - /* - * See how much space we could cover by filling below - * the ISA hole - */ - - if (floor == 0) - fspace = 512*1024; - else if (floor == 512*1024) - fspace = 128*1024; - - /* And forget ROM space */ - - /* - * Now install the largest coverage we get - */ - if (fspace > high && fspace > low) { - centaur_mcr_insert(ct, floor, fspace, key); - floor += fspace; - } else if (high > low) { - centaur_mcr_insert(ct, top, high, key); - top += high; - } else if (low > 0) { - base -= low; - centaur_mcr_insert(ct, base, low, key); - } else - break; - ct++; - } - /* - * We loaded ct values. We now need to set the mask. 
The caller - * must do this bit. - */ - return ct; -} - -static void __cpuinit centaur_create_optimal_mcr(void) -{ - int used; - int i; - - /* - * Allocate up to 6 mcrs to mark as much of ram as possible - * as write combining and weak write ordered. - * - * To experiment with: Linux never uses stack operations for - * mmio spaces so we could globally enable stack operation wc - * - * Load the registers with type 31 - full write combining, all - * writes weakly ordered. - */ - used = centaur_mcr_compute(6, 31); - - /* - * Wipe unused MCRs - */ - for (i = used; i < 8; i++) - wrmsr(MSR_IDT_MCR0+i, 0, 0); -} - -static void __cpuinit winchip2_create_optimal_mcr(void) -{ - u32 lo, hi; - int used; - int i; - - /* - * Allocate up to 6 mcrs to mark as much of ram as possible - * as write combining, weak store ordered. - * - * Load the registers with type 25 - * 8 - weak write ordering - * 16 - weak read ordering - * 1 - write combining - */ - used = centaur_mcr_compute(6, 25); - - /* - * Mark the registers we are using. - */ - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - for (i = 0; i < used; i++) - lo |= 1<<(9+i); - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); - - /* - * Wipe unused MCRs - */ - - for (i = used; i < 8; i++) - wrmsr(MSR_IDT_MCR0+i, 0, 0); -} - -/* - * Handle the MCR key on the Winchip 2. - */ -static void __cpuinit winchip2_unprotect_mcr(void) -{ - u32 lo, hi; - u32 key; - - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - lo &= ~0x1C0; /* blank bits 8-6 */ - key = (lo>>17) & 7; - lo |= key<<6; /* replace with unlock key */ - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); -} - -static void __cpuinit winchip2_protect_mcr(void) -{ - u32 lo, hi; - - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - lo &= ~0x1C0; /* blank bits 8-6 */ - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); -} -#endif /* CONFIG_X86_OOSTORE */ - -#define ACE_PRESENT (1 << 6) -#define ACE_ENABLED (1 << 7) -#define ACE_FCR (1 << 28) /* MSR_VIA_FCR */ - -#define RNG_PRESENT (1 << 2) -#define RNG_ENABLED (1 << 3) -#define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */ - -static void __cpuinit init_c3(struct cpuinfo_x86 *c) -{ - u32 lo, hi; - - /* Test for Centaur Extended Feature Flags presence */ - if (cpuid_eax(0xC0000000) >= 0xC0000001) { - u32 tmp = cpuid_edx(0xC0000001); - - /* enable ACE unit, if present and disabled */ - if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) { - rdmsr(MSR_VIA_FCR, lo, hi); - lo |= ACE_FCR; /* enable ACE unit */ - wrmsr(MSR_VIA_FCR, lo, hi); - printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n"); - } - - /* enable RNG unit, if present and disabled */ - if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) { - rdmsr(MSR_VIA_RNG, lo, hi); - lo |= RNG_ENABLE; /* enable RNG unit */ - wrmsr(MSR_VIA_RNG, lo, hi); - printk(KERN_INFO "CPU: Enabled h/w RNG\n"); - } - - /* store Centaur Extended Feature Flags as - * word 5 of the CPU capability bit array - */ - c->x86_capability[5] = cpuid_edx(0xC0000001); - } -#ifdef CONFIG_X86_32 - /* Cyrix III family needs CX8 & PGE explicitly enabled. */ - if (c->x86_model >= 6 && c->x86_model <= 13) { - rdmsr(MSR_VIA_FCR, lo, hi); - lo |= (1<<1 | 1<<7); - wrmsr(MSR_VIA_FCR, lo, hi); - set_cpu_cap(c, X86_FEATURE_CX8); - } - - /* Before Nehemiah, the C3's had 3dNOW! 
*/ - if (c->x86_model >= 6 && c->x86_model < 9) - set_cpu_cap(c, X86_FEATURE_3DNOW); -#endif - if (c->x86 == 0x6 && c->x86_model >= 0xf) { - c->x86_cache_alignment = c->x86_clflush_size * 2; - set_cpu_cap(c, X86_FEATURE_REP_GOOD); - } - - cpu_detect_cache_sizes(c); -} - -enum { - ECX8 = 1<<1, - EIERRINT = 1<<2, - DPM = 1<<3, - DMCE = 1<<4, - DSTPCLK = 1<<5, - ELINEAR = 1<<6, - DSMC = 1<<7, - DTLOCK = 1<<8, - EDCTLB = 1<<8, - EMMX = 1<<9, - DPDC = 1<<11, - EBRPRED = 1<<12, - DIC = 1<<13, - DDC = 1<<14, - DNA = 1<<15, - ERETSTK = 1<<16, - E2MMX = 1<<19, - EAMD3D = 1<<20, -}; - -static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) -{ - switch (c->x86) { -#ifdef CONFIG_X86_32 - case 5: - /* Emulate MTRRs using Centaur's MCR. */ - set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR); - break; -#endif - case 6: - if (c->x86_model >= 0xf) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); - break; - } -#ifdef CONFIG_X86_64 - set_cpu_cap(c, X86_FEATURE_SYSENTER32); -#endif -} - -static void __cpuinit init_centaur(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_32 - char *name; - u32 fcr_set = 0; - u32 fcr_clr = 0; - u32 lo, hi, newlo; - u32 aa, bb, cc, dd; - - /* - * Bit 31 in normal CPUID used for nonstandard 3DNow ID; - * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway - */ - clear_cpu_cap(c, 0*32+31); -#endif - early_init_centaur(c); - switch (c->x86) { -#ifdef CONFIG_X86_32 - case 5: - switch (c->x86_model) { - case 4: - name = "C6"; - fcr_set = ECX8|DSMC|EDCTLB|EMMX|ERETSTK; - fcr_clr = DPDC; - printk(KERN_NOTICE "Disabling bugged TSC.\n"); - clear_cpu_cap(c, X86_FEATURE_TSC); -#ifdef CONFIG_X86_OOSTORE - centaur_create_optimal_mcr(); - /* - * Enable: - * write combining on non-stack, non-string - * write combining on string, all types - * weak write ordering - * - * The C6 original lacks weak read order - * - * Note 0x120 is write only on Winchip 1 - */ - wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); -#endif - break; - case 8: - switch (c->x86_mask) { - default: - name = "2"; - break; - case 7 ... 9: - name = "2A"; - break; - case 10 ... 15: - name = "2B"; - break; - } - fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK| - E2MMX|EAMD3D; - fcr_clr = DPDC; -#ifdef CONFIG_X86_OOSTORE - winchip2_unprotect_mcr(); - winchip2_create_optimal_mcr(); - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - /* - * Enable: - * write combining on non-stack, non-string - * write combining on string, all types - * weak write ordering - */ - lo |= 31; - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); - winchip2_protect_mcr(); -#endif - break; - case 9: - name = "3"; - fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK| - E2MMX|EAMD3D; - fcr_clr = DPDC; -#ifdef CONFIG_X86_OOSTORE - winchip2_unprotect_mcr(); - winchip2_create_optimal_mcr(); - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - /* - * Enable: - * write combining on non-stack, non-string - * write combining on string, all types - * weak write ordering - */ - lo |= 31; - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); - winchip2_protect_mcr(); -#endif - break; - default: - name = "??"; - } - - rdmsr(MSR_IDT_FCR1, lo, hi); - newlo = (lo|fcr_set) & (~fcr_clr); - - if (newlo != lo) { - printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n", - lo, newlo); - wrmsr(MSR_IDT_FCR1, newlo, hi); - } else { - printk(KERN_INFO "Centaur FCR is 0x%X\n", lo); - } - /* Emulate MTRRs using Centaur's MCR. */ - set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR); - /* Report CX8 */ - set_cpu_cap(c, X86_FEATURE_CX8); - /* Set 3DNow! on Winchip 2 and above. */ - if (c->x86_model >= 8) - set_cpu_cap(c, X86_FEATURE_3DNOW); - /* See if we can find out some more. 
*/ - if (cpuid_eax(0x80000000) >= 0x80000005) { - /* Yes, we can. */ - cpuid(0x80000005, &aa, &bb, &cc, &dd); - /* Add L1 data and code cache sizes. */ - c->x86_cache_size = (cc>>24)+(dd>>24); - } - sprintf(c->x86_model_id, "WinChip %s", name); - break; -#endif - case 6: - init_c3(c); - break; - } -#ifdef CONFIG_X86_64 - set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); -#endif -} - -static unsigned int __cpuinit -centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) -{ -#ifdef CONFIG_X86_32 - /* VIA C3 CPUs (670-68F) need further shifting. */ - if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8))) - size >>= 8; - - /* - * There's also an erratum in Nehemiah stepping 1, which - * returns '65KB' instead of '64KB' - * - Note, it seems this may only be in engineering samples. - */ - if ((c->x86 == 6) && (c->x86_model == 9) && - (c->x86_mask == 1) && (size == 65)) - size -= 1; -#endif - return size; -} - -static const struct cpu_dev __cpuinitconst centaur_cpu_dev = { - .c_vendor = "Centaur", - .c_ident = { "CentaurHauls" }, - .c_early_init = early_init_centaur, - .c_init = init_centaur, - .c_size_cache = centaur_size_cache, - .c_x86_vendor = X86_VENDOR_CENTAUR, -}; - -cpu_dev_register(centaur_cpu_dev); diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/common.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/common.c deleted file mode 100644 index cf793021..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/common.c +++ /dev/null @@ -1,1318 +0,0 @@ -#include <linux/bootmem.h> -#include <linux/linkage.h> -#include <linux/bitops.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/percpu.h> -#include <linux/string.h> -#include <linux/delay.h> -#include <linux/sched.h> -#include <linux/init.h> -#include <linux/kgdb.h> -#include <linux/smp.h> -#include <linux/io.h> - -#include <asm/stackprotector.h> -#include <asm/perf_event.h> -#include <asm/mmu_context.h> -#include <asm/archrandom.h> -#include <asm/hypervisor.h> -#include <asm/processor.h> -#include <asm/debugreg.h> -#include <asm/sections.h> -#include <linux/topology.h> -#include <linux/cpumask.h> -#include <asm/pgtable.h> -#include <linux/atomic.h> -#include <asm/proto.h> -#include <asm/setup.h> -#include <asm/apic.h> -#include <asm/desc.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> -#include <asm/mtrr.h> -#include <linux/numa.h> -#include <asm/asm.h> -#include <asm/cpu.h> -#include <asm/mce.h> -#include <asm/msr.h> -#include <asm/pat.h> - -#ifdef CONFIG_X86_LOCAL_APIC -#include <asm/uv/uv.h> -#endif - -#include "cpu.h" - -/* all of these masks are initialized in setup_cpu_local_masks() */ -cpumask_var_t cpu_initialized_mask; -cpumask_var_t cpu_callout_mask; -cpumask_var_t cpu_callin_mask; - -/* representing cpus for which sibling maps can be computed */ -cpumask_var_t cpu_sibling_setup_mask; - -/* correctly size the local cpu masks */ -void __init setup_cpu_local_masks(void) -{ - alloc_bootmem_cpumask_var(&cpu_initialized_mask); - alloc_bootmem_cpumask_var(&cpu_callin_mask); - alloc_bootmem_cpumask_var(&cpu_callout_mask); - alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); -} - -static void __cpuinit default_init(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_64 - cpu_detect_cache_sizes(c); -#else - /* Not much we can do here... */ - /* Check if at least it has cpuid */ - if (c->cpuid_level == -1) { - /* No cpuid. 
It must be an ancient CPU */ - if (c->x86 == 4) - strcpy(c->x86_model_id, "486"); - else if (c->x86 == 3) - strcpy(c->x86_model_id, "386"); - } -#endif -} - -static const struct cpu_dev __cpuinitconst default_cpu = { - .c_init = default_init, - .c_vendor = "Unknown", - .c_x86_vendor = X86_VENDOR_UNKNOWN, -}; - -static const struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; - -DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { -#ifdef CONFIG_X86_64 - /* - * We need valid kernel segments for data and code in long mode too - * IRET will check the segment types kkeil 2000/10/28 - * Also sysret mandates a special GDT layout - * - * TLS descriptors are currently at a different place compared to i386. - * Hopefully nobody expects them at a fixed place (Wine?) - */ - [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff), - [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff), - [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff), - [GDT_ENTRY_DEFAULT_USER32_CS] = GDT_ENTRY_INIT(0xc0fb, 0, 0xfffff), - [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f3, 0, 0xfffff), - [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xa0fb, 0, 0xfffff), -#else - [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xc09a, 0, 0xfffff), - [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), - [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xc0fa, 0, 0xfffff), - [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f2, 0, 0xfffff), - /* - * Segments used for calling PnP BIOS have byte granularity. - * They code segments and data segments have fixed 64k limits, - * the transfer segment sizes are set at run time. - */ - /* 32-bit code */ - [GDT_ENTRY_PNPBIOS_CS32] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), - /* 16-bit code */ - [GDT_ENTRY_PNPBIOS_CS16] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), - /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_DS] = GDT_ENTRY_INIT(0x0092, 0, 0xffff), - /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_TS1] = GDT_ENTRY_INIT(0x0092, 0, 0), - /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_TS2] = GDT_ENTRY_INIT(0x0092, 0, 0), - /* - * The APM segments have byte granularity and their bases - * are set at run time. All have 64k limits. 
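
The GDT_ENTRY_INIT(flags, base, limit) initializers above all follow the same 8-byte descriptor layout: the low byte of flags is the access byte, its high nibble supplies the G/D/L/AVL bits, and base/limit are scattered across the remaining fields. A rough user-space sketch of that packing (a hypothetical pack_desc() helper, not the kernel macro itself):

    #include <stdint.h>
    #include <stdio.h>

    /* Pack an x86 segment descriptor the way GDT_ENTRY_INIT(flags, base,
     * limit) does: limit[15:0] low, base[23:0] next, access byte and
     * G/D nibble from 'flags', then the top bits of limit and base. */
    static uint64_t pack_desc(uint16_t flags, uint32_t base, uint32_t limit)
    {
        return ((uint64_t)(base  & 0xff000000) << 32) |
               ((uint64_t)(flags & 0x0000f0ff) << 40) |
               ((uint64_t)(limit & 0x000f0000) << 32) |
               ((uint64_t)(base  & 0x00ffffff) << 16) |
                (uint64_t)(limit & 0x0000ffff);
    }

    int main(void)
    {
        /* Kernel code segment from the table above: 0xc09b = 4K granular,
         * 32-bit, present, DPL 0, execute/read; limit 0xfffff pages. */
        printf("0x%016llx\n",
               (unsigned long long)pack_desc(0xc09b, 0, 0xfffff));
        return 0;
    }

For GDT_ENTRY_INIT(0xc09b, 0, 0xfffff) this prints 0x00cf9b000000ffff, the familiar flat 4 GiB ring-0 code segment.
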
- */ - /* 32-bit code */ - [GDT_ENTRY_APMBIOS_BASE] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), - /* 16-bit code */ - [GDT_ENTRY_APMBIOS_BASE+1] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), - /* data */ - [GDT_ENTRY_APMBIOS_BASE+2] = GDT_ENTRY_INIT(0x4092, 0, 0xffff), - - [GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), - [GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), - GDT_STACK_CANARY_INIT -#endif -} }; -EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); - -static int __init x86_xsave_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_XSAVE); - setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); - return 1; -} -__setup("noxsave", x86_xsave_setup); - -static int __init x86_xsaveopt_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); - return 1; -} -__setup("noxsaveopt", x86_xsaveopt_setup); - -#ifdef CONFIG_X86_32 -static int cachesize_override __cpuinitdata = -1; -static int disable_x86_serial_nr __cpuinitdata = 1; - -static int __init cachesize_setup(char *str) -{ - get_option(&str, &cachesize_override); - return 1; -} -__setup("cachesize=", cachesize_setup); - -static int __init x86_fxsr_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_FXSR); - setup_clear_cpu_cap(X86_FEATURE_XMM); - return 1; -} -__setup("nofxsr", x86_fxsr_setup); - -static int __init x86_sep_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_SEP); - return 1; -} -__setup("nosep", x86_sep_setup); - -/* Standard macro to see if a specific flag is changeable */ -static inline int flag_is_changeable_p(u32 flag) -{ - u32 f1, f2; - - /* - * Cyrix and IDT cpus allow disabling of CPUID - * so the code below may return different results - * when it is executed before and after enabling - * the CPUID. Add "volatile" to not allow gcc to - * optimize the subsequent calls to this function. 
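
The probe this comment introduces relies on EFLAGS bit 21 (ID) being writable only when the CPUID instruction is implemented, which matters because Cyrix and IDT parts can have CPUID switched off. As a user-space rendering of the same idea, here in 64-bit form and purely for illustration (every x86-64 CPU has CPUID, so it always reports "present"):

    #include <stdint.h>
    #include <stdio.h>

    /* Try to flip a bit in RFLAGS and check whether the change sticks. */
    static int flag_is_changeable(uint64_t flag)
    {
        uint64_t f1, f2;

        __asm__ __volatile__(
            "pushfq\n\t"
            "pushfq\n\t"
            "popq %0\n\t"
            "movq %0, %1\n\t"
            "xorq %2, %0\n\t"
            "pushq %0\n\t"
            "popfq\n\t"
            "pushfq\n\t"
            "popq %0\n\t"
            "popfq\n\t"
            : "=&r" (f1), "=&r" (f2)
            : "r" (flag)
            : "cc");

        return ((f1 ^ f2) & flag) != 0;
    }

    int main(void)
    {
        printf("CPUID %s\n",
               flag_is_changeable(1ULL << 21) ? "present" : "absent");
        return 0;
    }
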
- */ - asm volatile ("pushfl \n\t" - "pushfl \n\t" - "popl %0 \n\t" - "movl %0, %1 \n\t" - "xorl %2, %0 \n\t" - "pushl %0 \n\t" - "popfl \n\t" - "pushfl \n\t" - "popl %0 \n\t" - "popfl \n\t" - - : "=&r" (f1), "=&r" (f2) - : "ir" (flag)); - - return ((f1^f2) & flag) != 0; -} - -/* Probe for the CPUID instruction */ -static int __cpuinit have_cpuid_p(void) -{ - return flag_is_changeable_p(X86_EFLAGS_ID); -} - -static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) -{ - unsigned long lo, hi; - - if (!cpu_has(c, X86_FEATURE_PN) || !disable_x86_serial_nr) - return; - - /* Disable processor serial number: */ - - rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); - lo |= 0x200000; - wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); - - printk(KERN_NOTICE "CPU serial number disabled.\n"); - clear_cpu_cap(c, X86_FEATURE_PN); - - /* Disabling the serial number may affect the cpuid level */ - c->cpuid_level = cpuid_eax(0); -} - -static int __init x86_serial_nr_setup(char *s) -{ - disable_x86_serial_nr = 0; - return 1; -} -__setup("serialnumber", x86_serial_nr_setup); -#else -static inline int flag_is_changeable_p(u32 flag) -{ - return 1; -} -/* Probe for the CPUID instruction */ -static inline int have_cpuid_p(void) -{ - return 1; -} -static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) -{ -} -#endif - -static int disable_smep __cpuinitdata; -static __init int setup_disable_smep(char *arg) -{ - disable_smep = 1; - return 1; -} -__setup("nosmep", setup_disable_smep); - -static __cpuinit void setup_smep(struct cpuinfo_x86 *c) -{ - if (cpu_has(c, X86_FEATURE_SMEP)) { - if (unlikely(disable_smep)) { - setup_clear_cpu_cap(X86_FEATURE_SMEP); - clear_in_cr4(X86_CR4_SMEP); - } else - set_in_cr4(X86_CR4_SMEP); - } -} - -/* - * Some CPU features depend on higher CPUID levels, which may not always - * be available due to CPUID level capping or broken virtualization - * software. Add those features to this table to auto-disable them. - */ -struct cpuid_dependent_feature { - u32 feature; - u32 level; -}; - -static const struct cpuid_dependent_feature __cpuinitconst -cpuid_dependent_features[] = { - { X86_FEATURE_MWAIT, 0x00000005 }, - { X86_FEATURE_DCA, 0x00000009 }, - { X86_FEATURE_XSAVE, 0x0000000d }, - { 0, 0 } -}; - -static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) -{ - const struct cpuid_dependent_feature *df; - - for (df = cpuid_dependent_features; df->feature; df++) { - - if (!cpu_has(c, df->feature)) - continue; - /* - * Note: cpuid_level is set to -1 if unavailable, but - * extended_extended_level is set to 0 if unavailable - * and the legitimate extended levels are all negative - * when signed; hence the weird messing around with - * signs here... - */ - if (!((s32)df->level < 0 ? - (u32)df->level > (u32)c->extended_cpuid_level : - (s32)df->level > (s32)c->cpuid_level)) - continue; - - clear_cpu_cap(c, df->feature); - if (!warn) - continue; - - printk(KERN_WARNING - "CPU: CPU feature %s disabled, no CPUID level 0x%x\n", - x86_cap_flags[df->feature], df->level); - } -} - -/* - * Naming convention should be: <Name> [(<Codename>)] - * This table only is used unless init_<vendor>() below doesn't set it; - * in particular, if CPUID levels 0x80000002..4 are supported, this - * isn't used - */ - -/* Look up CPU names by table lookup. 
*/ -static const char *__cpuinit table_lookup_model(struct cpuinfo_x86 *c) -{ - const struct cpu_model_info *info; - - if (c->x86_model >= 16) - return NULL; /* Range check */ - - if (!this_cpu) - return NULL; - - info = this_cpu->c_models; - - while (info && info->family) { - if (info->family == c->x86) - return info->model_names[c->x86_model]; - info++; - } - return NULL; /* Not found */ -} - -__u32 cpu_caps_cleared[NCAPINTS] __cpuinitdata; -__u32 cpu_caps_set[NCAPINTS] __cpuinitdata; - -void load_percpu_segment(int cpu) -{ -#ifdef CONFIG_X86_32 - loadsegment(fs, __KERNEL_PERCPU); -#else - loadsegment(gs, 0); - wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); -#endif - load_stack_canary_segment(); -} - -/* - * Current gdt points %fs at the "master" per-cpu area: after this, - * it's on the real one. - */ -void switch_to_new_gdt(int cpu) -{ - struct desc_ptr gdt_descr; - - gdt_descr.address = (long)get_cpu_gdt_table(cpu); - gdt_descr.size = GDT_SIZE - 1; - load_gdt(&gdt_descr); - /* Reload the per-cpu base */ - - load_percpu_segment(cpu); -} - -static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {}; - -static void __cpuinit get_model_name(struct cpuinfo_x86 *c) -{ - unsigned int *v; - char *p, *q; - - if (c->extended_cpuid_level < 0x80000004) - return; - - v = (unsigned int *)c->x86_model_id; - cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); - cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); - cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); - c->x86_model_id[48] = 0; - - /* - * Intel chips right-justify this string for some dumb reason; - * undo that brain damage: - */ - p = q = &c->x86_model_id[0]; - while (*p == ' ') - p++; - if (p != q) { - while (*p) - *q++ = *p++; - while (q <= &c->x86_model_id[48]) - *q++ = '\0'; /* Zero-pad the rest */ - } -} - -void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c) -{ - unsigned int n, dummy, ebx, ecx, edx, l2size; - - n = c->extended_cpuid_level; - - if (n >= 0x80000005) { - cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); - c->x86_cache_size = (ecx>>24) + (edx>>24); -#ifdef CONFIG_X86_64 - /* On K8 L1 TLB is inclusive, so don't count it */ - c->x86_tlbsize = 0; -#endif - } - - if (n < 0x80000006) /* Some chips just has a large L1. */ - return; - - cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); - l2size = ecx >> 16; - -#ifdef CONFIG_X86_64 - c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); -#else - /* do processor-specific cache resizing */ - if (this_cpu->c_size_cache) - l2size = this_cpu->c_size_cache(c, l2size); - - /* Allow user to override all this if necessary. 
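
get_model_name() and cpu_detect_cache_sizes() above pull the brand string and cache geometry out of the extended CPUID leaves. The same leaves are reachable from user space through the compiler's <cpuid.h> helpers; a small sketch (GCC/Clang, x86 only):

    #include <stdio.h>
    #include <string.h>
    #include <cpuid.h>

    int main(void)
    {
        unsigned int regs[4], max_ext;
        char brand[49] = "";

        max_ext = __get_cpuid_max(0x80000000, NULL);

        /* Leaves 0x80000002..4 return the 48-byte brand string. */
        if (max_ext >= 0x80000004) {
            for (unsigned int i = 0; i < 3; i++) {
                __get_cpuid(0x80000002 + i,
                            &regs[0], &regs[1], &regs[2], &regs[3]);
                memcpy(brand + 16 * i, regs, 16);
            }
            printf("brand: %s\n", brand);
        }

        /* Leaf 0x80000006: ECX[31:16] = unified L2 size in KB, as used
         * by cpu_detect_cache_sizes() above. */
        if (max_ext >= 0x80000006) {
            __get_cpuid(0x80000006, &regs[0], &regs[1], &regs[2], &regs[3]);
            printf("L2: %u KB\n", regs[2] >> 16);
        }
        return 0;
    }
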
*/ - if (cachesize_override != -1) - l2size = cachesize_override; - - if (l2size == 0) - return; /* Again, no L2 cache is possible */ -#endif - - c->x86_cache_size = l2size; -} - -void __cpuinit detect_ht(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_HT - u32 eax, ebx, ecx, edx; - int index_msb, core_bits; - static bool printed; - - if (!cpu_has(c, X86_FEATURE_HT)) - return; - - if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) - goto out; - - if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) - return; - - cpuid(1, &eax, &ebx, &ecx, &edx); - - smp_num_siblings = (ebx & 0xff0000) >> 16; - - if (smp_num_siblings == 1) { - printk_once(KERN_INFO "CPU0: Hyper-Threading is disabled\n"); - goto out; - } - - if (smp_num_siblings <= 1) - goto out; - - index_msb = get_count_order(smp_num_siblings); - c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb); - - smp_num_siblings = smp_num_siblings / c->x86_max_cores; - - index_msb = get_count_order(smp_num_siblings); - - core_bits = get_count_order(c->x86_max_cores); - - c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) & - ((1 << core_bits) - 1); - -out: - if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) { - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - c->phys_proc_id); - printk(KERN_INFO "CPU: Processor Core ID: %d\n", - c->cpu_core_id); - printed = 1; - } -#endif -} - -static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) -{ - char *v = c->x86_vendor_id; - int i; - - for (i = 0; i < X86_VENDOR_NUM; i++) { - if (!cpu_devs[i]) - break; - - if (!strcmp(v, cpu_devs[i]->c_ident[0]) || - (cpu_devs[i]->c_ident[1] && - !strcmp(v, cpu_devs[i]->c_ident[1]))) { - - this_cpu = cpu_devs[i]; - c->x86_vendor = this_cpu->c_x86_vendor; - return; - } - } - - printk_once(KERN_ERR - "CPU: vendor_id '%s' unknown, using generic init.\n" \ - "CPU: Your system may be unstable.\n", v); - - c->x86_vendor = X86_VENDOR_UNKNOWN; - this_cpu = &default_cpu; -} - -void __cpuinit cpu_detect(struct cpuinfo_x86 *c) -{ - /* Get vendor name */ - cpuid(0x00000000, (unsigned int *)&c->cpuid_level, - (unsigned int *)&c->x86_vendor_id[0], - (unsigned int *)&c->x86_vendor_id[8], - (unsigned int *)&c->x86_vendor_id[4]); - - c->x86 = 4; - /* Intel-defined flags: level 0x00000001 */ - if (c->cpuid_level >= 0x00000001) { - u32 junk, tfms, cap0, misc; - - cpuid(0x00000001, &tfms, &misc, &junk, &cap0); - c->x86 = (tfms >> 8) & 0xf; - c->x86_model = (tfms >> 4) & 0xf; - c->x86_mask = tfms & 0xf; - - if (c->x86 == 0xf) - c->x86 += (tfms >> 20) & 0xff; - if (c->x86 >= 0x6) - c->x86_model += ((tfms >> 16) & 0xf) << 4; - - if (cap0 & (1<<19)) { - c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; - c->x86_cache_alignment = c->x86_clflush_size; - } - } -} - -void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) -{ - u32 tfms, xlvl; - u32 ebx; - - /* Intel-defined flags: level 0x00000001 */ - if (c->cpuid_level >= 0x00000001) { - u32 capability, excap; - - cpuid(0x00000001, &tfms, &ebx, &excap, &capability); - c->x86_capability[0] = capability; - c->x86_capability[4] = excap; - } - - /* Additional Intel-defined flags: level 0x00000007 */ - if (c->cpuid_level >= 0x00000007) { - u32 eax, ebx, ecx, edx; - - cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); - - c->x86_capability[9] = ebx; - } - - /* AMD-defined flags: level 0x80000001 */ - xlvl = cpuid_eax(0x80000000); - c->extended_cpuid_level = xlvl; - - if ((xlvl & 0xffff0000) == 0x80000000) { - if (xlvl >= 0x80000001) { - c->x86_capability[1] = cpuid_edx(0x80000001); - c->x86_capability[6] = cpuid_ecx(0x80000001); - } - } - - if 
(c->extended_cpuid_level >= 0x80000008) { - u32 eax = cpuid_eax(0x80000008); - - c->x86_virt_bits = (eax >> 8) & 0xff; - c->x86_phys_bits = eax & 0xff; - } -#ifdef CONFIG_X86_32 - else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36)) - c->x86_phys_bits = 36; -#endif - - if (c->extended_cpuid_level >= 0x80000007) - c->x86_power = cpuid_edx(0x80000007); - - init_scattered_cpuid_features(c); -} - -static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_32 - int i; - - /* - * First of all, decide if this is a 486 or higher - * It's a 486 if we can modify the AC flag - */ - if (flag_is_changeable_p(X86_EFLAGS_AC)) - c->x86 = 4; - else - c->x86 = 3; - - for (i = 0; i < X86_VENDOR_NUM; i++) - if (cpu_devs[i] && cpu_devs[i]->c_identify) { - c->x86_vendor_id[0] = 0; - cpu_devs[i]->c_identify(c); - if (c->x86_vendor_id[0]) { - get_cpu_vendor(c); - break; - } - } -#endif -} - -/* - * Do minimum CPU detection early. - * Fields really needed: vendor, cpuid_level, family, model, mask, - * cache alignment. - * The others are not touched to avoid unwanted side effects. - * - * WARNING: this function is only called on the BP. Don't add code here - * that is supposed to run on all CPUs. - */ -static void __init early_identify_cpu(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_64 - c->x86_clflush_size = 64; - c->x86_phys_bits = 36; - c->x86_virt_bits = 48; -#else - c->x86_clflush_size = 32; - c->x86_phys_bits = 32; - c->x86_virt_bits = 32; -#endif - c->x86_cache_alignment = c->x86_clflush_size; - - memset(&c->x86_capability, 0, sizeof c->x86_capability); - c->extended_cpuid_level = 0; - - if (!have_cpuid_p()) - identify_cpu_without_cpuid(c); - - /* cyrix could have cpuid enabled via c_identify()*/ - if (!have_cpuid_p()) - return; - - cpu_detect(c); - - get_cpu_vendor(c); - - get_cpu_cap(c); - - if (this_cpu->c_early_init) - this_cpu->c_early_init(c); - - c->cpu_index = 0; - filter_cpuid_features(c, false); - - setup_smep(c); - - if (this_cpu->c_bsp_init) - this_cpu->c_bsp_init(c); -} - -void __init early_cpu_init(void) -{ - const struct cpu_dev *const *cdev; - int count = 0; - -#ifdef CONFIG_PROCESSOR_SELECT - printk(KERN_INFO "KERNEL supported cpus:\n"); -#endif - - for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { - const struct cpu_dev *cpudev = *cdev; - - if (count >= X86_VENDOR_NUM) - break; - cpu_devs[count] = cpudev; - count++; - -#ifdef CONFIG_PROCESSOR_SELECT - { - unsigned int j; - - for (j = 0; j < 2; j++) { - if (!cpudev->c_ident[j]) - continue; - printk(KERN_INFO " %s %s\n", cpudev->c_vendor, - cpudev->c_ident[j]); - } - } -#endif - } - early_identify_cpu(&boot_cpu_data); -} - -/* - * The NOPL instruction is supposed to exist on all CPUs of family >= 6; - * unfortunately, that's not true in practice because of early VIA - * chips and (more importantly) broken virtualizers that are not easy - * to detect. In the latter case it doesn't even *fail* reliably, so - * probing for it doesn't even work. Disable it completely on 32-bit - * unless we can find a reliable way to detect all the broken cases. - * Enable it explicitly on 64-bit for non-constant inputs of cpu_has(). 
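
Looping back to cpu_detect() a little further up: family, model and stepping come out of CPUID leaf 1 EAX, with the extended-family bits added in only for family 0xf and the extended-model bits spliced in for family 6 and above. A user-space decode that applies the same adjustments:

    #include <stdio.h>
    #include <cpuid.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;
        unsigned int family, model, stepping;

        __get_cpuid(1, &eax, &ebx, &ecx, &edx);

        family   = (eax >> 8) & 0xf;
        model    = (eax >> 4) & 0xf;
        stepping =  eax       & 0xf;

        /* Same fix-ups cpu_detect() applies. */
        if (family == 0xf)
            family += (eax >> 20) & 0xff;
        if (family >= 0x6)
            model += ((eax >> 16) & 0xf) << 4;

        printf("family 0x%x model 0x%x stepping 0x%x\n",
               family, model, stepping);
        return 0;
    }
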
- */ -static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_32 - clear_cpu_cap(c, X86_FEATURE_NOPL); -#else - set_cpu_cap(c, X86_FEATURE_NOPL); -#endif -} - -static void __cpuinit generic_identify(struct cpuinfo_x86 *c) -{ - c->extended_cpuid_level = 0; - - if (!have_cpuid_p()) - identify_cpu_without_cpuid(c); - - /* cyrix could have cpuid enabled via c_identify()*/ - if (!have_cpuid_p()) - return; - - cpu_detect(c); - - get_cpu_vendor(c); - - get_cpu_cap(c); - - if (c->cpuid_level >= 0x00000001) { - c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; -#ifdef CONFIG_X86_32 -# ifdef CONFIG_X86_HT - c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); -# else - c->apicid = c->initial_apicid; -# endif -#endif - c->phys_proc_id = c->initial_apicid; - } - - setup_smep(c); - - get_model_name(c); /* Default name */ - - detect_nopl(c); -} - -/* - * This does the hard work of actually picking apart the CPU stuff... - */ -static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) -{ - int i; - - c->loops_per_jiffy = loops_per_jiffy; - c->x86_cache_size = -1; - c->x86_vendor = X86_VENDOR_UNKNOWN; - c->x86_model = c->x86_mask = 0; /* So far unknown... */ - c->x86_vendor_id[0] = '\0'; /* Unset */ - c->x86_model_id[0] = '\0'; /* Unset */ - c->x86_max_cores = 1; - c->x86_coreid_bits = 0; -#ifdef CONFIG_X86_64 - c->x86_clflush_size = 64; - c->x86_phys_bits = 36; - c->x86_virt_bits = 48; -#else - c->cpuid_level = -1; /* CPUID not detected */ - c->x86_clflush_size = 32; - c->x86_phys_bits = 32; - c->x86_virt_bits = 32; -#endif - c->x86_cache_alignment = c->x86_clflush_size; - memset(&c->x86_capability, 0, sizeof c->x86_capability); - - generic_identify(c); - - if (this_cpu->c_identify) - this_cpu->c_identify(c); - - /* Clear/Set all flags overriden by options, after probe */ - for (i = 0; i < NCAPINTS; i++) { - c->x86_capability[i] &= ~cpu_caps_cleared[i]; - c->x86_capability[i] |= cpu_caps_set[i]; - } - -#ifdef CONFIG_X86_64 - c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); -#endif - - /* - * Vendor-specific initialization. In this section we - * canonicalize the feature flags, meaning if there are - * features a certain CPU supports which CPUID doesn't - * tell us, CPUID claiming incorrect flags, or other bugs, - * we handle them here. - * - * At the end of this section, c->x86_capability better - * indicate the features this CPU genuinely supports! - */ - if (this_cpu->c_init) - this_cpu->c_init(c); - - /* Disable the PN if appropriate */ - squash_the_stupid_serial_number(c); - - /* - * The vendor-specific functions might have changed features. - * Now we do "generic changes." - */ - - /* Filter out anything that depends on CPUID levels we don't have */ - filter_cpuid_features(c, true); - - /* If the model name is still unset, do table lookup. */ - if (!c->x86_model_id[0]) { - const char *p; - p = table_lookup_model(c); - if (p) - strcpy(c->x86_model_id, p); - else - /* Last resort... */ - sprintf(c->x86_model_id, "%02x/%02x", - c->x86, c->x86_model); - } - -#ifdef CONFIG_X86_64 - detect_ht(c); -#endif - - init_hypervisor(c); - x86_init_rdrand(c); - - /* - * Clear/Set all flags overriden by options, need do it - * before following smp all cpus cap AND. - */ - for (i = 0; i < NCAPINTS; i++) { - c->x86_capability[i] &= ~cpu_caps_cleared[i]; - c->x86_capability[i] |= cpu_caps_set[i]; - } - - /* - * On SMP, boot_cpu_data holds the common feature set between - * all CPUs; so make sure that we indicate which features are - * common between the CPUs. 
The first time this routine gets - * executed, c == &boot_cpu_data. - */ - if (c != &boot_cpu_data) { - /* AND the already accumulated flags with these */ - for (i = 0; i < NCAPINTS; i++) - boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; - } - - /* Init Machine Check Exception if available. */ - mcheck_cpu_init(c); - - select_idle_routine(c); - -#ifdef CONFIG_NUMA - numa_add_cpu(smp_processor_id()); -#endif -} - -#ifdef CONFIG_X86_64 -static void vgetcpu_set_mode(void) -{ - if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) - vgetcpu_mode = VGETCPU_RDTSCP; - else - vgetcpu_mode = VGETCPU_LSL; -} -#endif - -void __init identify_boot_cpu(void) -{ - identify_cpu(&boot_cpu_data); - init_amd_e400_c1e_mask(); -#ifdef CONFIG_X86_32 - sysenter_setup(); - enable_sep_cpu(); -#else - vgetcpu_set_mode(); -#endif -} - -void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) -{ - BUG_ON(c == &boot_cpu_data); - identify_cpu(c); -#ifdef CONFIG_X86_32 - enable_sep_cpu(); -#endif - mtrr_ap_init(); -} - -struct msr_range { - unsigned min; - unsigned max; -}; - -static const struct msr_range msr_range_array[] __cpuinitconst = { - { 0x00000000, 0x00000418}, - { 0xc0000000, 0xc000040b}, - { 0xc0010000, 0xc0010142}, - { 0xc0011000, 0xc001103b}, -}; - -static void __cpuinit __print_cpu_msr(void) -{ - unsigned index_min, index_max; - unsigned index; - u64 val; - int i; - - for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { - index_min = msr_range_array[i].min; - index_max = msr_range_array[i].max; - - for (index = index_min; index < index_max; index++) { - if (rdmsrl_amd_safe(index, &val)) - continue; - printk(KERN_INFO " MSR%08x: %016llx\n", index, val); - } - } -} - -static int show_msr __cpuinitdata; - -static __init int setup_show_msr(char *arg) -{ - int num; - - get_option(&arg, &num); - - if (num > 0) - show_msr = num; - return 1; -} -__setup("show_msr=", setup_show_msr); - -static __init int setup_noclflush(char *arg) -{ - setup_clear_cpu_cap(X86_FEATURE_CLFLSH); - return 1; -} -__setup("noclflush", setup_noclflush); - -void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) -{ - const char *vendor = NULL; - - if (c->x86_vendor < X86_VENDOR_NUM) { - vendor = this_cpu->c_vendor; - } else { - if (c->cpuid_level >= 0) - vendor = c->x86_vendor_id; - } - - if (vendor && !strstr(c->x86_model_id, vendor)) - printk(KERN_CONT "%s ", vendor); - - if (c->x86_model_id[0]) - printk(KERN_CONT "%s", c->x86_model_id); - else - printk(KERN_CONT "%d86", c->x86); - - if (c->x86_mask || c->cpuid_level >= 0) - printk(KERN_CONT " stepping %02x\n", c->x86_mask); - else - printk(KERN_CONT "\n"); - - print_cpu_msr(c); -} - -void __cpuinit print_cpu_msr(struct cpuinfo_x86 *c) -{ - if (c->cpu_index < show_msr) - __print_cpu_msr(); -} - -static __init int setup_disablecpuid(char *arg) -{ - int bit; - - if (get_option(&arg, &bit) && bit < NCAPINTS*32) - setup_clear_cpu_cap(bit); - else - return 0; - - return 1; -} -__setup("clearcpuid=", setup_disablecpuid); - -#ifdef CONFIG_X86_64 -struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; -struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1, - (unsigned long) nmi_idt_table }; - -DEFINE_PER_CPU_FIRST(union irq_stack_union, - irq_stack_union) __aligned(PAGE_SIZE); - -/* - * The following four percpu variables are hot. Align current_task to - * cacheline size such that all four fall in the same cacheline. 
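
The per-CPU declarations that follow pack four hot variables so they share one cache line. A user-space analogue of the same trick, assuming the usual 64-byte line size:

    #include <stddef.h>
    #include <stdio.h>

    /* Keep a handful of hot fields together and align the block to the
     * (assumed) 64-byte cache line, so one line fill brings them all in. */
    struct hot_block {
        void *current_task;
        unsigned long kernel_stack;
        char *irq_stack_ptr;
        unsigned int irq_count;
    } __attribute__((aligned(64)));

    int main(void)
    {
        printf("size %zu, alignment %zu\n",
               sizeof(struct hot_block), _Alignof(struct hot_block));
        return 0;
    }
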
- */ -DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = - &init_task; -EXPORT_PER_CPU_SYMBOL(current_task); - -DEFINE_PER_CPU(unsigned long, kernel_stack) = - (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; -EXPORT_PER_CPU_SYMBOL(kernel_stack); - -DEFINE_PER_CPU(char *, irq_stack_ptr) = - init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; - -DEFINE_PER_CPU(unsigned int, irq_count) = -1; - -DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); - -/* - * Special IST stacks which the CPU switches to when it calls - * an IST-marked descriptor entry. Up to 7 stacks (hardware - * limit), all of them are 4K, except the debug stack which - * is 8K. - */ -static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, - [DEBUG_STACK - 1] = DEBUG_STKSZ -}; - -static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks - [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); - -/* May not be marked __init: used by software suspend */ -void syscall_init(void) -{ - /* - * LSTAR and STAR live in a bit strange symbiosis. - * They both write to the same internal register. STAR allows to - * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. - */ - wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); - wrmsrl(MSR_LSTAR, system_call); - wrmsrl(MSR_CSTAR, ignore_sysret); - -#ifdef CONFIG_IA32_EMULATION - syscall32_cpu_init(); -#endif - - /* Flags to clear on syscall */ - wrmsrl(MSR_SYSCALL_MASK, - X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); -} - -unsigned long kernel_eflags; - -/* - * Copies of the original ist values from the tss are only accessed during - * debugging, no special alignment required. - */ -DEFINE_PER_CPU(struct orig_ist, orig_ist); - -static DEFINE_PER_CPU(unsigned long, debug_stack_addr); -DEFINE_PER_CPU(int, debug_stack_usage); - -int is_debug_stack(unsigned long addr) -{ - return __get_cpu_var(debug_stack_usage) || - (addr <= __get_cpu_var(debug_stack_addr) && - addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); -} - -void debug_stack_set_zero(void) -{ - load_idt((const struct desc_ptr *)&nmi_idt_descr); -} - -void debug_stack_reset(void) -{ - load_idt((const struct desc_ptr *)&idt_descr); -} - -#else /* CONFIG_X86_64 */ - -DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; -EXPORT_PER_CPU_SYMBOL(current_task); -DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); - -#ifdef CONFIG_CC_STACKPROTECTOR -DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); -#endif - -/* Make sure %fs and %gs are initialized properly in idle threads */ -struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) -{ - memset(regs, 0, sizeof(struct pt_regs)); - regs->fs = __KERNEL_PERCPU; - regs->gs = __KERNEL_STACK_CANARY; - - return regs; -} -#endif /* CONFIG_X86_64 */ - -/* - * Clear all 6 debug registers: - */ -static void clear_all_debug_regs(void) -{ - int i; - - for (i = 0; i < 8; i++) { - /* Ignore db4, db5 */ - if ((i == 4) || (i == 5)) - continue; - - set_debugreg(0, i); - } -} - -#ifdef CONFIG_KGDB -/* - * Restore debug regs if using kgdbwait and you have a kernel debugger - * connection established. - */ -static void dbg_restore_debug_regs(void) -{ - if (unlikely(kgdb_connected && arch_kgdb_ops.correct_hw_break)) - arch_kgdb_ops.correct_hw_break(); -} -#else /* ! CONFIG_KGDB */ -#define dbg_restore_debug_regs() -#endif /* ! CONFIG_KGDB */ - -/* - * cpu_init() initializes state that is per-CPU. 
Some data is already - * initialized (naturally) in the bootstrap process, such as the GDT - * and IDT. We reload them nevertheless, this function acts as a - * 'CPU state barrier', nothing should get across. - * A lot of state is already set up in PDA init for 64 bit - */ -#ifdef CONFIG_X86_64 - -void __cpuinit cpu_init(void) -{ - struct orig_ist *oist; - struct task_struct *me; - struct tss_struct *t; - unsigned long v; - int cpu; - int i; - - cpu = stack_smp_processor_id(); - t = &per_cpu(init_tss, cpu); - oist = &per_cpu(orig_ist, cpu); - -#ifdef CONFIG_NUMA - if (cpu != 0 && percpu_read(numa_node) == 0 && - early_cpu_to_node(cpu) != NUMA_NO_NODE) - set_numa_node(early_cpu_to_node(cpu)); -#endif - - me = current; - - if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) - panic("CPU#%d already initialized!\n", cpu); - - pr_debug("Initializing CPU#%d\n", cpu); - - clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); - - /* - * Initialize the per-CPU GDT with the boot GDT, - * and set up the GDT descriptor: - */ - - switch_to_new_gdt(cpu); - loadsegment(fs, 0); - - load_idt((const struct desc_ptr *)&idt_descr); - - memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); - syscall_init(); - - wrmsrl(MSR_FS_BASE, 0); - wrmsrl(MSR_KERNEL_GS_BASE, 0); - barrier(); - - x86_configure_nx(); - if (cpu != 0) - enable_x2apic(); - - /* - * set up and load the per-CPU TSS - */ - if (!oist->ist[0]) { - char *estacks = per_cpu(exception_stacks, cpu); - - for (v = 0; v < N_EXCEPTION_STACKS; v++) { - estacks += exception_stack_sizes[v]; - oist->ist[v] = t->x86_tss.ist[v] = - (unsigned long)estacks; - if (v == DEBUG_STACK-1) - per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks; - } - } - - t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); - - /* - * <= is required because the CPU will access up to - * 8 bits beyond the end of the IO permission bitmap. 
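
The io_bitmap filled with ~0UL just below is the per-TSS I/O permission bitmap, i.e. the structure that ioperm(2) and iopl(2) edit on behalf of user space. A minimal root-only sketch that clears one bit and performs a direct port write (port 0x80 is the traditional POST/debug port):

    #include <stdio.h>
    #include <sys/io.h>

    int main(void)
    {
        /* Ask the kernel to clear the I/O-bitmap bit for port 0x80. */
        if (ioperm(0x80, 1, 1)) {
            perror("ioperm");
            return 1;
        }
        outb(0x42, 0x80);      /* direct ring-3 port write, now permitted */
        ioperm(0x80, 1, 0);    /* revoke it again */
        return 0;
    }
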
- */ - for (i = 0; i <= IO_BITMAP_LONGS; i++) - t->io_bitmap[i] = ~0UL; - - atomic_inc(&init_mm.mm_count); - me->active_mm = &init_mm; - BUG_ON(me->mm); - enter_lazy_tlb(&init_mm, me); - - load_sp0(t, ¤t->thread); - set_tss_desc(cpu, t); - load_TR_desc(); - load_LDT(&init_mm.context); - - clear_all_debug_regs(); - dbg_restore_debug_regs(); - - fpu_init(); - xsave_init(); - - raw_local_save_flags(kernel_eflags); - - if (is_uv_system()) - uv_cpu_init(); -} - -#else - -void __cpuinit cpu_init(void) -{ - int cpu = smp_processor_id(); - struct task_struct *curr = current; - struct tss_struct *t = &per_cpu(init_tss, cpu); - struct thread_struct *thread = &curr->thread; - - if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) { - printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); - for (;;) - local_irq_enable(); - } - - printk(KERN_INFO "Initializing CPU#%d\n", cpu); - - if (cpu_has_vme || cpu_has_tsc || cpu_has_de) - clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); - - load_idt(&idt_descr); - switch_to_new_gdt(cpu); - - /* - * Set up and load the per-CPU TSS and LDT - */ - atomic_inc(&init_mm.mm_count); - curr->active_mm = &init_mm; - BUG_ON(curr->mm); - enter_lazy_tlb(&init_mm, curr); - - load_sp0(t, thread); - set_tss_desc(cpu, t); - load_TR_desc(); - load_LDT(&init_mm.context); - - t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); - -#ifdef CONFIG_DOUBLEFAULT - /* Set up doublefault TSS pointer in the GDT */ - __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); -#endif - - clear_all_debug_regs(); - dbg_restore_debug_regs(); - - fpu_init(); - xsave_init(); -} -#endif diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/cpu.h b/ANDROID_3.4.5/arch/x86/kernel/cpu/cpu.h deleted file mode 100644 index 8bacc782..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/cpu.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef ARCH_X86_CPU_H -#define ARCH_X86_CPU_H - -struct cpu_model_info { - int vendor; - int family; - const char *model_names[16]; -}; - -/* attempt to consolidate cpu attributes */ -struct cpu_dev { - const char *c_vendor; - - /* some have two possibilities for cpuid string */ - const char *c_ident[2]; - - struct cpu_model_info c_models[4]; - - void (*c_early_init)(struct cpuinfo_x86 *); - void (*c_bsp_init)(struct cpuinfo_x86 *); - void (*c_init)(struct cpuinfo_x86 *); - void (*c_identify)(struct cpuinfo_x86 *); - unsigned int (*c_size_cache)(struct cpuinfo_x86 *, unsigned int); - int c_x86_vendor; -}; - -#define cpu_dev_register(cpu_devX) \ - static const struct cpu_dev *const __cpu_dev_##cpu_devX __used \ - __attribute__((__section__(".x86_cpu_dev.init"))) = \ - &cpu_devX; - -extern const struct cpu_dev *const __x86_cpu_dev_start[], - *const __x86_cpu_dev_end[]; - -extern void get_cpu_cap(struct cpuinfo_x86 *c); -extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); -#endif /* ARCH_X86_CPU_H */ diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/cyrix.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/cyrix.c deleted file mode 100644 index 4fbd384f..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/cyrix.c +++ /dev/null @@ -1,461 +0,0 @@ -#include <linux/init.h> -#include <linux/bitops.h> -#include <linux/delay.h> -#include <linux/pci.h> -#include <asm/dma.h> -#include <linux/io.h> -#include <asm/processor-cyrix.h> -#include <asm/processor-flags.h> -#include <linux/timer.h> -#include <asm/pci-direct.h> -#include <asm/tsc.h> - -#include "cpu.h" - -/* - * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. 
about the CPU - */ -static void __cpuinit __do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) -{ - unsigned char ccr2, ccr3; - - /* we test for DEVID by checking whether CCR3 is writable */ - ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, ccr3 ^ 0x80); - getCx86(0xc0); /* dummy to change bus */ - - if (getCx86(CX86_CCR3) == ccr3) { /* no DEVID regs. */ - ccr2 = getCx86(CX86_CCR2); - setCx86(CX86_CCR2, ccr2 ^ 0x04); - getCx86(0xc0); /* dummy */ - - if (getCx86(CX86_CCR2) == ccr2) /* old Cx486SLC/DLC */ - *dir0 = 0xfd; - else { /* Cx486S A step */ - setCx86(CX86_CCR2, ccr2); - *dir0 = 0xfe; - } - } else { - setCx86(CX86_CCR3, ccr3); /* restore CCR3 */ - - /* read DIR0 and DIR1 CPU registers */ - *dir0 = getCx86(CX86_DIR0); - *dir1 = getCx86(CX86_DIR1); - } -} - -static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) -{ - unsigned long flags; - - local_irq_save(flags); - __do_cyrix_devid(dir0, dir1); - local_irq_restore(flags); -} -/* - * Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in - * order to identify the Cyrix CPU model after we're out of setup.c - * - * Actually since bugs.h doesn't even reference this perhaps someone should - * fix the documentation ??? - */ -static unsigned char Cx86_dir0_msb __cpuinitdata = 0; - -static const char __cpuinitconst Cx86_model[][9] = { - "Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ", - "M II ", "Unknown" -}; -static const char __cpuinitconst Cx486_name[][5] = { - "SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx", - "SRx2", "DRx2" -}; -static const char __cpuinitconst Cx486S_name[][4] = { - "S", "S2", "Se", "S2e" -}; -static const char __cpuinitconst Cx486D_name[][4] = { - "DX", "DX2", "?", "?", "?", "DX4" -}; -static char Cx86_cb[] __cpuinitdata = "?.5x Core/Bus Clock"; -static const char __cpuinitconst cyrix_model_mult1[] = "12??43"; -static const char __cpuinitconst cyrix_model_mult2[] = "12233445"; - -/* - * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old - * BIOSes for compatibility with DOS games. This makes the udelay loop - * work correctly, and improves performance. - * - * FIXME: our newer udelay uses the tsc. 
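
The getCx86()/setCx86() accessors used throughout this file are nothing more than index/data cycles on I/O ports 0x22 and 0x23. A user-space sketch of the read side, for illustration only (needs root, x86, and only means anything on Cyrix/NSC silicon; register index 0xc3 is assumed here to match the kernel's CX86_CCR3):

    #include <stdio.h>
    #include <sys/io.h>

    /* Select a Cyrix configuration register on port 0x22, read it on 0x23. */
    static unsigned char get_cx86(unsigned char reg)
    {
        outb(reg, 0x22);
        return inb(0x23);
    }

    int main(void)
    {
        if (ioperm(0x22, 2, 1)) {
            perror("ioperm");
            return 1;
        }
        printf("CCR3 = 0x%02x\n", get_cx86(0xc3));
        return 0;
    }
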
We don't need to frob with SLOP - */ - -static void __cpuinit check_cx686_slop(struct cpuinfo_x86 *c) -{ - unsigned long flags; - - if (Cx86_dir0_msb == 3) { - unsigned char ccr3, ccr5; - - local_irq_save(flags); - ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - ccr5 = getCx86(CX86_CCR5); - if (ccr5 & 2) - setCx86(CX86_CCR5, ccr5 & 0xfd); /* reset SLOP */ - setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ - local_irq_restore(flags); - - if (ccr5 & 2) { /* possible wrong calibration done */ - printk(KERN_INFO "Recalibrating delay loop with SLOP bit reset\n"); - calibrate_delay(); - c->loops_per_jiffy = loops_per_jiffy; - } - } -} - - -static void __cpuinit set_cx86_reorder(void) -{ - u8 ccr3; - - printk(KERN_INFO "Enable Memory access reorder on Cyrix/NSC processor.\n"); - ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - - /* Load/Store Serialize to mem access disable (=reorder it) */ - setCx86_old(CX86_PCR0, getCx86_old(CX86_PCR0) & ~0x80); - /* set load/store serialize from 1GB to 4GB */ - ccr3 |= 0xe0; - setCx86(CX86_CCR3, ccr3); -} - -static void __cpuinit set_cx86_memwb(void) -{ - printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); - - /* CCR2 bit 2: unlock NW bit */ - setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04); - /* set 'Not Write-through' */ - write_cr0(read_cr0() | X86_CR0_NW); - /* CCR2 bit 2: lock NW bit and set WT1 */ - setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x14); -} - -/* - * Configure later MediaGX and/or Geode processor. - */ - -static void __cpuinit geode_configure(void) -{ - unsigned long flags; - u8 ccr3; - local_irq_save(flags); - - /* Suspend on halt power saving and enable #SUSP pin */ - setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x88); - - ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - - - /* FPU fast, DTE cache, Mem bypass */ - setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x38); - setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ - - set_cx86_memwb(); - set_cx86_reorder(); - - local_irq_restore(flags); -} - -static void __cpuinit early_init_cyrix(struct cpuinfo_x86 *c) -{ - unsigned char dir0, dir0_msn, dir1 = 0; - - __do_cyrix_devid(&dir0, &dir1); - dir0_msn = dir0 >> 4; /* identifies CPU "family" */ - - switch (dir0_msn) { - case 3: /* 6x86/6x86L */ - /* Emulate MTRRs using Cyrix's ARRs. */ - set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); - break; - case 5: /* 6x86MX/M II */ - /* Emulate MTRRs using Cyrix's ARRs. */ - set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); - break; - } -} - -static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) -{ - unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0; - char *buf = c->x86_model_id; - const char *p = NULL; - - /* - * Bit 31 in normal CPUID used for nonstandard 3DNow ID; - * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway - */ - clear_cpu_cap(c, 0*32+31); - - /* Cyrix used bit 24 in extended (AMD) CPUID for Cyrix MMX extensions */ - if (test_cpu_cap(c, 1*32+24)) { - clear_cpu_cap(c, 1*32+24); - set_cpu_cap(c, X86_FEATURE_CXMMX); - } - - do_cyrix_devid(&dir0, &dir1); - - check_cx686_slop(c); - - Cx86_dir0_msb = dir0_msn = dir0 >> 4; /* identifies CPU "family" */ - dir0_lsn = dir0 & 0xf; /* model or clock multiplier */ - - /* common case step number/rev -- exceptions handled below */ - c->x86_model = (dir1 >> 4) + 1; - c->x86_mask = dir1 & 0xf; - - /* Now cook; the original recipe is by Channing Corn, from Cyrix. 
- * We do the same thing for each generation: we work out - * the model, multiplier and stepping. Black magic included, - * to make the silicon step/rev numbers match the printed ones. - */ - - switch (dir0_msn) { - unsigned char tmp; - - case 0: /* Cx486SLC/DLC/SRx/DRx */ - p = Cx486_name[dir0_lsn & 7]; - break; - - case 1: /* Cx486S/DX/DX2/DX4 */ - p = (dir0_lsn & 8) ? Cx486D_name[dir0_lsn & 5] - : Cx486S_name[dir0_lsn & 3]; - break; - - case 2: /* 5x86 */ - Cx86_cb[2] = cyrix_model_mult1[dir0_lsn & 5]; - p = Cx86_cb+2; - break; - - case 3: /* 6x86/6x86L */ - Cx86_cb[1] = ' '; - Cx86_cb[2] = cyrix_model_mult1[dir0_lsn & 5]; - if (dir1 > 0x21) { /* 686L */ - Cx86_cb[0] = 'L'; - p = Cx86_cb; - (c->x86_model)++; - } else /* 686 */ - p = Cx86_cb+1; - /* Emulate MTRRs using Cyrix's ARRs. */ - set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); - /* 6x86's contain this bug */ - c->coma_bug = 1; - break; - - case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */ -#ifdef CONFIG_PCI - { - u32 vendor, device; - /* - * It isn't really a PCI quirk directly, but the cure is the - * same. The MediaGX has deep magic SMM stuff that handles the - * SB emulation. It throws away the fifo on disable_dma() which - * is wrong and ruins the audio. - * - * Bug2: VSA1 has a wrap bug so that using maximum sized DMA - * causes bad things. According to NatSemi VSA2 has another - * bug to do with 'hlt'. I've not seen any boards using VSA2 - * and X doesn't seem to support it either so who cares 8). - * VSA1 we work around however. - */ - - printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n"); - isa_dma_bridge_buggy = 2; - - /* We do this before the PCI layer is running. However we - are safe here as we know the bridge must be a Cyrix - companion and must be present */ - vendor = read_pci_config_16(0, 0, 0x12, PCI_VENDOR_ID); - device = read_pci_config_16(0, 0, 0x12, PCI_DEVICE_ID); - - /* - * The 5510/5520 companion chips have a funky PIT. - */ - if (vendor == PCI_VENDOR_ID_CYRIX && - (device == PCI_DEVICE_ID_CYRIX_5510 || - device == PCI_DEVICE_ID_CYRIX_5520)) - mark_tsc_unstable("cyrix 5510/5520 detected"); - } -#endif - c->x86_cache_size = 16; /* Yep 16K integrated cache thats it */ - - /* GXm supports extended cpuid levels 'ala' AMD */ - if (c->cpuid_level == 2) { - /* Enable cxMMX extensions (GX1 Datasheet 54) */ - setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7) | 1); - - /* - * GXm : 0x30 ... 0x5f GXm datasheet 51 - * GXlv: 0x6x GXlv datasheet 54 - * ? : 0x7x - * GX1 : 0x8x GX1 datasheet 56 - */ - if ((0x30 <= dir1 && dir1 <= 0x6f) || - (0x80 <= dir1 && dir1 <= 0x8f)) - geode_configure(); - return; - } else { /* MediaGX */ - Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4'; - p = Cx86_cb+2; - c->x86_model = (dir1 & 0x20) ? 1 : 2; - } - break; - - case 5: /* 6x86MX/M II */ - if (dir1 > 7) { - dir0_msn++; /* M II */ - /* Enable MMX extensions (App note 108) */ - setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7)|1); - } else { - c->coma_bug = 1; /* 6x86MX, it has the bug. */ - } - tmp = (!(dir0_lsn & 7) || dir0_lsn & 1) ? 2 : 0; - Cx86_cb[tmp] = cyrix_model_mult2[dir0_lsn & 7]; - p = Cx86_cb+tmp; - if (((dir1 & 0x0f) > 4) || ((dir1 & 0xf0) == 0x20)) - (c->x86_model)++; - /* Emulate MTRRs using Cyrix's ARRs. */ - set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); - break; - - case 0xf: /* Cyrix 486 without DEVID registers */ - switch (dir0_lsn) { - case 0xd: /* either a 486SLC or DLC w/o DEVID */ - dir0_msn = 0; - p = Cx486_name[(c->hard_math) ? 
1 : 0]; - break; - - case 0xe: /* a 486S A step */ - dir0_msn = 0; - p = Cx486S_name[0]; - break; - } - break; - - default: /* unknown (shouldn't happen, we know everyone ;-) */ - dir0_msn = 7; - break; - } - strcpy(buf, Cx86_model[dir0_msn & 7]); - if (p) - strcat(buf, p); - return; -} - -/* - * Handle National Semiconductor branded processors - */ -static void __cpuinit init_nsc(struct cpuinfo_x86 *c) -{ - /* - * There may be GX1 processors in the wild that are branded - * NSC and not Cyrix. - * - * This function only handles the GX processor, and kicks every - * thing else to the Cyrix init function above - that should - * cover any processors that might have been branded differently - * after NSC acquired Cyrix. - * - * If this breaks your GX1 horribly, please e-mail - * info-linux@ldcmail.amd.com to tell us. - */ - - /* Handle the GX (Formally known as the GX2) */ - - if (c->x86 == 5 && c->x86_model == 5) - cpu_detect_cache_sizes(c); - else - init_cyrix(c); -} - -/* - * Cyrix CPUs without cpuid or with cpuid not yet enabled can be detected - * by the fact that they preserve the flags across the division of 5/2. - * PII and PPro exhibit this behavior too, but they have cpuid available. - */ - -/* - * Perform the Cyrix 5/2 test. A Cyrix won't change - * the flags, while other 486 chips will. - */ -static inline int test_cyrix_52div(void) -{ - unsigned int test; - - __asm__ __volatile__( - "sahf\n\t" /* clear flags (%eax = 0x0005) */ - "div %b2\n\t" /* divide 5 by 2 */ - "lahf" /* store flags into %ah */ - : "=a" (test) - : "0" (5), "q" (2) - : "cc"); - - /* AH is 0x02 on Cyrix after the divide.. */ - return (unsigned char) (test >> 8) == 0x02; -} - -static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c) -{ - /* Detect Cyrix with disabled CPUID */ - if (c->x86 == 4 && test_cyrix_52div()) { - unsigned char dir0, dir1; - - strcpy(c->x86_vendor_id, "CyrixInstead"); - c->x86_vendor = X86_VENDOR_CYRIX; - - /* Actually enable cpuid on the older cyrix */ - - /* Retrieve CPU revisions */ - - do_cyrix_devid(&dir0, &dir1); - - dir0 >>= 4; - - /* Check it is an affected model */ - - if (dir0 == 5 || dir0 == 3) { - unsigned char ccr3; - unsigned long flags; - printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n"); - local_irq_save(flags); - ccr3 = getCx86(CX86_CCR3); - /* enable MAPEN */ - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); - /* enable cpuid */ - setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80); - /* disable MAPEN */ - setCx86(CX86_CCR3, ccr3); - local_irq_restore(flags); - } - } -} - -static const struct cpu_dev __cpuinitconst cyrix_cpu_dev = { - .c_vendor = "Cyrix", - .c_ident = { "CyrixInstead" }, - .c_early_init = early_init_cyrix, - .c_init = init_cyrix, - .c_identify = cyrix_identify, - .c_x86_vendor = X86_VENDOR_CYRIX, -}; - -cpu_dev_register(cyrix_cpu_dev); - -static const struct cpu_dev __cpuinitconst nsc_cpu_dev = { - .c_vendor = "NSC", - .c_ident = { "Geode by NSC" }, - .c_init = init_nsc, - .c_x86_vendor = X86_VENDOR_NSC, -}; - -cpu_dev_register(nsc_cpu_dev); diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/hypervisor.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/hypervisor.c deleted file mode 100644 index 755f64fb..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/hypervisor.c +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Common hypervisor code - * - * Copyright (C) 2008, VMware, Inc. 
- * Author : Alok N Kataria <akataria@vmware.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - */ - -#include <linux/module.h> -#include <asm/processor.h> -#include <asm/hypervisor.h> - -/* - * Hypervisor detect order. This is specified explicitly here because - * some hypervisors might implement compatibility modes for other - * hypervisors and therefore need to be detected in specific sequence. - */ -static const __initconst struct hypervisor_x86 * const hypervisors[] = -{ -#ifdef CONFIG_XEN_PVHVM - &x86_hyper_xen_hvm, -#endif - &x86_hyper_vmware, - &x86_hyper_ms_hyperv, -}; - -const struct hypervisor_x86 *x86_hyper; -EXPORT_SYMBOL(x86_hyper); - -static inline void __init -detect_hypervisor_vendor(void) -{ - const struct hypervisor_x86 *h, * const *p; - - for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) { - h = *p; - if (h->detect()) { - x86_hyper = h; - printk(KERN_INFO "Hypervisor detected: %s\n", h->name); - break; - } - } -} - -void __cpuinit init_hypervisor(struct cpuinfo_x86 *c) -{ - if (x86_hyper && x86_hyper->set_cpu_features) - x86_hyper->set_cpu_features(c); -} - -void __init init_hypervisor_platform(void) -{ - - detect_hypervisor_vendor(); - - if (!x86_hyper) - return; - - init_hypervisor(&boot_cpu_data); - - if (x86_hyper->init_platform) - x86_hyper->init_platform(); -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/intel.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/intel.c deleted file mode 100644 index 3e6ff6cb..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/intel.c +++ /dev/null @@ -1,555 +0,0 @@ -#include <linux/init.h> -#include <linux/kernel.h> - -#include <linux/string.h> -#include <linux/bitops.h> -#include <linux/smp.h> -#include <linux/sched.h> -#include <linux/thread_info.h> -#include <linux/module.h> -#include <linux/uaccess.h> - -#include <asm/processor.h> -#include <asm/pgtable.h> -#include <asm/msr.h> -#include <asm/bugs.h> -#include <asm/cpu.h> - -#ifdef CONFIG_X86_64 -#include <linux/topology.h> -#include <asm/numa_64.h> -#endif - -#include "cpu.h" - -#ifdef CONFIG_X86_LOCAL_APIC -#include <asm/mpspec.h> -#include <asm/apic.h> -#endif - -static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) -{ - u64 misc_enable; - - /* Unmask CPUID levels if masked: */ - if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { - rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); - - if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) { - misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID; - wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); - c->cpuid_level = cpuid_eax(0); - get_cpu_cap(c); - } - } - - if ((c->x86 == 0xf && c->x86_model >= 0x03) || - (c->x86 == 0x6 && c->x86_model >= 0x0e)) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); - - if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) { - unsigned lower_word; - - wrmsr(MSR_IA32_UCODE_REV, 0, 0); - /* 
Required by the SDM */ - sync_core(); - rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode); - } - - /* - * Atom erratum AAE44/AAF40/AAG38/AAH41: - * - * A race condition between speculative fetches and invalidating - * a large page. This is worked around in microcode, but we - * need the microcode to have already been loaded... so if it is - * not, recommend a BIOS update and disable large pages. - */ - if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 && - c->microcode < 0x20e) { - printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n"); - clear_cpu_cap(c, X86_FEATURE_PSE); - } - -#ifdef CONFIG_X86_64 - set_cpu_cap(c, X86_FEATURE_SYSENTER32); -#else - /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ - if (c->x86 == 15 && c->x86_cache_alignment == 64) - c->x86_cache_alignment = 128; -#endif - - /* CPUID workaround for 0F33/0F34 CPU */ - if (c->x86 == 0xF && c->x86_model == 0x3 - && (c->x86_mask == 0x3 || c->x86_mask == 0x4)) - c->x86_phys_bits = 36; - - /* - * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate - * with P/T states and does not stop in deep C-states. - * - * It is also reliable across cores and sockets. (but not across - * cabinets - we turn it off in that case explicitly.) - */ - if (c->x86_power & (1 << 8)) { - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); - set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - if (!check_tsc_unstable()) - sched_clock_stable = 1; - } - - /* - * There is a known erratum on Pentium III and Core Solo - * and Core Duo CPUs. - * " Page with PAT set to WC while associated MTRR is UC - * may consolidate to UC " - * Because of this erratum, it is better to stick with - * setting WC in MTRR rather than using PAT on these CPUs. - * - * Enable PAT WC only on P4, Core 2 or later CPUs. - */ - if (c->x86 == 6 && c->x86_model < 15) - clear_cpu_cap(c, X86_FEATURE_PAT); - -#ifdef CONFIG_KMEMCHECK - /* - * P4s have a "fast strings" feature which causes single- - * stepping REP instructions to only generate a #DB on - * cache-line boundaries. - * - * Ingo Molnar reported a Pentium D (model 6) and a Xeon - * (model 2) with the same problem. - */ - if (c->x86 == 15) { - rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); - - if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) { - printk(KERN_INFO "kmemcheck: Disabling fast string operations\n"); - - misc_enable &= ~MSR_IA32_MISC_ENABLE_FAST_STRING; - wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); - } - } -#endif - - /* - * If fast string is not enabled in IA32_MISC_ENABLE for any reason, - * clear the fast string and enhanced fast string CPU capabilities. - */ - if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { - rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); - if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) { - printk(KERN_INFO "Disabled fast string operations\n"); - setup_clear_cpu_cap(X86_FEATURE_REP_GOOD); - setup_clear_cpu_cap(X86_FEATURE_ERMS); - } - } -} - -#ifdef CONFIG_X86_32 -/* - * Early probe support logic for ppro memory erratum #50 - * - * This is called before we do cpu ident work - */ - -int __cpuinit ppro_with_ram_bug(void) -{ - /* Uses data from early_cpu_detect now */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && - boot_cpu_data.x86 == 6 && - boot_cpu_data.x86_model == 1 && - boot_cpu_data.x86_mask < 8) { - printk(KERN_INFO "Pentium Pro with Errata#50 detected. 
Taking evasive action.\n"); - return 1; - } - return 0; -} - -#ifdef CONFIG_X86_F00F_BUG -static void __cpuinit trap_init_f00f_bug(void) -{ - __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); - - /* - * Update the IDT descriptor and reload the IDT so that - * it uses the read-only mapped virtual address. - */ - idt_descr.address = fix_to_virt(FIX_F00F_IDT); - load_idt(&idt_descr); -} -#endif - -static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c) -{ - /* calling is from identify_secondary_cpu() ? */ - if (!c->cpu_index) - return; - - /* - * Mask B, Pentium, but not Pentium MMX - */ - if (c->x86 == 5 && - c->x86_mask >= 1 && c->x86_mask <= 4 && - c->x86_model <= 3) { - /* - * Remember we have B step Pentia with bugs - */ - WARN_ONCE(1, "WARNING: SMP operation may be unreliable" - "with B stepping processors.\n"); - } -} - -static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) -{ - unsigned long lo, hi; - -#ifdef CONFIG_X86_F00F_BUG - /* - * All current models of Pentium and Pentium with MMX technology CPUs - * have the F0 0F bug, which lets nonprivileged users lock up the - * system. - * Note that the workaround only should be initialized once... - */ - c->f00f_bug = 0; - if (!paravirt_enabled() && c->x86 == 5) { - static int f00f_workaround_enabled; - - c->f00f_bug = 1; - if (!f00f_workaround_enabled) { - trap_init_f00f_bug(); - printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); - f00f_workaround_enabled = 1; - } - } -#endif - - /* - * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until - * model 3 mask 3 - */ - if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) - clear_cpu_cap(c, X86_FEATURE_SEP); - - /* - * P4 Xeon errata 037 workaround. - * Hardware prefetcher may cause stale data to be loaded into the cache. - */ - if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { - rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); - if ((lo & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE) == 0) { - printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); - printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); - lo |= MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE; - wrmsr(MSR_IA32_MISC_ENABLE, lo, hi); - } - } - - /* - * See if we have a good local APIC by checking for buggy Pentia, - * i.e. all B steppings and the C2 stepping of P54C when using their - * integrated APIC (see 11AP erratum in "Pentium Processor - * Specification Update"). - */ - if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && - (c->x86_mask < 0x6 || c->x86_mask == 0xb)) - set_cpu_cap(c, X86_FEATURE_11AP); - - -#ifdef CONFIG_X86_INTEL_USERCOPY - /* - * Set up the preferred alignment for movsl bulk memory moves - */ - switch (c->x86) { - case 4: /* 486: untested */ - break; - case 5: /* Old Pentia: untested */ - break; - case 6: /* PII/PIII only like movsl with 8-byte alignment */ - movsl_mask.mask = 7; - break; - case 15: /* P4 is OK down to 8-byte alignment */ - movsl_mask.mask = 7; - break; - } -#endif - -#ifdef CONFIG_X86_NUMAQ - numaq_tsc_disable(); -#endif - - intel_smp_check(c); -} -#else -static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) -{ -} -#endif - -static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_NUMA - unsigned node; - int cpu = smp_processor_id(); - - /* Don't do the funky fallback heuristics the AMD version employs - for now. 
*/ - node = numa_cpu_node(cpu); - if (node == NUMA_NO_NODE || !node_online(node)) { - /* reuse the value from init_cpu_to_node() */ - node = cpu_to_node(cpu); - } - numa_set_node(cpu, node); -#endif -} - -/* - * find out the number of processor cores on the die - */ -static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) -{ - unsigned int eax, ebx, ecx, edx; - - if (c->cpuid_level < 4) - return 1; - - /* Intel has a non-standard dependency on %ecx for this CPUID level. */ - cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); - if (eax & 0x1f) - return (eax >> 26) + 1; - else - return 1; -} - -static void __cpuinit detect_vmx_virtcap(struct cpuinfo_x86 *c) -{ - /* Intel VMX MSR indicated features */ -#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000 -#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000 -#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000 -#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001 -#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002 -#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020 - - u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2; - - clear_cpu_cap(c, X86_FEATURE_TPR_SHADOW); - clear_cpu_cap(c, X86_FEATURE_VNMI); - clear_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); - clear_cpu_cap(c, X86_FEATURE_EPT); - clear_cpu_cap(c, X86_FEATURE_VPID); - - rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high); - msr_ctl = vmx_msr_high | vmx_msr_low; - if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW) - set_cpu_cap(c, X86_FEATURE_TPR_SHADOW); - if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI) - set_cpu_cap(c, X86_FEATURE_VNMI); - if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) { - rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, - vmx_msr_low, vmx_msr_high); - msr_ctl2 = vmx_msr_high | vmx_msr_low; - if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) && - (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)) - set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); - if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT) - set_cpu_cap(c, X86_FEATURE_EPT); - if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID) - set_cpu_cap(c, X86_FEATURE_VPID); - } -} - -static void __cpuinit init_intel(struct cpuinfo_x86 *c) -{ - unsigned int l2 = 0; - - early_init_intel(c); - - intel_workarounds(c); - - /* - * Detect the extended topology information if available. This - * will reinitialise the initial_apicid which will be used - * in init_intel_cacheinfo() - */ - detect_extended_topology(c); - - l2 = init_intel_cacheinfo(c); - if (c->cpuid_level > 9) { - unsigned eax = cpuid_eax(10); - /* Check for version and the number of counters */ - if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) - set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); - } - - if (cpu_has_xmm2) - set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); - if (cpu_has_ds) { - unsigned int l1; - rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); - if (!(l1 & (1<<11))) - set_cpu_cap(c, X86_FEATURE_BTS); - if (!(l1 & (1<<12))) - set_cpu_cap(c, X86_FEATURE_PEBS); - } - - if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush) - set_cpu_cap(c, X86_FEATURE_CLFLUSH_MONITOR); - -#ifdef CONFIG_X86_64 - if (c->x86 == 15) - c->x86_cache_alignment = c->x86_clflush_size * 2; - if (c->x86 == 6) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); -#else - /* - * Names for the Pentium II/Celeron processors - * detectable only by also checking the cache size. - * Dixon is NOT a Celeron. 
- */ - if (c->x86 == 6) { - char *p = NULL; - - switch (c->x86_model) { - case 5: - if (l2 == 0) - p = "Celeron (Covington)"; - else if (l2 == 256) - p = "Mobile Pentium II (Dixon)"; - break; - - case 6: - if (l2 == 128) - p = "Celeron (Mendocino)"; - else if (c->x86_mask == 0 || c->x86_mask == 5) - p = "Celeron-A"; - break; - - case 8: - if (l2 == 128) - p = "Celeron (Coppermine)"; - break; - } - - if (p) - strcpy(c->x86_model_id, p); - } - - if (c->x86 == 15) - set_cpu_cap(c, X86_FEATURE_P4); - if (c->x86 == 6) - set_cpu_cap(c, X86_FEATURE_P3); -#endif - - if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { - /* - * let's use the legacy cpuid vector 0x1 and 0x4 for topology - * detection. - */ - c->x86_max_cores = intel_num_cpu_cores(c); -#ifdef CONFIG_X86_32 - detect_ht(c); -#endif - } - - /* Work around errata */ - srat_detect_node(c); - - if (cpu_has(c, X86_FEATURE_VMX)) - detect_vmx_virtcap(c); - - /* - * Initialize MSR_IA32_ENERGY_PERF_BIAS if BIOS did not. - * x86_energy_perf_policy(8) is available to change it at run-time - */ - if (cpu_has(c, X86_FEATURE_EPB)) { - u64 epb; - - rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); - if ((epb & 0xF) == ENERGY_PERF_BIAS_PERFORMANCE) { - printk_once(KERN_WARNING "ENERGY_PERF_BIAS:" - " Set to 'normal', was 'performance'\n" - "ENERGY_PERF_BIAS: View and update with" - " x86_energy_perf_policy(8)\n"); - epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL; - wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); - } - } -} - -#ifdef CONFIG_X86_32 -static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) -{ - /* - * Intel PIII Tualatin. This comes in two flavours. - * One has 256kb of cache, the other 512. We have no way - * to determine which, so we use a boottime override - * for the 512kb model, and assume 256 otherwise. 
- */ - if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0)) - size = 256; - return size; -} -#endif - -static const struct cpu_dev __cpuinitconst intel_cpu_dev = { - .c_vendor = "Intel", - .c_ident = { "GenuineIntel" }, -#ifdef CONFIG_X86_32 - .c_models = { - { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = - { - [0] = "486 DX-25/33", - [1] = "486 DX-50", - [2] = "486 SX", - [3] = "486 DX/2", - [4] = "486 SL", - [5] = "486 SX/2", - [7] = "486 DX/2-WB", - [8] = "486 DX/4", - [9] = "486 DX/4-WB" - } - }, - { .vendor = X86_VENDOR_INTEL, .family = 5, .model_names = - { - [0] = "Pentium 60/66 A-step", - [1] = "Pentium 60/66", - [2] = "Pentium 75 - 200", - [3] = "OverDrive PODP5V83", - [4] = "Pentium MMX", - [7] = "Mobile Pentium 75 - 200", - [8] = "Mobile Pentium MMX" - } - }, - { .vendor = X86_VENDOR_INTEL, .family = 6, .model_names = - { - [0] = "Pentium Pro A-step", - [1] = "Pentium Pro", - [3] = "Pentium II (Klamath)", - [4] = "Pentium II (Deschutes)", - [5] = "Pentium II (Deschutes)", - [6] = "Mobile Pentium II", - [7] = "Pentium III (Katmai)", - [8] = "Pentium III (Coppermine)", - [10] = "Pentium III (Cascades)", - [11] = "Pentium III (Tualatin)", - } - }, - { .vendor = X86_VENDOR_INTEL, .family = 15, .model_names = - { - [0] = "Pentium 4 (Unknown)", - [1] = "Pentium 4 (Willamette)", - [2] = "Pentium 4 (Northwood)", - [4] = "Pentium 4 (Foster)", - [5] = "Pentium 4 (Foster)", - } - }, - }, - .c_size_cache = intel_size_cache, -#endif - .c_early_init = early_init_intel, - .c_init = init_intel, - .c_x86_vendor = X86_VENDOR_INTEL, -}; - -cpu_dev_register(intel_cpu_dev); - diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/intel_cacheinfo.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/intel_cacheinfo.c deleted file mode 100644 index b8f3653d..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/intel_cacheinfo.c +++ /dev/null @@ -1,1216 +0,0 @@ -/* - * Routines to indentify caches on Intel CPU. - * - * Changes: - * Venkatesh Pallipadi : Adding cache identification through cpuid(4) - * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure. - * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. 
- */ - -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/device.h> -#include <linux/compiler.h> -#include <linux/cpu.h> -#include <linux/sched.h> -#include <linux/pci.h> - -#include <asm/processor.h> -#include <linux/smp.h> -#include <asm/amd_nb.h> -#include <asm/smp.h> - -#define LVL_1_INST 1 -#define LVL_1_DATA 2 -#define LVL_2 3 -#define LVL_3 4 -#define LVL_TRACE 5 - -struct _cache_table { - unsigned char descriptor; - char cache_type; - short size; -}; - -#define MB(x) ((x) * 1024) - -/* All the cache descriptor types we care about (no TLB or - trace cache entries) */ - -static const struct _cache_table __cpuinitconst cache_table[] = -{ - { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ - { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ - { 0x09, LVL_1_INST, 32 }, /* 4-way set assoc, 64 byte line size */ - { 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */ - { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */ - { 0x0d, LVL_1_DATA, 16 }, /* 4-way set assoc, 64 byte line size */ - { 0x0e, LVL_1_DATA, 24 }, /* 6-way set assoc, 64 byte line size */ - { 0x21, LVL_2, 256 }, /* 8-way set assoc, 64 byte line size */ - { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x23, LVL_3, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x25, LVL_3, MB(2) }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x29, LVL_3, MB(4) }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */ - { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */ - { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x3a, LVL_2, 192 }, /* 6-way set assoc, sectored cache, 64 byte line size */ - { 0x3b, LVL_2, 128 }, /* 2-way set assoc, sectored cache, 64 byte line size */ - { 0x3c, LVL_2, 256 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x3d, LVL_2, 384 }, /* 6-way set assoc, sectored cache, 64 byte line size */ - { 0x3e, LVL_2, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x3f, LVL_2, 256 }, /* 2-way set assoc, 64 byte line size */ - { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */ - { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */ - { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */ - { 0x44, LVL_2, MB(1) }, /* 4-way set assoc, 32 byte line size */ - { 0x45, LVL_2, MB(2) }, /* 4-way set assoc, 32 byte line size */ - { 0x46, LVL_3, MB(4) }, /* 4-way set assoc, 64 byte line size */ - { 0x47, LVL_3, MB(8) }, /* 8-way set assoc, 64 byte line size */ - { 0x48, LVL_2, MB(3) }, /* 12-way set assoc, 64 byte line size */ - { 0x49, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */ - { 0x4a, LVL_3, MB(6) }, /* 12-way set assoc, 64 byte line size */ - { 0x4b, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */ - { 0x4c, LVL_3, MB(12) }, /* 12-way set assoc, 64 byte line size */ - { 0x4d, LVL_3, MB(16) }, /* 16-way set assoc, 64 byte line size */ - { 0x4e, LVL_2, MB(6) }, /* 24-way set assoc, 64 byte line size */ - { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x68, LVL_1_DATA, 32 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x70, LVL_TRACE, 12 }, /* 8-way set 
assoc */ - { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */ - { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */ - { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */ - { 0x78, LVL_2, MB(1) }, /* 4-way set assoc, 64 byte line size */ - { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x7c, LVL_2, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x7d, LVL_2, MB(2) }, /* 8-way set assoc, 64 byte line size */ - { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */ - { 0x80, LVL_2, 512 }, /* 8-way set assoc, 64 byte line size */ - { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */ - { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */ - { 0x84, LVL_2, MB(1) }, /* 8-way set assoc, 32 byte line size */ - { 0x85, LVL_2, MB(2) }, /* 8-way set assoc, 32 byte line size */ - { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */ - { 0x87, LVL_2, MB(1) }, /* 8-way set assoc, 64 byte line size */ - { 0xd0, LVL_3, 512 }, /* 4-way set assoc, 64 byte line size */ - { 0xd1, LVL_3, MB(1) }, /* 4-way set assoc, 64 byte line size */ - { 0xd2, LVL_3, MB(2) }, /* 4-way set assoc, 64 byte line size */ - { 0xd6, LVL_3, MB(1) }, /* 8-way set assoc, 64 byte line size */ - { 0xd7, LVL_3, MB(2) }, /* 8-way set assoc, 64 byte line size */ - { 0xd8, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */ - { 0xdc, LVL_3, MB(2) }, /* 12-way set assoc, 64 byte line size */ - { 0xdd, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */ - { 0xde, LVL_3, MB(8) }, /* 12-way set assoc, 64 byte line size */ - { 0xe2, LVL_3, MB(2) }, /* 16-way set assoc, 64 byte line size */ - { 0xe3, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */ - { 0xe4, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */ - { 0xea, LVL_3, MB(12) }, /* 24-way set assoc, 64 byte line size */ - { 0xeb, LVL_3, MB(18) }, /* 24-way set assoc, 64 byte line size */ - { 0xec, LVL_3, MB(24) }, /* 24-way set assoc, 64 byte line size */ - { 0x00, 0, 0} -}; - - -enum _cache_type { - CACHE_TYPE_NULL = 0, - CACHE_TYPE_DATA = 1, - CACHE_TYPE_INST = 2, - CACHE_TYPE_UNIFIED = 3 -}; - -union _cpuid4_leaf_eax { - struct { - enum _cache_type type:5; - unsigned int level:3; - unsigned int is_self_initializing:1; - unsigned int is_fully_associative:1; - unsigned int reserved:4; - unsigned int num_threads_sharing:12; - unsigned int num_cores_on_die:6; - } split; - u32 full; -}; - -union _cpuid4_leaf_ebx { - struct { - unsigned int coherency_line_size:12; - unsigned int physical_line_partition:10; - unsigned int ways_of_associativity:10; - } split; - u32 full; -}; - -union _cpuid4_leaf_ecx { - struct { - unsigned int number_of_sets:32; - } split; - u32 full; -}; - -struct _cpuid4_info_regs { - union _cpuid4_leaf_eax eax; - union _cpuid4_leaf_ebx ebx; - union _cpuid4_leaf_ecx ecx; - unsigned long size; - struct amd_northbridge *nb; -}; - -struct _cpuid4_info { - struct _cpuid4_info_regs base; - DECLARE_BITMAP(shared_cpu_map, NR_CPUS); -}; - -unsigned short num_cache_leaves; - -/* AMD doesn't have CPUID4. Emulate it here to report the same - information to the user. This makes some assumptions about the machine: - L2 not shared, no SMT etc. that is currently true on AMD CPUs. - - In theory the TLBs could be reported as fake type (they are in "dummy"). 
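The _cpuid4_leaf_eax/ebx/ecx unions above carve CPUID leaf 4 (deterministic cache parameters) into its architectural fields, each stored minus one. As a rough standalone illustration, not part of the deleted file, the user-space sketch below decodes the same fields with the __cpuid_count() helper from GCC/Clang's <cpuid.h> (an assumption about the toolchain) and reproduces the size arithmetic that cpuid4_cache_lookup_regs() applies further down: ways x partitions x line size x sets.

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int subleaf;

	for (subleaf = 0; ; subleaf++) {
		/* EAX/EBX/ECX carry the same bitfields as the unions above. */
		__cpuid_count(4, subleaf, eax, ebx, ecx, edx);

		unsigned int type = eax & 0x1f;	/* CACHE_TYPE_NULL ends the list */
		if (!type)
			break;

		unsigned int level      = (eax >> 5) & 0x7;
		unsigned int line_size  = (ebx & 0xfff) + 1;		/* bits 11:0  */
		unsigned int partitions = ((ebx >> 12) & 0x3ff) + 1;	/* bits 21:12 */
		unsigned int ways       = ((ebx >> 22) & 0x3ff) + 1;	/* bits 31:22 */
		unsigned int sets       = ecx + 1;

		/* Same size arithmetic cpuid4_cache_lookup_regs() uses below. */
		printf("L%u %-7s %u KB\n", level,
		       type == 1 ? "data" : type == 2 ? "inst" : "unified",
		       ways * partitions * line_size * sets / 1024);
	}
	return 0;
}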
- Maybe later */ -union l1_cache { - struct { - unsigned line_size:8; - unsigned lines_per_tag:8; - unsigned assoc:8; - unsigned size_in_kb:8; - }; - unsigned val; -}; - -union l2_cache { - struct { - unsigned line_size:8; - unsigned lines_per_tag:4; - unsigned assoc:4; - unsigned size_in_kb:16; - }; - unsigned val; -}; - -union l3_cache { - struct { - unsigned line_size:8; - unsigned lines_per_tag:4; - unsigned assoc:4; - unsigned res:2; - unsigned size_encoded:14; - }; - unsigned val; -}; - -static const unsigned short __cpuinitconst assocs[] = { - [1] = 1, - [2] = 2, - [4] = 4, - [6] = 8, - [8] = 16, - [0xa] = 32, - [0xb] = 48, - [0xc] = 64, - [0xd] = 96, - [0xe] = 128, - [0xf] = 0xffff /* fully associative - no way to show this currently */ -}; - -static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 }; -static const unsigned char __cpuinitconst types[] = { 1, 2, 3, 3 }; - -static void __cpuinit -amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, - union _cpuid4_leaf_ebx *ebx, - union _cpuid4_leaf_ecx *ecx) -{ - unsigned dummy; - unsigned line_size, lines_per_tag, assoc, size_in_kb; - union l1_cache l1i, l1d; - union l2_cache l2; - union l3_cache l3; - union l1_cache *l1 = &l1d; - - eax->full = 0; - ebx->full = 0; - ecx->full = 0; - - cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val); - cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val); - - switch (leaf) { - case 1: - l1 = &l1i; - case 0: - if (!l1->val) - return; - assoc = assocs[l1->assoc]; - line_size = l1->line_size; - lines_per_tag = l1->lines_per_tag; - size_in_kb = l1->size_in_kb; - break; - case 2: - if (!l2.val) - return; - assoc = assocs[l2.assoc]; - line_size = l2.line_size; - lines_per_tag = l2.lines_per_tag; - /* cpu_data has errata corrections for K7 applied */ - size_in_kb = __this_cpu_read(cpu_info.x86_cache_size); - break; - case 3: - if (!l3.val) - return; - assoc = assocs[l3.assoc]; - line_size = l3.line_size; - lines_per_tag = l3.lines_per_tag; - size_in_kb = l3.size_encoded * 512; - if (boot_cpu_has(X86_FEATURE_AMD_DCM)) { - size_in_kb = size_in_kb >> 1; - assoc = assoc >> 1; - } - break; - default: - return; - } - - eax->split.is_self_initializing = 1; - eax->split.type = types[leaf]; - eax->split.level = levels[leaf]; - eax->split.num_threads_sharing = 0; - eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1; - - - if (assoc == 0xffff) - eax->split.is_fully_associative = 1; - ebx->split.coherency_line_size = line_size - 1; - ebx->split.ways_of_associativity = assoc - 1; - ebx->split.physical_line_partition = lines_per_tag - 1; - ecx->split.number_of_sets = (size_in_kb * 1024) / line_size / - (ebx->split.ways_of_associativity + 1) - 1; -} - -struct _cache_attr { - struct attribute attr; - ssize_t (*show)(struct _cpuid4_info *, char *, unsigned int); - ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count, - unsigned int); -}; - -#ifdef CONFIG_AMD_NB - -/* - * L3 cache descriptors - */ -static void __cpuinit amd_calc_l3_indices(struct amd_northbridge *nb) -{ - struct amd_l3_cache *l3 = &nb->l3_cache; - unsigned int sc0, sc1, sc2, sc3; - u32 val = 0; - - pci_read_config_dword(nb->misc, 0x1C4, &val); - - /* calculate subcache sizes */ - l3->subcaches[0] = sc0 = !(val & BIT(0)); - l3->subcaches[1] = sc1 = !(val & BIT(4)); - - if (boot_cpu_data.x86 == 0x15) { - l3->subcaches[0] = sc0 += !(val & BIT(1)); - l3->subcaches[1] = sc1 += !(val & BIT(5)); - } - - l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9)); - l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & 
BIT(13)); - - l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1; -} - -static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index) -{ - int node; - - /* only for L3, and not in virtualized environments */ - if (index < 3) - return; - - node = amd_get_nb_id(smp_processor_id()); - this_leaf->nb = node_to_amd_nb(node); - if (this_leaf->nb && !this_leaf->nb->l3_cache.indices) - amd_calc_l3_indices(this_leaf->nb); -} - -/* - * check whether a slot used for disabling an L3 index is occupied. - * @l3: L3 cache descriptor - * @slot: slot number (0..1) - * - * @returns: the disabled index if used or negative value if slot free. - */ -int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot) -{ - unsigned int reg = 0; - - pci_read_config_dword(nb->misc, 0x1BC + slot * 4, ®); - - /* check whether this slot is activated already */ - if (reg & (3UL << 30)) - return reg & 0xfff; - - return -1; -} - -static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, - unsigned int slot) -{ - int index; - - if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) - return -EINVAL; - - index = amd_get_l3_disable_slot(this_leaf->base.nb, slot); - if (index >= 0) - return sprintf(buf, "%d\n", index); - - return sprintf(buf, "FREE\n"); -} - -#define SHOW_CACHE_DISABLE(slot) \ -static ssize_t \ -show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf, \ - unsigned int cpu) \ -{ \ - return show_cache_disable(this_leaf, buf, slot); \ -} -SHOW_CACHE_DISABLE(0) -SHOW_CACHE_DISABLE(1) - -static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu, - unsigned slot, unsigned long idx) -{ - int i; - - idx |= BIT(30); - - /* - * disable index in all 4 subcaches - */ - for (i = 0; i < 4; i++) { - u32 reg = idx | (i << 20); - - if (!nb->l3_cache.subcaches[i]) - continue; - - pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg); - - /* - * We need to WBINVD on a core on the node containing the L3 - * cache which indices we disable therefore a simple wbinvd() - * is not sufficient. 
- */ - wbinvd_on_cpu(cpu); - - reg |= BIT(31); - pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg); - } -} - -/* - * disable a L3 cache index by using a disable-slot - * - * @l3: L3 cache descriptor - * @cpu: A CPU on the node containing the L3 cache - * @slot: slot number (0..1) - * @index: index to disable - * - * @return: 0 on success, error status on failure - */ -int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, unsigned slot, - unsigned long index) -{ - int ret = 0; - - /* check if @slot is already used or the index is already disabled */ - ret = amd_get_l3_disable_slot(nb, slot); - if (ret >= 0) - return -EEXIST; - - if (index > nb->l3_cache.indices) - return -EINVAL; - - /* check whether the other slot has disabled the same index already */ - if (index == amd_get_l3_disable_slot(nb, !slot)) - return -EEXIST; - - amd_l3_disable_index(nb, cpu, slot, index); - - return 0; -} - -static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, - const char *buf, size_t count, - unsigned int slot) -{ - unsigned long val = 0; - int cpu, err = 0; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) - return -EINVAL; - - cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); - - if (strict_strtoul(buf, 10, &val) < 0) - return -EINVAL; - - err = amd_set_l3_disable_slot(this_leaf->base.nb, cpu, slot, val); - if (err) { - if (err == -EEXIST) - pr_warning("L3 slot %d in use/index already disabled!\n", - slot); - return err; - } - return count; -} - -#define STORE_CACHE_DISABLE(slot) \ -static ssize_t \ -store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \ - const char *buf, size_t count, \ - unsigned int cpu) \ -{ \ - return store_cache_disable(this_leaf, buf, count, slot); \ -} -STORE_CACHE_DISABLE(0) -STORE_CACHE_DISABLE(1) - -static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644, - show_cache_disable_0, store_cache_disable_0); -static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, - show_cache_disable_1, store_cache_disable_1); - -static ssize_t -show_subcaches(struct _cpuid4_info *this_leaf, char *buf, unsigned int cpu) -{ - if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) - return -EINVAL; - - return sprintf(buf, "%x\n", amd_get_subcaches(cpu)); -} - -static ssize_t -store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count, - unsigned int cpu) -{ - unsigned long val; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) - return -EINVAL; - - if (strict_strtoul(buf, 16, &val) < 0) - return -EINVAL; - - if (amd_set_subcaches(cpu, val)) - return -EINVAL; - - return count; -} - -static struct _cache_attr subcaches = - __ATTR(subcaches, 0644, show_subcaches, store_subcaches); - -#else /* CONFIG_AMD_NB */ -#define amd_init_l3_cache(x, y) -#endif /* CONFIG_AMD_NB */ - -static int -__cpuinit cpuid4_cache_lookup_regs(int index, - struct _cpuid4_info_regs *this_leaf) -{ - union _cpuid4_leaf_eax eax; - union _cpuid4_leaf_ebx ebx; - union _cpuid4_leaf_ecx ecx; - unsigned edx; - - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { - amd_cpuid4(index, &eax, &ebx, &ecx); - amd_init_l3_cache(this_leaf, index); - } else { - cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); - } - - if (eax.split.type == CACHE_TYPE_NULL) - return -EIO; /* better error ? 
*/ - - this_leaf->eax = eax; - this_leaf->ebx = ebx; - this_leaf->ecx = ecx; - this_leaf->size = (ecx.split.number_of_sets + 1) * - (ebx.split.coherency_line_size + 1) * - (ebx.split.physical_line_partition + 1) * - (ebx.split.ways_of_associativity + 1); - return 0; -} - -static int __cpuinit find_num_cache_leaves(void) -{ - unsigned int eax, ebx, ecx, edx; - union _cpuid4_leaf_eax cache_eax; - int i = -1; - - do { - ++i; - /* Do cpuid(4) loop to find out num_cache_leaves */ - cpuid_count(4, i, &eax, &ebx, &ecx, &edx); - cache_eax.full = eax; - } while (cache_eax.split.type != CACHE_TYPE_NULL); - return i; -} - -unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) -{ - /* Cache sizes */ - unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; - unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ - unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ - unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; -#ifdef CONFIG_X86_HT - unsigned int cpu = c->cpu_index; -#endif - - if (c->cpuid_level > 3) { - static int is_initialized; - - if (is_initialized == 0) { - /* Init num_cache_leaves from boot CPU */ - num_cache_leaves = find_num_cache_leaves(); - is_initialized++; - } - - /* - * Whenever possible use cpuid(4), deterministic cache - * parameters cpuid leaf to find the cache details - */ - for (i = 0; i < num_cache_leaves; i++) { - struct _cpuid4_info_regs this_leaf; - int retval; - - retval = cpuid4_cache_lookup_regs(i, &this_leaf); - if (retval >= 0) { - switch (this_leaf.eax.split.level) { - case 1: - if (this_leaf.eax.split.type == - CACHE_TYPE_DATA) - new_l1d = this_leaf.size/1024; - else if (this_leaf.eax.split.type == - CACHE_TYPE_INST) - new_l1i = this_leaf.size/1024; - break; - case 2: - new_l2 = this_leaf.size/1024; - num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; - index_msb = get_count_order(num_threads_sharing); - l2_id = c->apicid >> index_msb; - break; - case 3: - new_l3 = this_leaf.size/1024; - num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; - index_msb = get_count_order( - num_threads_sharing); - l3_id = c->apicid >> index_msb; - break; - default: - break; - } - } - } - } - /* - * Don't use cpuid2 if cpuid4 is supported. 
For P4, we use cpuid2 for - * trace cache - */ - if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) { - /* supports eax=2 call */ - int j, n; - unsigned int regs[4]; - unsigned char *dp = (unsigned char *)regs; - int only_trace = 0; - - if (num_cache_leaves != 0 && c->x86 == 15) - only_trace = 1; - - /* Number of times to iterate */ - n = cpuid_eax(2) & 0xFF; - - for (i = 0 ; i < n ; i++) { - cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]); - - /* If bit 31 is set, this is an unknown format */ - for (j = 0 ; j < 3 ; j++) - if (regs[j] & (1 << 31)) - regs[j] = 0; - - /* Byte 0 is level count, not a descriptor */ - for (j = 1 ; j < 16 ; j++) { - unsigned char des = dp[j]; - unsigned char k = 0; - - /* look up this descriptor in the table */ - while (cache_table[k].descriptor != 0) { - if (cache_table[k].descriptor == des) { - if (only_trace && cache_table[k].cache_type != LVL_TRACE) - break; - switch (cache_table[k].cache_type) { - case LVL_1_INST: - l1i += cache_table[k].size; - break; - case LVL_1_DATA: - l1d += cache_table[k].size; - break; - case LVL_2: - l2 += cache_table[k].size; - break; - case LVL_3: - l3 += cache_table[k].size; - break; - case LVL_TRACE: - trace += cache_table[k].size; - break; - } - - break; - } - - k++; - } - } - } - } - - if (new_l1d) - l1d = new_l1d; - - if (new_l1i) - l1i = new_l1i; - - if (new_l2) { - l2 = new_l2; -#ifdef CONFIG_X86_HT - per_cpu(cpu_llc_id, cpu) = l2_id; -#endif - } - - if (new_l3) { - l3 = new_l3; -#ifdef CONFIG_X86_HT - per_cpu(cpu_llc_id, cpu) = l3_id; -#endif - } - - c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); - - return l2; -} - -#ifdef CONFIG_SYSFS - -/* pointer to _cpuid4_info array (for each cache leaf) */ -static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info); -#define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y])) - -#ifdef CONFIG_SMP - -static int __cpuinit cache_shared_amd_cpu_map_setup(unsigned int cpu, int index) -{ - struct _cpuid4_info *this_leaf; - int ret, i, sibling; - struct cpuinfo_x86 *c = &cpu_data(cpu); - - ret = 0; - if (index == 3) { - ret = 1; - for_each_cpu(i, cpu_llc_shared_mask(cpu)) { - if (!per_cpu(ici_cpuid4_info, i)) - continue; - this_leaf = CPUID4_INFO_IDX(i, index); - for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) { - if (!cpu_online(sibling)) - continue; - set_bit(sibling, this_leaf->shared_cpu_map); - } - } - } else if ((c->x86 == 0x15) && ((index == 1) || (index == 2))) { - ret = 1; - for_each_cpu(i, cpu_sibling_mask(cpu)) { - if (!per_cpu(ici_cpuid4_info, i)) - continue; - this_leaf = CPUID4_INFO_IDX(i, index); - for_each_cpu(sibling, cpu_sibling_mask(cpu)) { - if (!cpu_online(sibling)) - continue; - set_bit(sibling, this_leaf->shared_cpu_map); - } - } - } - - return ret; -} - -static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) -{ - struct _cpuid4_info *this_leaf, *sibling_leaf; - unsigned long num_threads_sharing; - int index_msb, i; - struct cpuinfo_x86 *c = &cpu_data(cpu); - - if (c->x86_vendor == X86_VENDOR_AMD) { - if (cache_shared_amd_cpu_map_setup(cpu, index)) - return; - } - - this_leaf = CPUID4_INFO_IDX(cpu, index); - num_threads_sharing = 1 + this_leaf->base.eax.split.num_threads_sharing; - - if (num_threads_sharing == 1) - cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map)); - else { - index_msb = get_count_order(num_threads_sharing); - - for_each_online_cpu(i) { - if (cpu_data(i).apicid >> index_msb == - c->apicid >> index_msb) { - cpumask_set_cpu(i, - to_cpumask(this_leaf->shared_cpu_map)); - if (i != cpu && 
per_cpu(ici_cpuid4_info, i)) { - sibling_leaf = - CPUID4_INFO_IDX(i, index); - cpumask_set_cpu(cpu, to_cpumask( - sibling_leaf->shared_cpu_map)); - } - } - } - } -} -static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) -{ - struct _cpuid4_info *this_leaf, *sibling_leaf; - int sibling; - - this_leaf = CPUID4_INFO_IDX(cpu, index); - for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) { - sibling_leaf = CPUID4_INFO_IDX(sibling, index); - cpumask_clear_cpu(cpu, - to_cpumask(sibling_leaf->shared_cpu_map)); - } -} -#else -static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) -{ -} - -static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) -{ -} -#endif - -static void __cpuinit free_cache_attributes(unsigned int cpu) -{ - int i; - - for (i = 0; i < num_cache_leaves; i++) - cache_remove_shared_cpu_map(cpu, i); - - kfree(per_cpu(ici_cpuid4_info, cpu)); - per_cpu(ici_cpuid4_info, cpu) = NULL; -} - -static void __cpuinit get_cpu_leaves(void *_retval) -{ - int j, *retval = _retval, cpu = smp_processor_id(); - - /* Do cpuid and store the results */ - for (j = 0; j < num_cache_leaves; j++) { - struct _cpuid4_info *this_leaf = CPUID4_INFO_IDX(cpu, j); - - *retval = cpuid4_cache_lookup_regs(j, &this_leaf->base); - if (unlikely(*retval < 0)) { - int i; - - for (i = 0; i < j; i++) - cache_remove_shared_cpu_map(cpu, i); - break; - } - cache_shared_cpu_map_setup(cpu, j); - } -} - -static int __cpuinit detect_cache_attributes(unsigned int cpu) -{ - int retval; - - if (num_cache_leaves == 0) - return -ENOENT; - - per_cpu(ici_cpuid4_info, cpu) = kzalloc( - sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); - if (per_cpu(ici_cpuid4_info, cpu) == NULL) - return -ENOMEM; - - smp_call_function_single(cpu, get_cpu_leaves, &retval, true); - if (retval) { - kfree(per_cpu(ici_cpuid4_info, cpu)); - per_cpu(ici_cpuid4_info, cpu) = NULL; - } - - return retval; -} - -#include <linux/kobject.h> -#include <linux/sysfs.h> -#include <linux/cpu.h> - -/* pointer to kobject for cpuX/cache */ -static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject); - -struct _index_kobject { - struct kobject kobj; - unsigned int cpu; - unsigned short index; -}; - -/* pointer to array of kobjects for cpuX/cache/indexY */ -static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject); -#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(ici_index_kobject, x))[y])) - -#define show_one_plus(file_name, object, val) \ -static ssize_t show_##file_name(struct _cpuid4_info *this_leaf, char *buf, \ - unsigned int cpu) \ -{ \ - return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \ -} - -show_one_plus(level, base.eax.split.level, 0); -show_one_plus(coherency_line_size, base.ebx.split.coherency_line_size, 1); -show_one_plus(physical_line_partition, base.ebx.split.physical_line_partition, 1); -show_one_plus(ways_of_associativity, base.ebx.split.ways_of_associativity, 1); -show_one_plus(number_of_sets, base.ecx.split.number_of_sets, 1); - -static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf, - unsigned int cpu) -{ - return sprintf(buf, "%luK\n", this_leaf->base.size / 1024); -} - -static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, - int type, char *buf) -{ - ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf; - int n = 0; - - if (len > 1) { - const struct cpumask *mask; - - mask = to_cpumask(this_leaf->shared_cpu_map); - n = type ? 
- cpulist_scnprintf(buf, len-2, mask) : - cpumask_scnprintf(buf, len-2, mask); - buf[n++] = '\n'; - buf[n] = '\0'; - } - return n; -} - -static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf, - unsigned int cpu) -{ - return show_shared_cpu_map_func(leaf, 0, buf); -} - -static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf, - unsigned int cpu) -{ - return show_shared_cpu_map_func(leaf, 1, buf); -} - -static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf, - unsigned int cpu) -{ - switch (this_leaf->base.eax.split.type) { - case CACHE_TYPE_DATA: - return sprintf(buf, "Data\n"); - case CACHE_TYPE_INST: - return sprintf(buf, "Instruction\n"); - case CACHE_TYPE_UNIFIED: - return sprintf(buf, "Unified\n"); - default: - return sprintf(buf, "Unknown\n"); - } -} - -#define to_object(k) container_of(k, struct _index_kobject, kobj) -#define to_attr(a) container_of(a, struct _cache_attr, attr) - -#define define_one_ro(_name) \ -static struct _cache_attr _name = \ - __ATTR(_name, 0444, show_##_name, NULL) - -define_one_ro(level); -define_one_ro(type); -define_one_ro(coherency_line_size); -define_one_ro(physical_line_partition); -define_one_ro(ways_of_associativity); -define_one_ro(number_of_sets); -define_one_ro(size); -define_one_ro(shared_cpu_map); -define_one_ro(shared_cpu_list); - -static struct attribute *default_attrs[] = { - &type.attr, - &level.attr, - &coherency_line_size.attr, - &physical_line_partition.attr, - &ways_of_associativity.attr, - &number_of_sets.attr, - &size.attr, - &shared_cpu_map.attr, - &shared_cpu_list.attr, - NULL -}; - -#ifdef CONFIG_AMD_NB -static struct attribute ** __cpuinit amd_l3_attrs(void) -{ - static struct attribute **attrs; - int n; - - if (attrs) - return attrs; - - n = sizeof (default_attrs) / sizeof (struct attribute *); - - if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) - n += 2; - - if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) - n += 1; - - attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL); - if (attrs == NULL) - return attrs = default_attrs; - - for (n = 0; default_attrs[n]; n++) - attrs[n] = default_attrs[n]; - - if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) { - attrs[n++] = &cache_disable_0.attr; - attrs[n++] = &cache_disable_1.attr; - } - - if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) - attrs[n++] = &subcaches.attr; - - return attrs; -} -#endif - -static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) -{ - struct _cache_attr *fattr = to_attr(attr); - struct _index_kobject *this_leaf = to_object(kobj); - ssize_t ret; - - ret = fattr->show ? - fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), - buf, this_leaf->cpu) : - 0; - return ret; -} - -static ssize_t store(struct kobject *kobj, struct attribute *attr, - const char *buf, size_t count) -{ - struct _cache_attr *fattr = to_attr(attr); - struct _index_kobject *this_leaf = to_object(kobj); - ssize_t ret; - - ret = fattr->store ? 
- fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), - buf, count, this_leaf->cpu) : - 0; - return ret; -} - -static const struct sysfs_ops sysfs_ops = { - .show = show, - .store = store, -}; - -static struct kobj_type ktype_cache = { - .sysfs_ops = &sysfs_ops, - .default_attrs = default_attrs, -}; - -static struct kobj_type ktype_percpu_entry = { - .sysfs_ops = &sysfs_ops, -}; - -static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu) -{ - kfree(per_cpu(ici_cache_kobject, cpu)); - kfree(per_cpu(ici_index_kobject, cpu)); - per_cpu(ici_cache_kobject, cpu) = NULL; - per_cpu(ici_index_kobject, cpu) = NULL; - free_cache_attributes(cpu); -} - -static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu) -{ - int err; - - if (num_cache_leaves == 0) - return -ENOENT; - - err = detect_cache_attributes(cpu); - if (err) - return err; - - /* Allocate all required memory */ - per_cpu(ici_cache_kobject, cpu) = - kzalloc(sizeof(struct kobject), GFP_KERNEL); - if (unlikely(per_cpu(ici_cache_kobject, cpu) == NULL)) - goto err_out; - - per_cpu(ici_index_kobject, cpu) = kzalloc( - sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL); - if (unlikely(per_cpu(ici_index_kobject, cpu) == NULL)) - goto err_out; - - return 0; - -err_out: - cpuid4_cache_sysfs_exit(cpu); - return -ENOMEM; -} - -static DECLARE_BITMAP(cache_dev_map, NR_CPUS); - -/* Add/Remove cache interface for CPU device */ -static int __cpuinit cache_add_dev(struct device *dev) -{ - unsigned int cpu = dev->id; - unsigned long i, j; - struct _index_kobject *this_object; - struct _cpuid4_info *this_leaf; - int retval; - - retval = cpuid4_cache_sysfs_init(cpu); - if (unlikely(retval < 0)) - return retval; - - retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu), - &ktype_percpu_entry, - &dev->kobj, "%s", "cache"); - if (retval < 0) { - cpuid4_cache_sysfs_exit(cpu); - return retval; - } - - for (i = 0; i < num_cache_leaves; i++) { - this_object = INDEX_KOBJECT_PTR(cpu, i); - this_object->cpu = cpu; - this_object->index = i; - - this_leaf = CPUID4_INFO_IDX(cpu, i); - - ktype_cache.default_attrs = default_attrs; -#ifdef CONFIG_AMD_NB - if (this_leaf->base.nb) - ktype_cache.default_attrs = amd_l3_attrs(); -#endif - retval = kobject_init_and_add(&(this_object->kobj), - &ktype_cache, - per_cpu(ici_cache_kobject, cpu), - "index%1lu", i); - if (unlikely(retval)) { - for (j = 0; j < i; j++) - kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj)); - kobject_put(per_cpu(ici_cache_kobject, cpu)); - cpuid4_cache_sysfs_exit(cpu); - return retval; - } - kobject_uevent(&(this_object->kobj), KOBJ_ADD); - } - cpumask_set_cpu(cpu, to_cpumask(cache_dev_map)); - - kobject_uevent(per_cpu(ici_cache_kobject, cpu), KOBJ_ADD); - return 0; -} - -static void __cpuinit cache_remove_dev(struct device *dev) -{ - unsigned int cpu = dev->id; - unsigned long i; - - if (per_cpu(ici_cpuid4_info, cpu) == NULL) - return; - if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map))) - return; - cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map)); - - for (i = 0; i < num_cache_leaves; i++) - kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj)); - kobject_put(per_cpu(ici_cache_kobject, cpu)); - cpuid4_cache_sysfs_exit(cpu); -} - -static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - struct device *dev; - - dev = get_cpu_device(cpu); - switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - cache_add_dev(dev); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - 
cache_remove_dev(dev); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = { - .notifier_call = cacheinfo_cpu_callback, -}; - -static int __cpuinit cache_sysfs_init(void) -{ - int i; - - if (num_cache_leaves == 0) - return 0; - - for_each_online_cpu(i) { - int err; - struct device *dev = get_cpu_device(i); - - err = cache_add_dev(dev); - if (err) - return err; - } - register_hotcpu_notifier(&cacheinfo_cpu_notifier); - return 0; -} - -device_initcall(cache_sysfs_init); - -#endif diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/match.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/match.c deleted file mode 100644 index 5502b289..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/match.c +++ /dev/null @@ -1,91 +0,0 @@ -#include <asm/cpu_device_id.h> -#include <asm/processor.h> -#include <linux/cpu.h> -#include <linux/module.h> -#include <linux/slab.h> - -/** - * x86_match_cpu - match current CPU again an array of x86_cpu_ids - * @match: Pointer to array of x86_cpu_ids. Last entry terminated with - * {}. - * - * Return the entry if the current CPU matches the entries in the - * passed x86_cpu_id match table. Otherwise NULL. The match table - * contains vendor (X86_VENDOR_*), family, model and feature bits or - * respective wildcard entries. - * - * A typical table entry would be to match a specific CPU - * { X86_VENDOR_INTEL, 6, 0x12 } - * or to match a specific CPU feature - * { X86_FEATURE_MATCH(X86_FEATURE_FOOBAR) } - * - * Fields can be wildcarded with %X86_VENDOR_ANY, %X86_FAMILY_ANY, - * %X86_MODEL_ANY, %X86_FEATURE_ANY or 0 (except for vendor) - * - * Arrays used to match for this should also be declared using - * MODULE_DEVICE_TABLE(x86_cpu, ...) - * - * This always matches against the boot cpu, assuming models and features are - * consistent over all CPUs. 
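To make the kernel-doc above concrete, here is a rough sketch of how a driver typically consumes x86_match_cpu(). It is not part of the deleted file; the table contents and every example_* name are hypothetical, and a real driver would normally also export the table via MODULE_DEVICE_TABLE() as the comment above describes.

#include <linux/module.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>

static const struct x86_cpu_id example_cpu_ids[] = {
	{ X86_VENDOR_INTEL, 6, 0x12 },		/* specific vendor/family/model */
	X86_FEATURE_MATCH(X86_FEATURE_AES),	/* or any CPU with a feature bit */
	{}					/* empty entry terminates the table */
};

static int __init example_init(void)
{
	/* Returns the matching entry, or NULL if the boot CPU matches none. */
	if (!x86_match_cpu(example_cpu_ids))
		return -ENODEV;
	return 0;
}

static void __exit example_exit(void)
{
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");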
- */ -const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match) -{ - const struct x86_cpu_id *m; - struct cpuinfo_x86 *c = &boot_cpu_data; - - for (m = match; m->vendor | m->family | m->model | m->feature; m++) { - if (m->vendor != X86_VENDOR_ANY && c->x86_vendor != m->vendor) - continue; - if (m->family != X86_FAMILY_ANY && c->x86 != m->family) - continue; - if (m->model != X86_MODEL_ANY && c->x86_model != m->model) - continue; - if (m->feature != X86_FEATURE_ANY && !cpu_has(c, m->feature)) - continue; - return m; - } - return NULL; -} -EXPORT_SYMBOL(x86_match_cpu); - -ssize_t arch_print_cpu_modalias(struct device *dev, - struct device_attribute *attr, - char *bufptr) -{ - int size = PAGE_SIZE; - int i, n; - char *buf = bufptr; - - n = snprintf(buf, size, "x86cpu:vendor:%04X:family:%04X:" - "model:%04X:feature:", - boot_cpu_data.x86_vendor, - boot_cpu_data.x86, - boot_cpu_data.x86_model); - size -= n; - buf += n; - size -= 1; - for (i = 0; i < NCAPINTS*32; i++) { - if (boot_cpu_has(i)) { - n = snprintf(buf, size, ",%04X", i); - if (n >= size) { - WARN(1, "x86 features overflow page\n"); - break; - } - size -= n; - buf += n; - } - } - *buf++ = '\n'; - return buf - bufptr; -} - -int arch_cpu_uevent(struct device *dev, struct kobj_uevent_env *env) -{ - char *buf = kzalloc(PAGE_SIZE, GFP_KERNEL); - if (buf) { - arch_print_cpu_modalias(NULL, NULL, buf); - add_uevent_var(env, "MODALIAS=%s", buf); - kfree(buf); - } - return 0; -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/Makefile b/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/Makefile deleted file mode 100644 index bb34b03a..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -obj-y = mce.o mce-severity.o - -obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o -obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o -obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o -obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o -obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o - -obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o - -obj-$(CONFIG_ACPI_APEI) += mce-apei.o diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/mce-apei.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/mce-apei.c deleted file mode 100644 index 507ea586..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/mce-apei.c +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Bridge between MCE and APEI - * - * On some machine, corrected memory errors are reported via APEI - * generic hardware error source (GHES) instead of corrected Machine - * Check. These corrected memory errors can be reported to user space - * through /dev/mcelog via faking a corrected Machine Check, so that - * the error memory page can be offlined by /sbin/mcelog if the error - * count for one page is beyond the threshold. - * - * For fatal MCE, save MCE record into persistent storage via ERST, so - * that the MCE record can be logged after reboot via ERST. - * - * Copyright 2010 Intel Corp. - * Author: Huang Ying <ying.huang@intel.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/export.h> -#include <linux/kernel.h> -#include <linux/acpi.h> -#include <linux/cper.h> -#include <acpi/apei.h> -#include <asm/mce.h> - -#include "mce-internal.h" - -void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err) -{ - struct mce m; - - /* Only corrected MC is reported */ - if (!corrected) - return; - - mce_setup(&m); - m.bank = 1; - /* Fake a memory read corrected error with unknown channel */ - m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f; - m.addr = mem_err->physical_addr; - mce_log(&m); - mce_notify_irq(); -} -EXPORT_SYMBOL_GPL(apei_mce_report_mem_error); - -#define CPER_CREATOR_MCE \ - UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \ - 0x64, 0x90, 0xb8, 0x9d) -#define CPER_SECTION_TYPE_MCE \ - UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \ - 0x04, 0x4a, 0x38, 0xfc) - -/* - * CPER specification (in UEFI specification 2.3 appendix N) requires - * byte-packed. - */ -struct cper_mce_record { - struct cper_record_header hdr; - struct cper_section_descriptor sec_hdr; - struct mce mce; -} __packed; - -int apei_write_mce(struct mce *m) -{ - struct cper_mce_record rcd; - - memset(&rcd, 0, sizeof(rcd)); - memcpy(rcd.hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE); - rcd.hdr.revision = CPER_RECORD_REV; - rcd.hdr.signature_end = CPER_SIG_END; - rcd.hdr.section_count = 1; - rcd.hdr.error_severity = CPER_SEV_FATAL; - /* timestamp, platform_id, partition_id are all invalid */ - rcd.hdr.validation_bits = 0; - rcd.hdr.record_length = sizeof(rcd); - rcd.hdr.creator_id = CPER_CREATOR_MCE; - rcd.hdr.notification_type = CPER_NOTIFY_MCE; - rcd.hdr.record_id = cper_next_record_id(); - rcd.hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR; - - rcd.sec_hdr.section_offset = (void *)&rcd.mce - (void *)&rcd; - rcd.sec_hdr.section_length = sizeof(rcd.mce); - rcd.sec_hdr.revision = CPER_SEC_REV; - /* fru_id and fru_text is invalid */ - rcd.sec_hdr.validation_bits = 0; - rcd.sec_hdr.flags = CPER_SEC_PRIMARY; - rcd.sec_hdr.section_type = CPER_SECTION_TYPE_MCE; - rcd.sec_hdr.section_severity = CPER_SEV_FATAL; - - memcpy(&rcd.mce, m, sizeof(*m)); - - return erst_write(&rcd.hdr); -} - -ssize_t apei_read_mce(struct mce *m, u64 *record_id) -{ - struct cper_mce_record rcd; - int rc, pos; - - rc = erst_get_record_id_begin(&pos); - if (rc) - return rc; -retry: - rc = erst_get_record_id_next(&pos, record_id); - if (rc) - goto out; - /* no more record */ - if (*record_id == APEI_ERST_INVALID_RECORD_ID) - goto out; - rc = erst_read(*record_id, &rcd.hdr, sizeof(rcd)); - /* someone else has cleared the record, try next one */ - if (rc == -ENOENT) - goto retry; - else if (rc < 0) - goto out; - /* try to skip other type records in storage */ - else if (rc != sizeof(rcd) || - uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) - goto retry; - memcpy(m, &rcd.mce, sizeof(*m)); - rc = sizeof(*m); -out: - erst_get_record_id_end(); - - return rc; -} - -/* Check whether there is record in ERST */ -int apei_check_mce(void) -{ - return erst_get_record_count(); -} - -int apei_clear_mce(u64 record_id) -{ - return erst_clear(record_id); -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/mce-inject.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/mce-inject.c deleted file mode 100644 index fc4beb39..00000000 --- 
a/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Machine check injection support. - * Copyright 2008 Intel Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - * - * Authors: - * Andi Kleen - * Ying Huang - */ -#include <linux/uaccess.h> -#include <linux/module.h> -#include <linux/timer.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/fs.h> -#include <linux/preempt.h> -#include <linux/smp.h> -#include <linux/notifier.h> -#include <linux/kdebug.h> -#include <linux/cpu.h> -#include <linux/sched.h> -#include <linux/gfp.h> -#include <asm/mce.h> -#include <asm/apic.h> -#include <asm/nmi.h> - -/* Update fake mce registers on current CPU. */ -static void inject_mce(struct mce *m) -{ - struct mce *i = &per_cpu(injectm, m->extcpu); - - /* Make sure no one reads partially written injectm */ - i->finished = 0; - mb(); - m->finished = 0; - /* First set the fields after finished */ - i->extcpu = m->extcpu; - mb(); - /* Now write record in order, finished last (except above) */ - memcpy(i, m, sizeof(struct mce)); - /* Finally activate it */ - mb(); - i->finished = 1; -} - -static void raise_poll(struct mce *m) -{ - unsigned long flags; - mce_banks_t b; - - memset(&b, 0xff, sizeof(mce_banks_t)); - local_irq_save(flags); - machine_check_poll(0, &b); - local_irq_restore(flags); - m->finished = 0; -} - -static void raise_exception(struct mce *m, struct pt_regs *pregs) -{ - struct pt_regs regs; - unsigned long flags; - - if (!pregs) { - memset(®s, 0, sizeof(struct pt_regs)); - regs.ip = m->ip; - regs.cs = m->cs; - pregs = ®s; - } - /* in mcheck exeception handler, irq will be disabled */ - local_irq_save(flags); - do_machine_check(pregs, 0); - local_irq_restore(flags); - m->finished = 0; -} - -static cpumask_var_t mce_inject_cpumask; - -static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs) -{ - int cpu = smp_processor_id(); - struct mce *m = &__get_cpu_var(injectm); - if (!cpumask_test_cpu(cpu, mce_inject_cpumask)) - return NMI_DONE; - cpumask_clear_cpu(cpu, mce_inject_cpumask); - if (m->inject_flags & MCJ_EXCEPTION) - raise_exception(m, regs); - else if (m->status) - raise_poll(m); - return NMI_HANDLED; -} - -static void mce_irq_ipi(void *info) -{ - int cpu = smp_processor_id(); - struct mce *m = &__get_cpu_var(injectm); - - if (cpumask_test_cpu(cpu, mce_inject_cpumask) && - m->inject_flags & MCJ_EXCEPTION) { - cpumask_clear_cpu(cpu, mce_inject_cpumask); - raise_exception(m, NULL); - } -} - -/* Inject mce on current CPU */ -static int raise_local(void) -{ - struct mce *m = &__get_cpu_var(injectm); - int context = MCJ_CTX(m->inject_flags); - int ret = 0; - int cpu = m->extcpu; - - if (m->inject_flags & MCJ_EXCEPTION) { - printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu); - switch (context) { - case MCJ_CTX_IRQ: - /* - * Could do more to fake interrupts like - * calling irq_enter, but the necessary - * machinery isn't exported currently. 
- */ - /*FALL THROUGH*/ - case MCJ_CTX_PROCESS: - raise_exception(m, NULL); - break; - default: - printk(KERN_INFO "Invalid MCE context\n"); - ret = -EINVAL; - } - printk(KERN_INFO "MCE exception done on CPU %d\n", cpu); - } else if (m->status) { - printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu); - raise_poll(m); - mce_notify_irq(); - printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu); - } else - m->finished = 0; - - return ret; -} - -static void raise_mce(struct mce *m) -{ - int context = MCJ_CTX(m->inject_flags); - - inject_mce(m); - - if (context == MCJ_CTX_RANDOM) - return; - -#ifdef CONFIG_X86_LOCAL_APIC - if (m->inject_flags & (MCJ_IRQ_BRAODCAST | MCJ_NMI_BROADCAST)) { - unsigned long start; - int cpu; - - get_online_cpus(); - cpumask_copy(mce_inject_cpumask, cpu_online_mask); - cpumask_clear_cpu(get_cpu(), mce_inject_cpumask); - for_each_online_cpu(cpu) { - struct mce *mcpu = &per_cpu(injectm, cpu); - if (!mcpu->finished || - MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM) - cpumask_clear_cpu(cpu, mce_inject_cpumask); - } - if (!cpumask_empty(mce_inject_cpumask)) { - if (m->inject_flags & MCJ_IRQ_BRAODCAST) { - /* - * don't wait because mce_irq_ipi is necessary - * to be sync with following raise_local - */ - preempt_disable(); - smp_call_function_many(mce_inject_cpumask, - mce_irq_ipi, NULL, 0); - preempt_enable(); - } else if (m->inject_flags & MCJ_NMI_BROADCAST) - apic->send_IPI_mask(mce_inject_cpumask, - NMI_VECTOR); - } - start = jiffies; - while (!cpumask_empty(mce_inject_cpumask)) { - if (!time_before(jiffies, start + 2*HZ)) { - printk(KERN_ERR - "Timeout waiting for mce inject %lx\n", - *cpumask_bits(mce_inject_cpumask)); - break; - } - cpu_relax(); - } - raise_local(); - put_cpu(); - put_online_cpus(); - } else -#endif - raise_local(); -} - -/* Error injection interface */ -static ssize_t mce_write(struct file *filp, const char __user *ubuf, - size_t usize, loff_t *off) -{ - struct mce m; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - /* - * There are some cases where real MSR reads could slip - * through. - */ - if (!boot_cpu_has(X86_FEATURE_MCE) || !boot_cpu_has(X86_FEATURE_MCA)) - return -EIO; - - if ((unsigned long)usize > sizeof(struct mce)) - usize = sizeof(struct mce); - if (copy_from_user(&m, ubuf, usize)) - return -EFAULT; - - if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu)) - return -EINVAL; - - /* - * Need to give user space some time to set everything up, - * so do it a jiffie or two later everywhere. - */ - schedule_timeout(2); - raise_mce(&m); - return usize; -} - -static int inject_init(void) -{ - if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL)) - return -ENOMEM; - printk(KERN_INFO "Machine check injector initialized\n"); - register_mce_write_callback(mce_write); - register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, - "mce_notify"); - return 0; -} - -module_init(inject_init); -/* - * Cannot tolerate unloading currently because we cannot - * guarantee all openers of mce_chrdev will get a reference to us. 
- */ -MODULE_LICENSE("GPL"); diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/mce-internal.h b/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/mce-internal.h deleted file mode 100644 index ed44c8a6..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ /dev/null @@ -1,53 +0,0 @@ -#include <linux/device.h> -#include <asm/mce.h> - -enum severity_level { - MCE_NO_SEVERITY, - MCE_KEEP_SEVERITY, - MCE_SOME_SEVERITY, - MCE_AO_SEVERITY, - MCE_UC_SEVERITY, - MCE_AR_SEVERITY, - MCE_PANIC_SEVERITY, -}; - -#define ATTR_LEN 16 - -/* One object for each MCE bank, shared by all CPUs */ -struct mce_bank { - u64 ctl; /* subevents to enable */ - unsigned char init; /* initialise bank? */ - struct device_attribute attr; /* device attribute */ - char attrname[ATTR_LEN]; /* attribute name */ -}; - -int mce_severity(struct mce *a, int tolerant, char **msg); -struct dentry *mce_get_debugfs_dir(void); - -extern int mce_ser; - -extern struct mce_bank *mce_banks; - -#ifdef CONFIG_ACPI_APEI -int apei_write_mce(struct mce *m); -ssize_t apei_read_mce(struct mce *m, u64 *record_id); -int apei_check_mce(void); -int apei_clear_mce(u64 record_id); -#else -static inline int apei_write_mce(struct mce *m) -{ - return -EINVAL; -} -static inline ssize_t apei_read_mce(struct mce *m, u64 *record_id) -{ - return 0; -} -static inline int apei_check_mce(void) -{ - return 0; -} -static inline int apei_clear_mce(u64 record_id) -{ - return -EINVAL; -} -#endif diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/mce-severity.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/mce-severity.c deleted file mode 100644 index 1ccd4539..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ /dev/null @@ -1,285 +0,0 @@ -/* - * MCE grading rules. - * Copyright 2008, 2009 Intel Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - * - * Author: Andi Kleen - */ -#include <linux/kernel.h> -#include <linux/seq_file.h> -#include <linux/init.h> -#include <linux/debugfs.h> -#include <asm/mce.h> - -#include "mce-internal.h" - -/* - * Grade an mce by severity. In general the most severe ones are processed - * first. Since there are quite a lot of combinations test the bits in a - * table-driven way. The rules are simply processed in order, first - * match wins. - * - * Note this is only used for machine check exceptions, the corrected - * errors use much simpler rules. The exceptions still check for the corrected - * errors, but only to leave them alone for the CMCI handler (except for - * panic situations) - */ - -enum context { IN_KERNEL = 1, IN_USER = 2 }; -enum ser { SER_REQUIRED = 1, NO_SER = 2 }; - -static struct severity { - u64 mask; - u64 result; - unsigned char sev; - unsigned char mcgmask; - unsigned char mcgres; - unsigned char ser; - unsigned char context; - unsigned char covered; - char *msg; -} severities[] = { -#define MCESEV(s, m, c...) 
{ .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c } -#define KERNEL .context = IN_KERNEL -#define USER .context = IN_USER -#define SER .ser = SER_REQUIRED -#define NOSER .ser = NO_SER -#define BITCLR(x) .mask = x, .result = 0 -#define BITSET(x) .mask = x, .result = x -#define MCGMASK(x, y) .mcgmask = x, .mcgres = y -#define MASK(x, y) .mask = x, .result = y -#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S) -#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR) -#define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV) -#define MCACOD 0xffff -/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */ -#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */ -#define MCACOD_SCRUBMSK 0xfff0 -#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */ -#define MCACOD_DATA 0x0134 /* Data Load */ -#define MCACOD_INSTR 0x0150 /* Instruction Fetch */ - - MCESEV( - NO, "Invalid", - BITCLR(MCI_STATUS_VAL) - ), - MCESEV( - NO, "Not enabled", - BITCLR(MCI_STATUS_EN) - ), - MCESEV( - PANIC, "Processor context corrupt", - BITSET(MCI_STATUS_PCC) - ), - /* When MCIP is not set something is very confused */ - MCESEV( - PANIC, "MCIP not set in MCA handler", - MCGMASK(MCG_STATUS_MCIP, 0) - ), - /* Neither return not error IP -- no chance to recover -> PANIC */ - MCESEV( - PANIC, "Neither restart nor error IP", - MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0) - ), - MCESEV( - PANIC, "In kernel and no restart IP", - KERNEL, MCGMASK(MCG_STATUS_RIPV, 0) - ), - MCESEV( - KEEP, "Corrected error", - NOSER, BITCLR(MCI_STATUS_UC) - ), - - /* ignore OVER for UCNA */ - MCESEV( - KEEP, "Uncorrected no action required", - SER, MASK(MCI_UC_SAR, MCI_STATUS_UC) - ), - MCESEV( - PANIC, "Illegal combination (UCNA with AR=1)", - SER, - MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR) - ), - MCESEV( - KEEP, "Non signalled machine check", - SER, BITCLR(MCI_STATUS_S) - ), - - MCESEV( - PANIC, "Action required with lost events", - SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR) - ), - - /* known AR MCACODs: */ -#ifdef CONFIG_MEMORY_FAILURE - MCESEV( - KEEP, "HT thread notices Action required: data load error", - SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), - MCGMASK(MCG_STATUS_EIPV, 0) - ), - MCESEV( - AR, "Action required: data load error", - SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), - USER - ), -#endif - MCESEV( - PANIC, "Action required: unknown MCACOD", - SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR) - ), - - /* known AO MCACODs: */ - MCESEV( - AO, "Action optional: memory scrubbing error", - SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD_SCRUBMSK, MCI_UC_S|MCACOD_SCRUB) - ), - MCESEV( - AO, "Action optional: last level cache writeback error", - SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|MCACOD_L3WB) - ), - MCESEV( - SOME, "Action optional: unknown MCACOD", - SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S) - ), - MCESEV( - SOME, "Action optional with lost events", - SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_S) - ), - - MCESEV( - PANIC, "Overflowed uncorrected", - BITSET(MCI_STATUS_OVER|MCI_STATUS_UC) - ), - MCESEV( - UC, "Uncorrected", - BITSET(MCI_STATUS_UC) - ), - MCESEV( - SOME, "No match", - BITSET(0) - ) /* always matches. keep at end */ -}; - -/* - * If mcgstatus indicated that ip/cs on the stack were - * no good, then "m->cs" will be zero and we will have - * to assume the worst case (IN_KERNEL) as we actually - * have no idea what we were executing when the machine - * check hit. 
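Because the table above is built entirely from helper macros, it is worth expanding one entry by hand. Taking the "Processor context corrupt" rule, the MCESEV()/BITSET() pair reduces to a plain designated initializer; this is purely a worked expansion of the macros defined above:

	MCESEV(
		PANIC, "Processor context corrupt",
		BITSET(MCI_STATUS_PCC)
	)
	/* ...expands to... */
	{
		.sev	= MCE_PANIC_SEVERITY,
		.msg	= "Processor context corrupt",
		.mask	= MCI_STATUS_PCC,	/* BITSET(): test this bit...   */
		.result	= MCI_STATUS_PCC,	/* ...and require it to be set  */
	}

mce_severity() below walks the table in order; the first entry whose mask/result, mcgmask/mcgres, ser and context constraints all hold supplies the grade and the message.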
- * If we do have a good "m->cs" (or a faked one in the - * case we were executing in VM86 mode) we can use it to - * distinguish an exception taken in user from from one - * taken in the kernel. - */ -static int error_context(struct mce *m) -{ - return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL; -} - -int mce_severity(struct mce *m, int tolerant, char **msg) -{ - enum context ctx = error_context(m); - struct severity *s; - - for (s = severities;; s++) { - if ((m->status & s->mask) != s->result) - continue; - if ((m->mcgstatus & s->mcgmask) != s->mcgres) - continue; - if (s->ser == SER_REQUIRED && !mce_ser) - continue; - if (s->ser == NO_SER && mce_ser) - continue; - if (s->context && ctx != s->context) - continue; - if (msg) - *msg = s->msg; - s->covered = 1; - if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) { - if (panic_on_oops || tolerant < 1) - return MCE_PANIC_SEVERITY; - } - return s->sev; - } -} - -#ifdef CONFIG_DEBUG_FS -static void *s_start(struct seq_file *f, loff_t *pos) -{ - if (*pos >= ARRAY_SIZE(severities)) - return NULL; - return &severities[*pos]; -} - -static void *s_next(struct seq_file *f, void *data, loff_t *pos) -{ - if (++(*pos) >= ARRAY_SIZE(severities)) - return NULL; - return &severities[*pos]; -} - -static void s_stop(struct seq_file *f, void *data) -{ -} - -static int s_show(struct seq_file *f, void *data) -{ - struct severity *ser = data; - seq_printf(f, "%d\t%s\n", ser->covered, ser->msg); - return 0; -} - -static const struct seq_operations severities_seq_ops = { - .start = s_start, - .next = s_next, - .stop = s_stop, - .show = s_show, -}; - -static int severities_coverage_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &severities_seq_ops); -} - -static ssize_t severities_coverage_write(struct file *file, - const char __user *ubuf, - size_t count, loff_t *ppos) -{ - int i; - for (i = 0; i < ARRAY_SIZE(severities); i++) - severities[i].covered = 0; - return count; -} - -static const struct file_operations severities_coverage_fops = { - .open = severities_coverage_open, - .release = seq_release, - .read = seq_read, - .write = severities_coverage_write, - .llseek = seq_lseek, -}; - -static int __init severities_debugfs_init(void) -{ - struct dentry *dmce, *fsev; - - dmce = mce_get_debugfs_dir(); - if (!dmce) - goto err_out; - - fsev = debugfs_create_file("severities-coverage", 0444, dmce, NULL, - &severities_coverage_fops); - if (!fsev) - goto err_out; - - return 0; - -err_out: - return -ENOMEM; -} -late_initcall(severities_debugfs_init); -#endif /* CONFIG_DEBUG_FS */ diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/mce.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/mce.c deleted file mode 100644 index 61604aef..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/mce.c +++ /dev/null @@ -1,2364 +0,0 @@ -/* - * Machine check handler. - * - * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. - * Rest from unknown author(s). - * 2004 Andi Kleen. Rewrote most of it. 
- * Copyright 2008 Intel Corporation - * Author: Andi Kleen - */ -#include <linux/thread_info.h> -#include <linux/capability.h> -#include <linux/miscdevice.h> -#include <linux/ratelimit.h> -#include <linux/kallsyms.h> -#include <linux/rcupdate.h> -#include <linux/kobject.h> -#include <linux/uaccess.h> -#include <linux/kdebug.h> -#include <linux/kernel.h> -#include <linux/percpu.h> -#include <linux/string.h> -#include <linux/device.h> -#include <linux/syscore_ops.h> -#include <linux/delay.h> -#include <linux/ctype.h> -#include <linux/sched.h> -#include <linux/sysfs.h> -#include <linux/types.h> -#include <linux/slab.h> -#include <linux/init.h> -#include <linux/kmod.h> -#include <linux/poll.h> -#include <linux/nmi.h> -#include <linux/cpu.h> -#include <linux/smp.h> -#include <linux/fs.h> -#include <linux/mm.h> -#include <linux/debugfs.h> -#include <linux/irq_work.h> -#include <linux/export.h> - -#include <asm/processor.h> -#include <asm/mce.h> -#include <asm/msr.h> - -#include "mce-internal.h" - -static DEFINE_MUTEX(mce_chrdev_read_mutex); - -#define rcu_dereference_check_mce(p) \ - rcu_dereference_index_check((p), \ - rcu_read_lock_sched_held() || \ - lockdep_is_held(&mce_chrdev_read_mutex)) - -#define CREATE_TRACE_POINTS -#include <trace/events/mce.h> - -int mce_disabled __read_mostly; - -#define MISC_MCELOG_MINOR 227 - -#define SPINUNIT 100 /* 100ns */ - -atomic_t mce_entry; - -DEFINE_PER_CPU(unsigned, mce_exception_count); - -/* - * Tolerant levels: - * 0: always panic on uncorrected errors, log corrected errors - * 1: panic or SIGBUS on uncorrected errors, log corrected errors - * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors - * 3: never panic or SIGBUS, log all errors (for testing only) - */ -static int tolerant __read_mostly = 1; -static int banks __read_mostly; -static int rip_msr __read_mostly; -static int mce_bootlog __read_mostly = -1; -static int monarch_timeout __read_mostly = -1; -static int mce_panic_timeout __read_mostly; -static int mce_dont_log_ce __read_mostly; -int mce_cmci_disabled __read_mostly; -int mce_ignore_ce __read_mostly; -int mce_ser __read_mostly; - -struct mce_bank *mce_banks __read_mostly; - -/* User mode helper program triggered by machine check event */ -static unsigned long mce_need_notify; -static char mce_helper[128]; -static char *mce_helper_argv[2] = { mce_helper, NULL }; - -static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait); - -static DEFINE_PER_CPU(struct mce, mces_seen); -static int cpu_missing; - -/* MCA banks polled by the period polling timer for corrected events */ -DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { - [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL -}; - -static DEFINE_PER_CPU(struct work_struct, mce_work); - -/* - * CPU/chipset specific EDAC code can register a notifier call here to print - * MCE errors in a human-readable form. - */ -ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); - -/* Do initial initialization of a struct mce */ -void mce_setup(struct mce *m) -{ - memset(m, 0, sizeof(struct mce)); - m->cpu = m->extcpu = smp_processor_id(); - rdtscll(m->tsc); - /* We hope get_seconds stays lockless */ - m->time = get_seconds(); - m->cpuvendor = boot_cpu_data.x86_vendor; - m->cpuid = cpuid_eax(1); - m->socketid = cpu_data(m->extcpu).phys_proc_id; - m->apicid = cpu_data(m->extcpu).initial_apicid; - rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap); -} - -DEFINE_PER_CPU(struct mce, injectm); -EXPORT_PER_CPU_SYMBOL_GPL(injectm); - -/* - * Lockless MCE logging infrastructure. 
- * This avoids deadlocks on printk locks without having to break locks. Also - * separate MCEs from kernel messages to avoid bogus bug reports. - */ - -static struct mce_log mcelog = { - .signature = MCE_LOG_SIGNATURE, - .len = MCE_LOG_LEN, - .recordlen = sizeof(struct mce), -}; - -void mce_log(struct mce *mce) -{ - unsigned next, entry; - int ret = 0; - - /* Emit the trace record: */ - trace_mce_record(mce); - - ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce); - if (ret == NOTIFY_STOP) - return; - - mce->finished = 0; - wmb(); - for (;;) { - entry = rcu_dereference_check_mce(mcelog.next); - for (;;) { - - /* - * When the buffer fills up discard new entries. - * Assume that the earlier errors are the more - * interesting ones: - */ - if (entry >= MCE_LOG_LEN) { - set_bit(MCE_OVERFLOW, - (unsigned long *)&mcelog.flags); - return; - } - /* Old left over entry. Skip: */ - if (mcelog.entry[entry].finished) { - entry++; - continue; - } - break; - } - smp_rmb(); - next = entry + 1; - if (cmpxchg(&mcelog.next, entry, next) == entry) - break; - } - memcpy(mcelog.entry + entry, mce, sizeof(struct mce)); - wmb(); - mcelog.entry[entry].finished = 1; - wmb(); - - mce->finished = 1; - set_bit(0, &mce_need_notify); -} - -static void drain_mcelog_buffer(void) -{ - unsigned int next, i, prev = 0; - - next = ACCESS_ONCE(mcelog.next); - - do { - struct mce *m; - - /* drain what was logged during boot */ - for (i = prev; i < next; i++) { - unsigned long start = jiffies; - unsigned retries = 1; - - m = &mcelog.entry[i]; - - while (!m->finished) { - if (time_after_eq(jiffies, start + 2*retries)) - retries++; - - cpu_relax(); - - if (!m->finished && retries >= 4) { - pr_err("MCE: skipping error being logged currently!\n"); - break; - } - } - smp_rmb(); - atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); - } - - memset(mcelog.entry + prev, 0, (next - prev) * sizeof(*m)); - prev = next; - next = cmpxchg(&mcelog.next, prev, 0); - } while (next != prev); -} - - -void mce_register_decode_chain(struct notifier_block *nb) -{ - atomic_notifier_chain_register(&x86_mce_decoder_chain, nb); - drain_mcelog_buffer(); -} -EXPORT_SYMBOL_GPL(mce_register_decode_chain); - -void mce_unregister_decode_chain(struct notifier_block *nb) -{ - atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb); -} -EXPORT_SYMBOL_GPL(mce_unregister_decode_chain); - -static void print_mce(struct mce *m) -{ - int ret = 0; - - pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n", - m->extcpu, m->mcgstatus, m->bank, m->status); - - if (m->ip) { - pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ", - !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", - m->cs, m->ip); - - if (m->cs == __KERNEL_CS) - print_symbol("{%s}", m->ip); - pr_cont("\n"); - } - - pr_emerg(HW_ERR "TSC %llx ", m->tsc); - if (m->addr) - pr_cont("ADDR %llx ", m->addr); - if (m->misc) - pr_cont("MISC %llx ", m->misc); - - pr_cont("\n"); - /* - * Note this output is parsed by external tools and old fields - * should not be changed. 
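The x86_mce_decoder_chain consulted by mce_log() and print_mce() is the hook EDAC-style drivers use to pretty-print events. A minimal, hypothetical consumer module built against this tree might look like the sketch below; only mce_register_decode_chain()/mce_unregister_decode_chain() and the standard notifier callback signature are taken from the code above, everything else (names, message) is made up:

	#include <linux/kernel.h>
	#include <linux/module.h>
	#include <linux/notifier.h>
	#include <asm/mce.h>

	/* Called for every logged MCE via the atomic notifier chain. */
	static int demo_mce_decode(struct notifier_block *nb, unsigned long val,
				   void *data)
	{
		struct mce *m = data;

		pr_info("demo: MCE on CPU %u bank %d status %#llx\n",
			m->extcpu, m->bank, (unsigned long long)m->status);
		return NOTIFY_DONE;
	}

	static struct notifier_block demo_mce_nb = {
		.notifier_call	= demo_mce_decode,
	};

	static int __init demo_init(void)
	{
		mce_register_decode_chain(&demo_mce_nb);
		return 0;
	}

	static void __exit demo_exit(void)
	{
		mce_unregister_decode_chain(&demo_mce_nb);
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");

Returning NOTIFY_STOP instead of NOTIFY_DONE would make print_mce() skip its "run this through 'mcelog --ascii'" hint, as the call-chain check just below shows.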
- */ - pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n", - m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid, - cpu_data(m->extcpu).microcode); - - /* - * Print out human-readable details about the MCE error, - * (if the CPU has an implementation for that) - */ - ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); - if (ret == NOTIFY_STOP) - return; - - pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n"); -} - -#define PANIC_TIMEOUT 5 /* 5 seconds */ - -static atomic_t mce_paniced; - -static int fake_panic; -static atomic_t mce_fake_paniced; - -/* Panic in progress. Enable interrupts and wait for final IPI */ -static void wait_for_panic(void) -{ - long timeout = PANIC_TIMEOUT*USEC_PER_SEC; - - preempt_disable(); - local_irq_enable(); - while (timeout-- > 0) - udelay(1); - if (panic_timeout == 0) - panic_timeout = mce_panic_timeout; - panic("Panicing machine check CPU died"); -} - -static void mce_panic(char *msg, struct mce *final, char *exp) -{ - int i, apei_err = 0; - - if (!fake_panic) { - /* - * Make sure only one CPU runs in machine check panic - */ - if (atomic_inc_return(&mce_paniced) > 1) - wait_for_panic(); - barrier(); - - bust_spinlocks(1); - console_verbose(); - } else { - /* Don't log too much for fake panic */ - if (atomic_inc_return(&mce_fake_paniced) > 1) - return; - } - /* First print corrected ones that are still unlogged */ - for (i = 0; i < MCE_LOG_LEN; i++) { - struct mce *m = &mcelog.entry[i]; - if (!(m->status & MCI_STATUS_VAL)) - continue; - if (!(m->status & MCI_STATUS_UC)) { - print_mce(m); - if (!apei_err) - apei_err = apei_write_mce(m); - } - } - /* Now print uncorrected but with the final one last */ - for (i = 0; i < MCE_LOG_LEN; i++) { - struct mce *m = &mcelog.entry[i]; - if (!(m->status & MCI_STATUS_VAL)) - continue; - if (!(m->status & MCI_STATUS_UC)) - continue; - if (!final || memcmp(m, final, sizeof(struct mce))) { - print_mce(m); - if (!apei_err) - apei_err = apei_write_mce(m); - } - } - if (final) { - print_mce(final); - if (!apei_err) - apei_err = apei_write_mce(final); - } - if (cpu_missing) - pr_emerg(HW_ERR "Some CPUs didn't answer in synchronization\n"); - if (exp) - pr_emerg(HW_ERR "Machine check: %s\n", exp); - if (!fake_panic) { - if (panic_timeout == 0) - panic_timeout = mce_panic_timeout; - panic(msg); - } else - pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg); -} - -/* Support code for software error injection */ - -static int msr_to_offset(u32 msr) -{ - unsigned bank = __this_cpu_read(injectm.bank); - - if (msr == rip_msr) - return offsetof(struct mce, ip); - if (msr == MSR_IA32_MCx_STATUS(bank)) - return offsetof(struct mce, status); - if (msr == MSR_IA32_MCx_ADDR(bank)) - return offsetof(struct mce, addr); - if (msr == MSR_IA32_MCx_MISC(bank)) - return offsetof(struct mce, misc); - if (msr == MSR_IA32_MCG_STATUS) - return offsetof(struct mce, mcgstatus); - return -1; -} - -/* MSR access wrappers used for error injection */ -static u64 mce_rdmsrl(u32 msr) -{ - u64 v; - - if (__this_cpu_read(injectm.finished)) { - int offset = msr_to_offset(msr); - - if (offset < 0) - return 0; - return *(u64 *)((char *)&__get_cpu_var(injectm) + offset); - } - - if (rdmsrl_safe(msr, &v)) { - WARN_ONCE(1, "mce: Unable to read msr %d!\n", msr); - /* - * Return zero in case the access faulted. This should - * not happen normally but can happen if the CPU does - * something weird, or if the code is buggy. 
- */ - v = 0; - } - - return v; -} - -static void mce_wrmsrl(u32 msr, u64 v) -{ - if (__this_cpu_read(injectm.finished)) { - int offset = msr_to_offset(msr); - - if (offset >= 0) - *(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v; - return; - } - wrmsrl(msr, v); -} - -/* - * Collect all global (w.r.t. this processor) status about this machine - * check into our "mce" struct so that we can use it later to assess - * the severity of the problem as we read per-bank specific details. - */ -static inline void mce_gather_info(struct mce *m, struct pt_regs *regs) -{ - mce_setup(m); - - m->mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); - if (regs) { - /* - * Get the address of the instruction at the time of - * the machine check error. - */ - if (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) { - m->ip = regs->ip; - m->cs = regs->cs; - - /* - * When in VM86 mode make the cs look like ring 3 - * always. This is a lie, but it's better than passing - * the additional vm86 bit around everywhere. - */ - if (v8086_mode(regs)) - m->cs |= 3; - } - /* Use accurate RIP reporting if available. */ - if (rip_msr) - m->ip = mce_rdmsrl(rip_msr); - } -} - -/* - * Simple lockless ring to communicate PFNs from the exception handler with the - * process context work function. This is vastly simplified because there's - * only a single reader and a single writer. - */ -#define MCE_RING_SIZE 16 /* we use one entry less */ - -struct mce_ring { - unsigned short start; - unsigned short end; - unsigned long ring[MCE_RING_SIZE]; -}; -static DEFINE_PER_CPU(struct mce_ring, mce_ring); - -/* Runs with CPU affinity in workqueue */ -static int mce_ring_empty(void) -{ - struct mce_ring *r = &__get_cpu_var(mce_ring); - - return r->start == r->end; -} - -static int mce_ring_get(unsigned long *pfn) -{ - struct mce_ring *r; - int ret = 0; - - *pfn = 0; - get_cpu(); - r = &__get_cpu_var(mce_ring); - if (r->start == r->end) - goto out; - *pfn = r->ring[r->start]; - r->start = (r->start + 1) % MCE_RING_SIZE; - ret = 1; -out: - put_cpu(); - return ret; -} - -/* Always runs in MCE context with preempt off */ -static int mce_ring_add(unsigned long pfn) -{ - struct mce_ring *r = &__get_cpu_var(mce_ring); - unsigned next; - - next = (r->end + 1) % MCE_RING_SIZE; - if (next == r->start) - return -1; - r->ring[r->end] = pfn; - wmb(); - r->end = next; - return 0; -} - -int mce_available(struct cpuinfo_x86 *c) -{ - if (mce_disabled) - return 0; - return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); -} - -static void mce_schedule_work(void) -{ - if (!mce_ring_empty()) { - struct work_struct *work = &__get_cpu_var(mce_work); - if (!work_pending(work)) - schedule_work(work); - } -} - -DEFINE_PER_CPU(struct irq_work, mce_irq_work); - -static void mce_irq_work_cb(struct irq_work *entry) -{ - mce_notify_irq(); - mce_schedule_work(); -} - -static void mce_report_event(struct pt_regs *regs) -{ - if (regs->flags & (X86_VM_MASK|X86_EFLAGS_IF)) { - mce_notify_irq(); - /* - * Triggering the work queue here is just an insurance - * policy in case the syscall exit notify handler - * doesn't run soon enough or ends up running on the - * wrong CPU (can happen when audit sleeps) - */ - mce_schedule_work(); - return; - } - - irq_work_queue(&__get_cpu_var(mce_irq_work)); -} - -/* - * Read ADDR and MISC registers. 
- */ -static void mce_read_aux(struct mce *m, int i) -{ - if (m->status & MCI_STATUS_MISCV) - m->misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i)); - if (m->status & MCI_STATUS_ADDRV) { - m->addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i)); - - /* - * Mask the reported address by the reported granularity. - */ - if (mce_ser && (m->status & MCI_STATUS_MISCV)) { - u8 shift = MCI_MISC_ADDR_LSB(m->misc); - m->addr >>= shift; - m->addr <<= shift; - } - } -} - -DEFINE_PER_CPU(unsigned, mce_poll_count); - -/* - * Poll for corrected events or events that happened before reset. - * Those are just logged through /dev/mcelog. - * - * This is executed in standard interrupt context. - * - * Note: spec recommends to panic for fatal unsignalled - * errors here. However this would be quite problematic -- - * we would need to reimplement the Monarch handling and - * it would mess up the exclusion between exception handler - * and poll hander -- * so we skip this for now. - * These cases should not happen anyways, or only when the CPU - * is already totally * confused. In this case it's likely it will - * not fully execute the machine check handler either. - */ -void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) -{ - struct mce m; - int i; - - percpu_inc(mce_poll_count); - - mce_gather_info(&m, NULL); - - for (i = 0; i < banks; i++) { - if (!mce_banks[i].ctl || !test_bit(i, *b)) - continue; - - m.misc = 0; - m.addr = 0; - m.bank = i; - m.tsc = 0; - - barrier(); - m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); - if (!(m.status & MCI_STATUS_VAL)) - continue; - - /* - * Uncorrected or signalled events are handled by the exception - * handler when it is enabled, so don't process those here. - * - * TBD do the same check for MCI_STATUS_EN here? - */ - if (!(flags & MCP_UC) && - (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC))) - continue; - - mce_read_aux(&m, i); - - if (!(flags & MCP_TIMESTAMP)) - m.tsc = 0; - /* - * Don't get the IP here because it's unlikely to - * have anything to do with the actual error location. - */ - if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) - mce_log(&m); - - /* - * Clear state for this bank. - */ - mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0); - } - - /* - * Don't clear MCG_STATUS here because it's only defined for - * exceptions. - */ - - sync_core(); -} -EXPORT_SYMBOL_GPL(machine_check_poll); - -/* - * Do a quick check if any of the events requires a panic. - * This decides if we keep the events around or clear them. - */ -static int mce_no_way_out(struct mce *m, char **msg) -{ - int i; - - for (i = 0; i < banks; i++) { - m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); - if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) - return 1; - } - return 0; -} - -/* - * Variable to establish order between CPUs while scanning. - * Each CPU spins initially until executing is equal its number. - */ -static atomic_t mce_executing; - -/* - * Defines order of CPUs on entry. First CPU becomes Monarch. - */ -static atomic_t mce_callin; - -/* - * Check if a timeout waiting for other CPUs happened. - */ -static int mce_timed_out(u64 *t) -{ - /* - * The others already did panic for some reason. - * Bail out like in a timeout. - * rmb() to tell the compiler that system_state - * might have been modified by someone else. - */ - rmb(); - if (atomic_read(&mce_paniced)) - wait_for_panic(); - if (!monarch_timeout) - goto out; - if ((s64)*t < SPINUNIT) { - /* CHECKME: Make panic default for 1 too? 
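The shift pair in mce_read_aux() simply rounds the reported address down to the granularity advertised in MCi_MISC: only bits at or above the reported LSB are meaningful, everything below them is cleared. With invented numbers:

	u64 addr  = 0x12345678;
	u8  shift = 12;		/* pretend MCI_MISC_ADDR_LSB(m->misc) == 12, i.e. 4K granularity */

	addr >>= shift;		/* 0x12345 */
	addr <<= shift;		/* 0x12345000: low 12 bits cleared */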
*/ - if (tolerant < 1) - mce_panic("Timeout synchronizing machine check over CPUs", - NULL, NULL); - cpu_missing = 1; - return 1; - } - *t -= SPINUNIT; -out: - touch_nmi_watchdog(); - return 0; -} - -/* - * The Monarch's reign. The Monarch is the CPU who entered - * the machine check handler first. It waits for the others to - * raise the exception too and then grades them. When any - * error is fatal panic. Only then let the others continue. - * - * The other CPUs entering the MCE handler will be controlled by the - * Monarch. They are called Subjects. - * - * This way we prevent any potential data corruption in a unrecoverable case - * and also makes sure always all CPU's errors are examined. - * - * Also this detects the case of a machine check event coming from outer - * space (not detected by any CPUs) In this case some external agent wants - * us to shut down, so panic too. - * - * The other CPUs might still decide to panic if the handler happens - * in a unrecoverable place, but in this case the system is in a semi-stable - * state and won't corrupt anything by itself. It's ok to let the others - * continue for a bit first. - * - * All the spin loops have timeouts; when a timeout happens a CPU - * typically elects itself to be Monarch. - */ -static void mce_reign(void) -{ - int cpu; - struct mce *m = NULL; - int global_worst = 0; - char *msg = NULL; - char *nmsg = NULL; - - /* - * This CPU is the Monarch and the other CPUs have run - * through their handlers. - * Grade the severity of the errors of all the CPUs. - */ - for_each_possible_cpu(cpu) { - int severity = mce_severity(&per_cpu(mces_seen, cpu), tolerant, - &nmsg); - if (severity > global_worst) { - msg = nmsg; - global_worst = severity; - m = &per_cpu(mces_seen, cpu); - } - } - - /* - * Cannot recover? Panic here then. - * This dumps all the mces in the log buffer and stops the - * other CPUs. - */ - if (m && global_worst >= MCE_PANIC_SEVERITY && tolerant < 3) - mce_panic("Fatal Machine check", m, msg); - - /* - * For UC somewhere we let the CPU who detects it handle it. - * Also must let continue the others, otherwise the handling - * CPU could deadlock on a lock. - */ - - /* - * No machine check event found. Must be some external - * source or one CPU is hung. Panic. - */ - if (global_worst <= MCE_KEEP_SEVERITY && tolerant < 3) - mce_panic("Machine check from unknown source", NULL, NULL); - - /* - * Now clear all the mces_seen so that they don't reappear on - * the next mce. - */ - for_each_possible_cpu(cpu) - memset(&per_cpu(mces_seen, cpu), 0, sizeof(struct mce)); -} - -static atomic_t global_nwo; - -/* - * Start of Monarch synchronization. This waits until all CPUs have - * entered the exception handler and then determines if any of them - * saw a fatal event that requires panic. Then it executes them - * in the entry order. - * TBD double check parallel CPU hotunplug - */ -static int mce_start(int *no_way_out) -{ - int order; - int cpus = num_online_cpus(); - u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; - - if (!timeout) - return -1; - - atomic_add(*no_way_out, &global_nwo); - /* - * global_nwo should be updated before mce_callin - */ - smp_wmb(); - order = atomic_inc_return(&mce_callin); - - /* - * Wait for everyone. 
- */ - while (atomic_read(&mce_callin) != cpus) { - if (mce_timed_out(&timeout)) { - atomic_set(&global_nwo, 0); - return -1; - } - ndelay(SPINUNIT); - } - - /* - * mce_callin should be read before global_nwo - */ - smp_rmb(); - - if (order == 1) { - /* - * Monarch: Starts executing now, the others wait. - */ - atomic_set(&mce_executing, 1); - } else { - /* - * Subject: Now start the scanning loop one by one in - * the original callin order. - * This way when there are any shared banks it will be - * only seen by one CPU before cleared, avoiding duplicates. - */ - while (atomic_read(&mce_executing) < order) { - if (mce_timed_out(&timeout)) { - atomic_set(&global_nwo, 0); - return -1; - } - ndelay(SPINUNIT); - } - } - - /* - * Cache the global no_way_out state. - */ - *no_way_out = atomic_read(&global_nwo); - - return order; -} - -/* - * Synchronize between CPUs after main scanning loop. - * This invokes the bulk of the Monarch processing. - */ -static int mce_end(int order) -{ - int ret = -1; - u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; - - if (!timeout) - goto reset; - if (order < 0) - goto reset; - - /* - * Allow others to run. - */ - atomic_inc(&mce_executing); - - if (order == 1) { - /* CHECKME: Can this race with a parallel hotplug? */ - int cpus = num_online_cpus(); - - /* - * Monarch: Wait for everyone to go through their scanning - * loops. - */ - while (atomic_read(&mce_executing) <= cpus) { - if (mce_timed_out(&timeout)) - goto reset; - ndelay(SPINUNIT); - } - - mce_reign(); - barrier(); - ret = 0; - } else { - /* - * Subject: Wait for Monarch to finish. - */ - while (atomic_read(&mce_executing) != 0) { - if (mce_timed_out(&timeout)) - goto reset; - ndelay(SPINUNIT); - } - - /* - * Don't reset anything. That's done by the Monarch. - */ - return 0; - } - - /* - * Reset all global state. - */ -reset: - atomic_set(&global_nwo, 0); - atomic_set(&mce_callin, 0); - barrier(); - - /* - * Let others run again. - */ - atomic_set(&mce_executing, 0); - return ret; -} - -/* - * Check if the address reported by the CPU is in a format we can parse. - * It would be possible to add code for most other cases, but all would - * be somewhat complicated (e.g. segment offset would require an instruction - * parser). So only support physical addresses up to page granuality for now. 
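mce_start() and mce_end() above implement what is effectively a ticket-ordered barrier: the first CPU to check in becomes the Monarch, and the rest scan their banks one at a time in call-in order. The sketch below is a much-simplified user-space model of just that ordering (C11 threads and atomics; timeouts, no_way_out accounting and the Monarch's final reset are all omitted), not the kernel code:

	#include <stdatomic.h>
	#include <stdio.h>
	#include <threads.h>

	#define NCPUS 4

	static atomic_int callin;	/* models mce_callin    */
	static atomic_int executing;	/* models mce_executing */

	static int cpu_thread(void *arg)
	{
		int cpu = (int)(long)arg;
		/* "mce_start": take a ticket, wait until every CPU has checked in. */
		int order = atomic_fetch_add(&callin, 1) + 1;

		while (atomic_load(&callin) != NCPUS)
			thrd_yield();

		if (order == 1)
			atomic_store(&executing, 1);	/* Monarch scans first */
		else
			while (atomic_load(&executing) < order)
				thrd_yield();		/* Subjects wait their turn */

		printf("cpu %d scans its banks (call-in order %d)\n", cpu, order);

		/* "mce_end": let the next CPU in line run; the Monarch waits for
		 * everyone and would then call mce_reign() to grade the results. */
		atomic_fetch_add(&executing, 1);
		if (order == 1) {
			while (atomic_load(&executing) <= NCPUS)
				thrd_yield();
			printf("cpu %d (Monarch) grades all results\n", cpu);
		}
		return 0;
	}

	int main(void)
	{
		thrd_t t[NCPUS];
		long i;

		for (i = 0; i < NCPUS; i++)
			thrd_create(&t[i], cpu_thread, (void *)i);
		for (i = 0; i < NCPUS; i++)
			thrd_join(t[i], NULL);
		return 0;
	}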
- */ -static int mce_usable_address(struct mce *m) -{ - if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV)) - return 0; - if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT) - return 0; - if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS) - return 0; - return 1; -} - -static void mce_clear_state(unsigned long *toclear) -{ - int i; - - for (i = 0; i < banks; i++) { - if (test_bit(i, toclear)) - mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0); - } -} - -/* - * Need to save faulting physical address associated with a process - * in the machine check handler some place where we can grab it back - * later in mce_notify_process() - */ -#define MCE_INFO_MAX 16 - -struct mce_info { - atomic_t inuse; - struct task_struct *t; - __u64 paddr; - int restartable; -} mce_info[MCE_INFO_MAX]; - -static void mce_save_info(__u64 addr, int c) -{ - struct mce_info *mi; - - for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) { - if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) { - mi->t = current; - mi->paddr = addr; - mi->restartable = c; - return; - } - } - - mce_panic("Too many concurrent recoverable errors", NULL, NULL); -} - -static struct mce_info *mce_find_info(void) -{ - struct mce_info *mi; - - for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) - if (atomic_read(&mi->inuse) && mi->t == current) - return mi; - return NULL; -} - -static void mce_clear_info(struct mce_info *mi) -{ - atomic_set(&mi->inuse, 0); -} - -/* - * The actual machine check handler. This only handles real - * exceptions when something got corrupted coming in through int 18. - * - * This is executed in NMI context not subject to normal locking rules. This - * implies that most kernel services cannot be safely used. Don't even - * think about putting a printk in there! - * - * On Intel systems this is entered on all CPUs in parallel through - * MCE broadcast. However some CPUs might be broken beyond repair, - * so be always careful when synchronizing with others. - */ -void do_machine_check(struct pt_regs *regs, long error_code) -{ - struct mce m, *final; - int i; - int worst = 0; - int severity; - /* - * Establish sequential order between the CPUs entering the machine - * check handler. - */ - int order; - /* - * If no_way_out gets set, there is no safe way to recover from this - * MCE. If tolerant is cranked up, we'll try anyway. - */ - int no_way_out = 0; - /* - * If kill_it gets set, there might be a way to recover from this - * error. - */ - int kill_it = 0; - DECLARE_BITMAP(toclear, MAX_NR_BANKS); - char *msg = "Unknown"; - - atomic_inc(&mce_entry); - - percpu_inc(mce_exception_count); - - if (!banks) - goto out; - - mce_gather_info(&m, regs); - - final = &__get_cpu_var(mces_seen); - *final = m; - - no_way_out = mce_no_way_out(&m, &msg); - - barrier(); - - /* - * When no restart IP might need to kill or panic. - * Assume the worst for now, but if we find the - * severity is MCE_AR_SEVERITY we have other options. - */ - if (!(m.mcgstatus & MCG_STATUS_RIPV)) - kill_it = 1; - - /* - * Go through all the banks in exclusion of the other CPUs. - * This way we don't report duplicated events on shared banks - * because the first one to see it will clear it. - */ - order = mce_start(&no_way_out); - for (i = 0; i < banks; i++) { - __clear_bit(i, toclear); - if (!mce_banks[i].ctl) - continue; - - m.misc = 0; - m.addr = 0; - m.bank = i; - - m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); - if ((m.status & MCI_STATUS_VAL) == 0) - continue; - - /* - * Non uncorrected or non signaled errors are handled by - * machine_check_poll. 
Leave them alone, unless this panics. - */ - if (!(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)) && - !no_way_out) - continue; - - /* - * Set taint even when machine check was not enabled. - */ - add_taint(TAINT_MACHINE_CHECK); - - severity = mce_severity(&m, tolerant, NULL); - - /* - * When machine check was for corrected handler don't touch, - * unless we're panicing. - */ - if (severity == MCE_KEEP_SEVERITY && !no_way_out) - continue; - __set_bit(i, toclear); - if (severity == MCE_NO_SEVERITY) { - /* - * Machine check event was not enabled. Clear, but - * ignore. - */ - continue; - } - - mce_read_aux(&m, i); - - /* - * Action optional error. Queue address for later processing. - * When the ring overflows we just ignore the AO error. - * RED-PEN add some logging mechanism when - * usable_address or mce_add_ring fails. - * RED-PEN don't ignore overflow for tolerant == 0 - */ - if (severity == MCE_AO_SEVERITY && mce_usable_address(&m)) - mce_ring_add(m.addr >> PAGE_SHIFT); - - mce_log(&m); - - if (severity > worst) { - *final = m; - worst = severity; - } - } - - /* mce_clear_state will clear *final, save locally for use later */ - m = *final; - - if (!no_way_out) - mce_clear_state(toclear); - - /* - * Do most of the synchronization with other CPUs. - * When there's any problem use only local no_way_out state. - */ - if (mce_end(order) < 0) - no_way_out = worst >= MCE_PANIC_SEVERITY; - - /* - * At insane "tolerant" levels we take no action. Otherwise - * we only die if we have no other choice. For less serious - * issues we try to recover, or limit damage to the current - * process. - */ - if (tolerant < 3) { - if (no_way_out) - mce_panic("Fatal machine check on current CPU", &m, msg); - if (worst == MCE_AR_SEVERITY) { - /* schedule action before return to userland */ - mce_save_info(m.addr, m.mcgstatus & MCG_STATUS_RIPV); - set_thread_flag(TIF_MCE_NOTIFY); - } else if (kill_it) { - force_sig(SIGBUS, current); - } - } - - if (worst > 0) - mce_report_event(regs); - mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); -out: - atomic_dec(&mce_entry); - sync_core(); -} -EXPORT_SYMBOL_GPL(do_machine_check); - -#ifndef CONFIG_MEMORY_FAILURE -int memory_failure(unsigned long pfn, int vector, int flags) -{ - /* mce_severity() should not hand us an ACTION_REQUIRED error */ - BUG_ON(flags & MF_ACTION_REQUIRED); - printk(KERN_ERR "Uncorrected memory error in page 0x%lx ignored\n" - "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n", pfn); - - return 0; -} -#endif - -/* - * Called in process context that interrupted by MCE and marked with - * TIF_MCE_NOTIFY, just before returning to erroneous userland. - * This code is allowed to sleep. - * Attempt possible recovery such as calling the high level VM handler to - * process any corrupted pages, and kill/signal current process if required. - * Action required errors are handled here. - */ -void mce_notify_process(void) -{ - unsigned long pfn; - struct mce_info *mi = mce_find_info(); - - if (!mi) - mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL); - pfn = mi->paddr >> PAGE_SHIFT; - - clear_thread_flag(TIF_MCE_NOTIFY); - - pr_err("Uncorrected hardware memory error in user-access at %llx", - mi->paddr); - /* - * We must call memory_failure() here even if the current process is - * doomed. We still need to mark the page as poisoned and alert any - * other users of the page. 
- */ - if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0 || - mi->restartable == 0) { - pr_err("Memory error not recovered"); - force_sig(SIGBUS, current); - } - mce_clear_info(mi); -} - -/* - * Action optional processing happens here (picking up - * from the list of faulting pages that do_machine_check() - * placed into the "ring"). - */ -static void mce_process_work(struct work_struct *dummy) -{ - unsigned long pfn; - - while (mce_ring_get(&pfn)) - memory_failure(pfn, MCE_VECTOR, 0); -} - -#ifdef CONFIG_X86_MCE_INTEL -/*** - * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog - * @cpu: The CPU on which the event occurred. - * @status: Event status information - * - * This function should be called by the thermal interrupt after the - * event has been processed and the decision was made to log the event - * further. - * - * The status parameter will be saved to the 'status' field of 'struct mce' - * and historically has been the register value of the - * MSR_IA32_THERMAL_STATUS (Intel) msr. - */ -void mce_log_therm_throt_event(__u64 status) -{ - struct mce m; - - mce_setup(&m); - m.bank = MCE_THERMAL_BANK; - m.status = status; - mce_log(&m); -} -#endif /* CONFIG_X86_MCE_INTEL */ - -/* - * Periodic polling timer for "silent" machine check errors. If the - * poller finds an MCE, poll 2x faster. When the poller finds no more - * errors, poll 2x slower (up to check_interval seconds). - */ -static int check_interval = 5 * 60; /* 5 minutes */ - -static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */ -static DEFINE_PER_CPU(struct timer_list, mce_timer); - -static void mce_start_timer(unsigned long data) -{ - struct timer_list *t = &per_cpu(mce_timer, data); - int *n; - - WARN_ON(smp_processor_id() != data); - - if (mce_available(__this_cpu_ptr(&cpu_info))) { - machine_check_poll(MCP_TIMESTAMP, - &__get_cpu_var(mce_poll_banks)); - } - - /* - * Alert userspace if needed. If we logged an MCE, reduce the - * polling interval, otherwise increase the polling interval. - */ - n = &__get_cpu_var(mce_next_interval); - if (mce_notify_irq()) - *n = max(*n/2, HZ/100); - else - *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ)); - - t->expires = jiffies + *n; - add_timer_on(t, smp_processor_id()); -} - -/* Must not be called in IRQ context where del_timer_sync() can deadlock */ -static void mce_timer_delete_all(void) -{ - int cpu; - - for_each_online_cpu(cpu) - del_timer_sync(&per_cpu(mce_timer, cpu)); -} - -static void mce_do_trigger(struct work_struct *work) -{ - call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT); -} - -static DECLARE_WORK(mce_trigger_work, mce_do_trigger); - -/* - * Notify the user(s) about new machine check events. - * Can be called from interrupt context, but not from machine check/NMI - * context. - */ -int mce_notify_irq(void) -{ - /* Not more than two messages every minute */ - static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); - - if (test_and_clear_bit(0, &mce_need_notify)) { - /* wake processes polling /dev/mcelog */ - wake_up_interruptible(&mce_chrdev_wait); - - /* - * There is no risk of missing notifications because - * work_pending is always cleared before the function is - * executed. 
- */ - if (mce_helper[0] && !work_pending(&mce_trigger_work)) - schedule_work(&mce_trigger_work); - - if (__ratelimit(&ratelimit)) - pr_info(HW_ERR "Machine check events logged\n"); - - return 1; - } - return 0; -} -EXPORT_SYMBOL_GPL(mce_notify_irq); - -static int __cpuinit __mcheck_cpu_mce_banks_init(void) -{ - int i; - - mce_banks = kzalloc(banks * sizeof(struct mce_bank), GFP_KERNEL); - if (!mce_banks) - return -ENOMEM; - for (i = 0; i < banks; i++) { - struct mce_bank *b = &mce_banks[i]; - - b->ctl = -1ULL; - b->init = 1; - } - return 0; -} - -/* - * Initialize Machine Checks for a CPU. - */ -static int __cpuinit __mcheck_cpu_cap_init(void) -{ - unsigned b; - u64 cap; - - rdmsrl(MSR_IA32_MCG_CAP, cap); - - b = cap & MCG_BANKCNT_MASK; - if (!banks) - printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b); - - if (b > MAX_NR_BANKS) { - printk(KERN_WARNING - "MCE: Using only %u machine check banks out of %u\n", - MAX_NR_BANKS, b); - b = MAX_NR_BANKS; - } - - /* Don't support asymmetric configurations today */ - WARN_ON(banks != 0 && b != banks); - banks = b; - if (!mce_banks) { - int err = __mcheck_cpu_mce_banks_init(); - - if (err) - return err; - } - - /* Use accurate RIP reporting if available. */ - if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9) - rip_msr = MSR_IA32_MCG_EIP; - - if (cap & MCG_SER_P) - mce_ser = 1; - - return 0; -} - -static void __mcheck_cpu_init_generic(void) -{ - mce_banks_t all_banks; - u64 cap; - int i; - - /* - * Log the machine checks left over from the previous reset. - */ - bitmap_fill(all_banks, MAX_NR_BANKS); - machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks); - - set_in_cr4(X86_CR4_MCE); - - rdmsrl(MSR_IA32_MCG_CAP, cap); - if (cap & MCG_CTL_P) - wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); - - for (i = 0; i < banks; i++) { - struct mce_bank *b = &mce_banks[i]; - - if (!b->init) - continue; - wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); - wrmsrl(MSR_IA32_MCx_STATUS(i), 0); - } -} - -/* Add per CPU specific workarounds here */ -static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) -{ - if (c->x86_vendor == X86_VENDOR_UNKNOWN) { - pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); - return -EOPNOTSUPP; - } - - /* This should be disabled by the BIOS, but isn't always */ - if (c->x86_vendor == X86_VENDOR_AMD) { - if (c->x86 == 15 && banks > 4) { - /* - * disable GART TBL walk error reporting, which - * trips off incorrectly with the IOMMU & 3ware - * & Cerberus: - */ - clear_bit(10, (unsigned long *)&mce_banks[4].ctl); - } - if (c->x86 <= 17 && mce_bootlog < 0) { - /* - * Lots of broken BIOS around that don't clear them - * by default and leave crap in there. Don't log: - */ - mce_bootlog = 0; - } - /* - * Various K7s with broken bank 0 around. Always disable - * by default. - */ - if (c->x86 == 6 && banks > 0) - mce_banks[0].ctl = 0; - } - - if (c->x86_vendor == X86_VENDOR_INTEL) { - /* - * SDM documents that on family 6 bank 0 should not be written - * because it aliases to another special BIOS controlled - * register. - * But it's not aliased anymore on model 0x1a+ - * Don't ignore bank 0 completely because there could be a - * valid event later, merely don't write CTL0. - */ - - if (c->x86 == 6 && c->x86_model < 0x1A && banks > 0) - mce_banks[0].init = 0; - - /* - * All newer Intel systems support MCE broadcasting. Enable - * synchronization with a one second timeout. 
- */ - if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) && - monarch_timeout < 0) - monarch_timeout = USEC_PER_SEC; - - /* - * There are also broken BIOSes on some Pentium M and - * earlier systems: - */ - if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0) - mce_bootlog = 0; - } - if (monarch_timeout < 0) - monarch_timeout = 0; - if (mce_bootlog != 0) - mce_panic_timeout = 30; - - return 0; -} - -static int __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) -{ - if (c->x86 != 5) - return 0; - - switch (c->x86_vendor) { - case X86_VENDOR_INTEL: - intel_p5_mcheck_init(c); - return 1; - break; - case X86_VENDOR_CENTAUR: - winchip_mcheck_init(c); - return 1; - break; - } - - return 0; -} - -static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) -{ - switch (c->x86_vendor) { - case X86_VENDOR_INTEL: - mce_intel_feature_init(c); - break; - case X86_VENDOR_AMD: - mce_amd_feature_init(c); - break; - default: - break; - } -} - -static void __mcheck_cpu_init_timer(void) -{ - struct timer_list *t = &__get_cpu_var(mce_timer); - int *n = &__get_cpu_var(mce_next_interval); - - setup_timer(t, mce_start_timer, smp_processor_id()); - - if (mce_ignore_ce) - return; - - *n = check_interval * HZ; - if (!*n) - return; - t->expires = round_jiffies(jiffies + *n); - add_timer_on(t, smp_processor_id()); -} - -/* Handle unconfigured int18 (should never happen) */ -static void unexpected_machine_check(struct pt_regs *regs, long error_code) -{ - printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", - smp_processor_id()); -} - -/* Call the installed machine check handler for this CPU setup. */ -void (*machine_check_vector)(struct pt_regs *, long error_code) = - unexpected_machine_check; - -/* - * Called for each booted CPU to set up machine checks. - * Must be called with preempt off: - */ -void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c) -{ - if (mce_disabled) - return; - - if (__mcheck_cpu_ancient_init(c)) - return; - - if (!mce_available(c)) - return; - - if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) { - mce_disabled = 1; - return; - } - - machine_check_vector = do_machine_check; - - __mcheck_cpu_init_generic(); - __mcheck_cpu_init_vendor(c); - __mcheck_cpu_init_timer(); - INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); - init_irq_work(&__get_cpu_var(mce_irq_work), &mce_irq_work_cb); -} - -/* - * mce_chrdev: Character device /dev/mcelog to read and clear the MCE log. - */ - -static DEFINE_SPINLOCK(mce_chrdev_state_lock); -static int mce_chrdev_open_count; /* #times opened */ -static int mce_chrdev_open_exclu; /* already open exclusive? */ - -static int mce_chrdev_open(struct inode *inode, struct file *file) -{ - spin_lock(&mce_chrdev_state_lock); - - if (mce_chrdev_open_exclu || - (mce_chrdev_open_count && (file->f_flags & O_EXCL))) { - spin_unlock(&mce_chrdev_state_lock); - - return -EBUSY; - } - - if (file->f_flags & O_EXCL) - mce_chrdev_open_exclu = 1; - mce_chrdev_open_count++; - - spin_unlock(&mce_chrdev_state_lock); - - return nonseekable_open(inode, file); -} - -static int mce_chrdev_release(struct inode *inode, struct file *file) -{ - spin_lock(&mce_chrdev_state_lock); - - mce_chrdev_open_count--; - mce_chrdev_open_exclu = 0; - - spin_unlock(&mce_chrdev_state_lock); - - return 0; -} - -static void collect_tscs(void *data) -{ - unsigned long *cpu_tsc = (unsigned long *)data; - - rdtscll(cpu_tsc[smp_processor_id()]); -} - -static int mce_apei_read_done; - -/* Collect MCE record of previous boot in persistent storage via APEI ERST. 
*/ -static int __mce_read_apei(char __user **ubuf, size_t usize) -{ - int rc; - u64 record_id; - struct mce m; - - if (usize < sizeof(struct mce)) - return -EINVAL; - - rc = apei_read_mce(&m, &record_id); - /* Error or no more MCE record */ - if (rc <= 0) { - mce_apei_read_done = 1; - /* - * When ERST is disabled, mce_chrdev_read() should return - * "no record" instead of "no device." - */ - if (rc == -ENODEV) - return 0; - return rc; - } - rc = -EFAULT; - if (copy_to_user(*ubuf, &m, sizeof(struct mce))) - return rc; - /* - * In fact, we should have cleared the record after that has - * been flushed to the disk or sent to network in - * /sbin/mcelog, but we have no interface to support that now, - * so just clear it to avoid duplication. - */ - rc = apei_clear_mce(record_id); - if (rc) { - mce_apei_read_done = 1; - return rc; - } - *ubuf += sizeof(struct mce); - - return 0; -} - -static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf, - size_t usize, loff_t *off) -{ - char __user *buf = ubuf; - unsigned long *cpu_tsc; - unsigned prev, next; - int i, err; - - cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL); - if (!cpu_tsc) - return -ENOMEM; - - mutex_lock(&mce_chrdev_read_mutex); - - if (!mce_apei_read_done) { - err = __mce_read_apei(&buf, usize); - if (err || buf != ubuf) - goto out; - } - - next = rcu_dereference_check_mce(mcelog.next); - - /* Only supports full reads right now */ - err = -EINVAL; - if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) - goto out; - - err = 0; - prev = 0; - do { - for (i = prev; i < next; i++) { - unsigned long start = jiffies; - struct mce *m = &mcelog.entry[i]; - - while (!m->finished) { - if (time_after_eq(jiffies, start + 2)) { - memset(m, 0, sizeof(*m)); - goto timeout; - } - cpu_relax(); - } - smp_rmb(); - err |= copy_to_user(buf, m, sizeof(*m)); - buf += sizeof(*m); -timeout: - ; - } - - memset(mcelog.entry + prev, 0, - (next - prev) * sizeof(struct mce)); - prev = next; - next = cmpxchg(&mcelog.next, prev, 0); - } while (next != prev); - - synchronize_sched(); - - /* - * Collect entries that were still getting written before the - * synchronize. - */ - on_each_cpu(collect_tscs, cpu_tsc, 1); - - for (i = next; i < MCE_LOG_LEN; i++) { - struct mce *m = &mcelog.entry[i]; - - if (m->finished && m->tsc < cpu_tsc[m->cpu]) { - err |= copy_to_user(buf, m, sizeof(*m)); - smp_rmb(); - buf += sizeof(*m); - memset(m, 0, sizeof(*m)); - } - } - - if (err) - err = -EFAULT; - -out: - mutex_unlock(&mce_chrdev_read_mutex); - kfree(cpu_tsc); - - return err ? 
err : buf - ubuf; -} - -static unsigned int mce_chrdev_poll(struct file *file, poll_table *wait) -{ - poll_wait(file, &mce_chrdev_wait, wait); - if (rcu_access_index(mcelog.next)) - return POLLIN | POLLRDNORM; - if (!mce_apei_read_done && apei_check_mce()) - return POLLIN | POLLRDNORM; - return 0; -} - -static long mce_chrdev_ioctl(struct file *f, unsigned int cmd, - unsigned long arg) -{ - int __user *p = (int __user *)arg; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - switch (cmd) { - case MCE_GET_RECORD_LEN: - return put_user(sizeof(struct mce), p); - case MCE_GET_LOG_LEN: - return put_user(MCE_LOG_LEN, p); - case MCE_GETCLEAR_FLAGS: { - unsigned flags; - - do { - flags = mcelog.flags; - } while (cmpxchg(&mcelog.flags, flags, 0) != flags); - - return put_user(flags, p); - } - default: - return -ENOTTY; - } -} - -static ssize_t (*mce_write)(struct file *filp, const char __user *ubuf, - size_t usize, loff_t *off); - -void register_mce_write_callback(ssize_t (*fn)(struct file *filp, - const char __user *ubuf, - size_t usize, loff_t *off)) -{ - mce_write = fn; -} -EXPORT_SYMBOL_GPL(register_mce_write_callback); - -ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf, - size_t usize, loff_t *off) -{ - if (mce_write) - return mce_write(filp, ubuf, usize, off); - else - return -EINVAL; -} - -static const struct file_operations mce_chrdev_ops = { - .open = mce_chrdev_open, - .release = mce_chrdev_release, - .read = mce_chrdev_read, - .write = mce_chrdev_write, - .poll = mce_chrdev_poll, - .unlocked_ioctl = mce_chrdev_ioctl, - .llseek = no_llseek, -}; - -static struct miscdevice mce_chrdev_device = { - MISC_MCELOG_MINOR, - "mcelog", - &mce_chrdev_ops, -}; - -/* - * mce=off Disables machine check - * mce=no_cmci Disables CMCI - * mce=dont_log_ce Clears corrected events silently, no log created for CEs. - * mce=ignore_ce Disables polling and CMCI, corrected events are not cleared. - * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above) - * monarchtimeout is how long to wait for other CPUs on machine - * check, or 0 to not wait - * mce=bootlog Log MCEs from before booting. Disabled by default on AMD. - * mce=nobootlog Don't log MCEs from before booting. - */ -static int __init mcheck_enable(char *str) -{ - if (*str == 0) { - enable_p5_mce(); - return 1; - } - if (*str == '=') - str++; - if (!strcmp(str, "off")) - mce_disabled = 1; - else if (!strcmp(str, "no_cmci")) - mce_cmci_disabled = 1; - else if (!strcmp(str, "dont_log_ce")) - mce_dont_log_ce = 1; - else if (!strcmp(str, "ignore_ce")) - mce_ignore_ce = 1; - else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) - mce_bootlog = (str[0] == 'b'); - else if (isdigit(str[0])) { - get_option(&str, &tolerant); - if (*str == ',') { - ++str; - get_option(&str, &monarch_timeout); - } - } else { - printk(KERN_INFO "mce argument %s ignored. Please use /sys\n", - str); - return 0; - } - return 1; -} -__setup("mce", mcheck_enable); - -int __init mcheck_init(void) -{ - mcheck_intel_therm_init(); - - return 0; -} - -/* - * mce_syscore: PM support - */ - -/* - * Disable machine checks on suspend and shutdown. We can't really handle - * them later. 
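mce_chrdev_read() above only supports reading the whole log in one go, which is why mcelog(8) first asks the device for the record and log sizes. A stripped-down reader, assuming the exported asm/mce.h supplies the MCE_GET_RECORD_LEN/MCE_GET_LOG_LEN ioctl numbers (an illustrative sketch, not a replacement for mcelog):

	#include <asm/mce.h>		/* MCE_GET_RECORD_LEN, MCE_GET_LOG_LEN */
	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/ioctl.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/dev/mcelog", O_RDONLY);
		int rec_len, log_len;
		char *buf;
		ssize_t n;

		if (fd < 0) {
			perror("open /dev/mcelog");
			return 1;
		}
		ioctl(fd, MCE_GET_RECORD_LEN, &rec_len);	/* sizeof(struct mce) */
		ioctl(fd, MCE_GET_LOG_LEN, &log_len);		/* MCE_LOG_LEN records */

		/* mce_chrdev_read() insists on a buffer covering the whole log. */
		buf = malloc((size_t)rec_len * log_len);
		if (!buf) {
			close(fd);
			return 1;
		}
		n = read(fd, buf, (size_t)rec_len * log_len);
		if (n < 0)
			perror("read");
		else
			printf("read %zd bytes (%zd records)\n", n, n / rec_len);

		free(buf);
		close(fd);
		return 0;
	}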
- */ -static int mce_disable_error_reporting(void) -{ - int i; - - for (i = 0; i < banks; i++) { - struct mce_bank *b = &mce_banks[i]; - - if (b->init) - wrmsrl(MSR_IA32_MCx_CTL(i), 0); - } - return 0; -} - -static int mce_syscore_suspend(void) -{ - return mce_disable_error_reporting(); -} - -static void mce_syscore_shutdown(void) -{ - mce_disable_error_reporting(); -} - -/* - * On resume clear all MCE state. Don't want to see leftovers from the BIOS. - * Only one CPU is active at this time, the others get re-added later using - * CPU hotplug: - */ -static void mce_syscore_resume(void) -{ - __mcheck_cpu_init_generic(); - __mcheck_cpu_init_vendor(__this_cpu_ptr(&cpu_info)); -} - -static struct syscore_ops mce_syscore_ops = { - .suspend = mce_syscore_suspend, - .shutdown = mce_syscore_shutdown, - .resume = mce_syscore_resume, -}; - -/* - * mce_device: Sysfs support - */ - -static void mce_cpu_restart(void *data) -{ - if (!mce_available(__this_cpu_ptr(&cpu_info))) - return; - __mcheck_cpu_init_generic(); - __mcheck_cpu_init_timer(); -} - -/* Reinit MCEs after user configuration changes */ -static void mce_restart(void) -{ - mce_timer_delete_all(); - on_each_cpu(mce_cpu_restart, NULL, 1); -} - -/* Toggle features for corrected errors */ -static void mce_disable_cmci(void *data) -{ - if (!mce_available(__this_cpu_ptr(&cpu_info))) - return; - cmci_clear(); -} - -static void mce_enable_ce(void *all) -{ - if (!mce_available(__this_cpu_ptr(&cpu_info))) - return; - cmci_reenable(); - cmci_recheck(); - if (all) - __mcheck_cpu_init_timer(); -} - -static struct bus_type mce_subsys = { - .name = "machinecheck", - .dev_name = "machinecheck", -}; - -DEFINE_PER_CPU(struct device *, mce_device); - -__cpuinitdata -void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); - -static inline struct mce_bank *attr_to_bank(struct device_attribute *attr) -{ - return container_of(attr, struct mce_bank, attr); -} - -static ssize_t show_bank(struct device *s, struct device_attribute *attr, - char *buf) -{ - return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl); -} - -static ssize_t set_bank(struct device *s, struct device_attribute *attr, - const char *buf, size_t size) -{ - u64 new; - - if (strict_strtoull(buf, 0, &new) < 0) - return -EINVAL; - - attr_to_bank(attr)->ctl = new; - mce_restart(); - - return size; -} - -static ssize_t -show_trigger(struct device *s, struct device_attribute *attr, char *buf) -{ - strcpy(buf, mce_helper); - strcat(buf, "\n"); - return strlen(mce_helper) + 1; -} - -static ssize_t set_trigger(struct device *s, struct device_attribute *attr, - const char *buf, size_t siz) -{ - char *p; - - strncpy(mce_helper, buf, sizeof(mce_helper)); - mce_helper[sizeof(mce_helper)-1] = 0; - p = strchr(mce_helper, '\n'); - - if (p) - *p = 0; - - return strlen(mce_helper) + !!p; -} - -static ssize_t set_ignore_ce(struct device *s, - struct device_attribute *attr, - const char *buf, size_t size) -{ - u64 new; - - if (strict_strtoull(buf, 0, &new) < 0) - return -EINVAL; - - if (mce_ignore_ce ^ !!new) { - if (new) { - /* disable ce features */ - mce_timer_delete_all(); - on_each_cpu(mce_disable_cmci, NULL, 1); - mce_ignore_ce = 1; - } else { - /* enable ce features */ - mce_ignore_ce = 0; - on_each_cpu(mce_enable_ce, (void *)1, 1); - } - } - return size; -} - -static ssize_t set_cmci_disabled(struct device *s, - struct device_attribute *attr, - const char *buf, size_t size) -{ - u64 new; - - if (strict_strtoull(buf, 0, &new) < 0) - return -EINVAL; - - if (mce_cmci_disabled ^ !!new) { - if (new) { 
- /* disable cmci */ - on_each_cpu(mce_disable_cmci, NULL, 1); - mce_cmci_disabled = 1; - } else { - /* enable cmci */ - mce_cmci_disabled = 0; - on_each_cpu(mce_enable_ce, NULL, 1); - } - } - return size; -} - -static ssize_t store_int_with_restart(struct device *s, - struct device_attribute *attr, - const char *buf, size_t size) -{ - ssize_t ret = device_store_int(s, attr, buf, size); - mce_restart(); - return ret; -} - -static DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger); -static DEVICE_INT_ATTR(tolerant, 0644, tolerant); -static DEVICE_INT_ATTR(monarch_timeout, 0644, monarch_timeout); -static DEVICE_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce); - -static struct dev_ext_attribute dev_attr_check_interval = { - __ATTR(check_interval, 0644, device_show_int, store_int_with_restart), - &check_interval -}; - -static struct dev_ext_attribute dev_attr_ignore_ce = { - __ATTR(ignore_ce, 0644, device_show_int, set_ignore_ce), - &mce_ignore_ce -}; - -static struct dev_ext_attribute dev_attr_cmci_disabled = { - __ATTR(cmci_disabled, 0644, device_show_int, set_cmci_disabled), - &mce_cmci_disabled -}; - -static struct device_attribute *mce_device_attrs[] = { - &dev_attr_tolerant.attr, - &dev_attr_check_interval.attr, - &dev_attr_trigger, - &dev_attr_monarch_timeout.attr, - &dev_attr_dont_log_ce.attr, - &dev_attr_ignore_ce.attr, - &dev_attr_cmci_disabled.attr, - NULL -}; - -static cpumask_var_t mce_device_initialized; - -static void mce_device_release(struct device *dev) -{ - kfree(dev); -} - -/* Per cpu device init. All of the cpus still share the same ctrl bank: */ -static __cpuinit int mce_device_create(unsigned int cpu) -{ - struct device *dev; - int err; - int i, j; - - if (!mce_available(&boot_cpu_data)) - return -EIO; - - dev = kzalloc(sizeof *dev, GFP_KERNEL); - if (!dev) - return -ENOMEM; - dev->id = cpu; - dev->bus = &mce_subsys; - dev->release = &mce_device_release; - - err = device_register(dev); - if (err) - return err; - - for (i = 0; mce_device_attrs[i]; i++) { - err = device_create_file(dev, mce_device_attrs[i]); - if (err) - goto error; - } - for (j = 0; j < banks; j++) { - err = device_create_file(dev, &mce_banks[j].attr); - if (err) - goto error2; - } - cpumask_set_cpu(cpu, mce_device_initialized); - per_cpu(mce_device, cpu) = dev; - - return 0; -error2: - while (--j >= 0) - device_remove_file(dev, &mce_banks[j].attr); -error: - while (--i >= 0) - device_remove_file(dev, mce_device_attrs[i]); - - device_unregister(dev); - - return err; -} - -static __cpuinit void mce_device_remove(unsigned int cpu) -{ - struct device *dev = per_cpu(mce_device, cpu); - int i; - - if (!cpumask_test_cpu(cpu, mce_device_initialized)) - return; - - for (i = 0; mce_device_attrs[i]; i++) - device_remove_file(dev, mce_device_attrs[i]); - - for (i = 0; i < banks; i++) - device_remove_file(dev, &mce_banks[i].attr); - - device_unregister(dev); - cpumask_clear_cpu(cpu, mce_device_initialized); - per_cpu(mce_device, cpu) = NULL; -} - -/* Make sure there are no machine checks on offlined CPUs. 
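Once mce_device_create() has run, the attributes assembled in mce_device_attrs[] and mce_banks[].attr show up under /sys/devices/system/machinecheck/machinecheck<N>/, one directory per CPU. The path here is assumed from the "machinecheck" subsystem name above; the snippet is only an illustrative sketch of driving those attributes from user space, with error handling trimmed:

	#include <stdio.h>

	int main(void)
	{
		FILE *f;
		int tol;

		/* Goes through store_int_with_restart(), so the poll timers are re-armed. */
		f = fopen("/sys/devices/system/machinecheck/machinecheck0/check_interval", "w");
		if (!f) {
			perror("check_interval");
			return 1;
		}
		fprintf(f, "60\n");	/* poll for corrected errors every 60 seconds */
		fclose(f);

		f = fopen("/sys/devices/system/machinecheck/machinecheck0/tolerant", "r");
		if (f && fscanf(f, "%d", &tol) == 1)
			printf("tolerant = %d\n", tol);
		if (f)
			fclose(f);
		return 0;
	}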
*/ -static void __cpuinit mce_disable_cpu(void *h) -{ - unsigned long action = *(unsigned long *)h; - int i; - - if (!mce_available(__this_cpu_ptr(&cpu_info))) - return; - - if (!(action & CPU_TASKS_FROZEN)) - cmci_clear(); - for (i = 0; i < banks; i++) { - struct mce_bank *b = &mce_banks[i]; - - if (b->init) - wrmsrl(MSR_IA32_MCx_CTL(i), 0); - } -} - -static void __cpuinit mce_reenable_cpu(void *h) -{ - unsigned long action = *(unsigned long *)h; - int i; - - if (!mce_available(__this_cpu_ptr(&cpu_info))) - return; - - if (!(action & CPU_TASKS_FROZEN)) - cmci_reenable(); - for (i = 0; i < banks; i++) { - struct mce_bank *b = &mce_banks[i]; - - if (b->init) - wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); - } -} - -/* Get notified when a cpu comes on/off. Be hotplug friendly. */ -static int __cpuinit -mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - struct timer_list *t = &per_cpu(mce_timer, cpu); - - switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - mce_device_create(cpu); - if (threshold_cpu_callback) - threshold_cpu_callback(action, cpu); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - if (threshold_cpu_callback) - threshold_cpu_callback(action, cpu); - mce_device_remove(cpu); - break; - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - del_timer_sync(t); - smp_call_function_single(cpu, mce_disable_cpu, &action, 1); - break; - case CPU_DOWN_FAILED: - case CPU_DOWN_FAILED_FROZEN: - if (!mce_ignore_ce && check_interval) { - t->expires = round_jiffies(jiffies + - __get_cpu_var(mce_next_interval)); - add_timer_on(t, cpu); - } - smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); - break; - case CPU_POST_DEAD: - /* intentionally ignoring frozen here */ - cmci_rediscover(cpu); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block mce_cpu_notifier __cpuinitdata = { - .notifier_call = mce_cpu_callback, -}; - -static __init void mce_init_banks(void) -{ - int i; - - for (i = 0; i < banks; i++) { - struct mce_bank *b = &mce_banks[i]; - struct device_attribute *a = &b->attr; - - sysfs_attr_init(&a->attr); - a->attr.name = b->attrname; - snprintf(b->attrname, ATTR_LEN, "bank%d", i); - - a->attr.mode = 0644; - a->show = show_bank; - a->store = set_bank; - } -} - -static __init int mcheck_init_device(void) -{ - int err; - int i = 0; - - if (!mce_available(&boot_cpu_data)) - return -EIO; - - zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL); - - mce_init_banks(); - - err = subsys_system_register(&mce_subsys, NULL); - if (err) - return err; - - for_each_online_cpu(i) { - err = mce_device_create(i); - if (err) - return err; - } - - register_syscore_ops(&mce_syscore_ops); - register_hotcpu_notifier(&mce_cpu_notifier); - - /* register character device /dev/mcelog */ - misc_register(&mce_chrdev_device); - - return err; -} -device_initcall(mcheck_init_device); - -/* - * Old style boot options parsing. Only for compatibility. 
- */ -static int __init mcheck_disable(char *str) -{ - mce_disabled = 1; - return 1; -} -__setup("nomce", mcheck_disable); - -#ifdef CONFIG_DEBUG_FS -struct dentry *mce_get_debugfs_dir(void) -{ - static struct dentry *dmce; - - if (!dmce) - dmce = debugfs_create_dir("mce", NULL); - - return dmce; -} - -static void mce_reset(void) -{ - cpu_missing = 0; - atomic_set(&mce_fake_paniced, 0); - atomic_set(&mce_executing, 0); - atomic_set(&mce_callin, 0); - atomic_set(&global_nwo, 0); -} - -static int fake_panic_get(void *data, u64 *val) -{ - *val = fake_panic; - return 0; -} - -static int fake_panic_set(void *data, u64 val) -{ - mce_reset(); - fake_panic = val; - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get, - fake_panic_set, "%llu\n"); - -static int __init mcheck_debugfs_init(void) -{ - struct dentry *dmce, *ffake_panic; - - dmce = mce_get_debugfs_dir(); - if (!dmce) - return -ENOMEM; - ffake_panic = debugfs_create_file("fake_panic", 0444, dmce, NULL, - &fake_panic_fops); - if (!ffake_panic) - return -ENOMEM; - - return 0; -} -late_initcall(mcheck_debugfs_init); -#endif diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/mce_amd.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/mce_amd.c deleted file mode 100644 index 2c1d178b..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ /dev/null @@ -1,775 +0,0 @@ -/* - * (c) 2005, 2006 Advanced Micro Devices, Inc. - * Your use of this code is subject to the terms and conditions of the - * GNU general public license version 2. See "COPYING" or - * http://www.gnu.org/licenses/gpl.html - * - * Written by Jacob Shin - AMD, Inc. - * - * Support : jacob.shin@amd.com - * - * April 2006 - * - added support for AMD Family 0x10 processors - * - * All MC4_MISCi registers are shared between multi-cores - */ -#include <linux/interrupt.h> -#include <linux/notifier.h> -#include <linux/kobject.h> -#include <linux/percpu.h> -#include <linux/errno.h> -#include <linux/sched.h> -#include <linux/sysfs.h> -#include <linux/slab.h> -#include <linux/init.h> -#include <linux/cpu.h> -#include <linux/smp.h> - -#include <asm/apic.h> -#include <asm/idle.h> -#include <asm/mce.h> -#include <asm/msr.h> - -#define NR_BANKS 6 -#define NR_BLOCKS 9 -#define THRESHOLD_MAX 0xFFF -#define INT_TYPE_APIC 0x00020000 -#define MASK_VALID_HI 0x80000000 -#define MASK_CNTP_HI 0x40000000 -#define MASK_LOCKED_HI 0x20000000 -#define MASK_LVTOFF_HI 0x00F00000 -#define MASK_COUNT_EN_HI 0x00080000 -#define MASK_INT_TYPE_HI 0x00060000 -#define MASK_OVERFLOW_HI 0x00010000 -#define MASK_ERR_COUNT_HI 0x00000FFF -#define MASK_BLKPTR_LO 0xFF000000 -#define MCG_XBLK_ADDR 0xC0000400 - -struct threshold_block { - unsigned int block; - unsigned int bank; - unsigned int cpu; - u32 address; - u16 interrupt_enable; - bool interrupt_capable; - u16 threshold_limit; - struct kobject kobj; - struct list_head miscj; -}; - -struct threshold_bank { - struct kobject *kobj; - struct threshold_block *blocks; - cpumask_var_t cpus; -}; -static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks); - -static unsigned char shared_bank[NR_BANKS] = { - 0, 0, 0, 0, 1 -}; - -static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ - -static void amd_threshold_interrupt(void); - -/* - * CPU Initialization - */ - -struct thresh_restart { - struct threshold_block *b; - int reset; - int set_lvt_off; - int lvt_off; - u16 old_limit; -}; - -static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits) -{ - /* - * bank 4 supports APIC LVT interrupts 
implicitly since forever. - */ - if (bank == 4) - return true; - - /* - * IntP: interrupt present; if this bit is set, the thresholding - * bank can generate APIC LVT interrupts - */ - return msr_high_bits & BIT(28); -} - -static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) -{ - int msr = (hi & MASK_LVTOFF_HI) >> 20; - - if (apic < 0) { - pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt " - "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu, - b->bank, b->block, b->address, hi, lo); - return 0; - } - - if (apic != msr) { - pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d " - "for bank %d, block %d (MSR%08X=0x%x%08x)\n", - b->cpu, apic, b->bank, b->block, b->address, hi, lo); - return 0; - } - - return 1; -}; - -/* - * Called via smp_call_function_single(), must be called with correct - * cpu affinity. - */ -static void threshold_restart_bank(void *_tr) -{ - struct thresh_restart *tr = _tr; - u32 hi, lo; - - rdmsr(tr->b->address, lo, hi); - - if (tr->b->threshold_limit < (hi & THRESHOLD_MAX)) - tr->reset = 1; /* limit cannot be lower than err count */ - - if (tr->reset) { /* reset err count and overflow bit */ - hi = - (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | - (THRESHOLD_MAX - tr->b->threshold_limit); - } else if (tr->old_limit) { /* change limit w/o reset */ - int new_count = (hi & THRESHOLD_MAX) + - (tr->old_limit - tr->b->threshold_limit); - - hi = (hi & ~MASK_ERR_COUNT_HI) | - (new_count & THRESHOLD_MAX); - } - - /* clear IntType */ - hi &= ~MASK_INT_TYPE_HI; - - if (!tr->b->interrupt_capable) - goto done; - - if (tr->set_lvt_off) { - if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) { - /* set new lvt offset */ - hi &= ~MASK_LVTOFF_HI; - hi |= tr->lvt_off << 20; - } - } - - if (tr->b->interrupt_enable) - hi |= INT_TYPE_APIC; - - done: - - hi |= MASK_COUNT_EN_HI; - wrmsr(tr->b->address, lo, hi); -} - -static void mce_threshold_block_init(struct threshold_block *b, int offset) -{ - struct thresh_restart tr = { - .b = b, - .set_lvt_off = 1, - .lvt_off = offset, - }; - - b->threshold_limit = THRESHOLD_MAX; - threshold_restart_bank(&tr); -}; - -static int setup_APIC_mce(int reserved, int new) -{ - if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR, - APIC_EILVT_MSG_FIX, 0)) - return new; - - return reserved; -} - -/* cpu init entry point, called from mce.c with preempt off */ -void mce_amd_feature_init(struct cpuinfo_x86 *c) -{ - struct threshold_block b; - unsigned int cpu = smp_processor_id(); - u32 low = 0, high = 0, address = 0; - unsigned int bank, block; - int offset = -1; - - for (bank = 0; bank < NR_BANKS; ++bank) { - for (block = 0; block < NR_BLOCKS; ++block) { - if (block == 0) - address = MSR_IA32_MC0_MISC + bank * 4; - else if (block == 1) { - address = (low & MASK_BLKPTR_LO) >> 21; - if (!address) - break; - - address += MCG_XBLK_ADDR; - } else - ++address; - - if (rdmsr_safe(address, &low, &high)) - break; - - if (!(high & MASK_VALID_HI)) - continue; - - if (!(high & MASK_CNTP_HI) || - (high & MASK_LOCKED_HI)) - continue; - - if (!block) - per_cpu(bank_map, cpu) |= (1 << bank); - if (shared_bank[bank] && c->cpu_core_id) - break; - - memset(&b, 0, sizeof(b)); - b.cpu = cpu; - b.bank = bank; - b.block = block; - b.address = address; - b.interrupt_capable = lvt_interrupt_supported(bank, high); - - if (b.interrupt_capable) { - int new = (high & MASK_LVTOFF_HI) >> 20; - offset = setup_APIC_mce(offset, new); - } - - mce_threshold_block_init(&b, offset); - mce_threshold_vector = amd_threshold_interrupt; - } - } -} 
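/*
 * Illustrative sketch (userspace, hypothetical SKETCH_* names): how the
 * discovery loop above derives the MSR address of each threshold block in
 * a bank.  Block 0 sits at MC0_MISC + bank*4; block 1 is reached through
 * the BLKPTR byte in bits 31:24 of block 0's low word, scaled by 8 and
 * based at MCG_XBLK_ADDR; any further blocks are consecutive MSRs.
 */
#include <stdio.h>
#include <stdint.h>

#define SKETCH_MC0_MISC   0x00000403u	/* MSR_IA32_MC0_MISC */
#define SKETCH_XBLK_ADDR  0xC0000400u	/* MCG_XBLK_ADDR */
#define SKETCH_BLKPTR_LO  0xFF000000u	/* MASK_BLKPTR_LO */

static uint32_t next_block_address(uint32_t address, unsigned int block,
				   uint32_t low)
{
	if (block == 0)
		/* BLKPTR byte * 8, relative to the extended block area */
		return ((low & SKETCH_BLKPTR_LO) >> 21) + SKETCH_XBLK_ADDR;
	return address + 1;		/* remaining blocks are consecutive */
}

int main(void)
{
	unsigned int bank = 4;		/* the shared northbridge bank */
	unsigned int block;
	uint32_t low = 0x01000000;	/* assume a BLKPTR byte of 1 */
	uint32_t address = SKETCH_MC0_MISC + bank * 4;

	for (block = 0; block < 3; block++) {
		printf("bank %u block %u -> MSR 0x%08x\n",
		       bank, block, (unsigned int)address);
		address = next_block_address(address, block, low);
	}
	return 0;
}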
- -/* - * APIC Interrupt Handler - */ - -/* - * threshold interrupt handler will service THRESHOLD_APIC_VECTOR. - * the interrupt goes off when error_count reaches threshold_limit. - * the handler will simply log mcelog w/ software defined bank number. - */ -static void amd_threshold_interrupt(void) -{ - u32 low = 0, high = 0, address = 0; - unsigned int bank, block; - struct mce m; - - mce_setup(&m); - - /* assume first bank caused it */ - for (bank = 0; bank < NR_BANKS; ++bank) { - if (!(per_cpu(bank_map, m.cpu) & (1 << bank))) - continue; - for (block = 0; block < NR_BLOCKS; ++block) { - if (block == 0) { - address = MSR_IA32_MC0_MISC + bank * 4; - } else if (block == 1) { - address = (low & MASK_BLKPTR_LO) >> 21; - if (!address) - break; - address += MCG_XBLK_ADDR; - } else { - ++address; - } - - if (rdmsr_safe(address, &low, &high)) - break; - - if (!(high & MASK_VALID_HI)) { - if (block) - continue; - else - break; - } - - if (!(high & MASK_CNTP_HI) || - (high & MASK_LOCKED_HI)) - continue; - - /* - * Log the machine check that caused the threshold - * event. - */ - machine_check_poll(MCP_TIMESTAMP, - &__get_cpu_var(mce_poll_banks)); - - if (high & MASK_OVERFLOW_HI) { - rdmsrl(address, m.misc); - rdmsrl(MSR_IA32_MC0_STATUS + bank * 4, - m.status); - m.bank = K8_MCE_THRESHOLD_BASE - + bank * NR_BLOCKS - + block; - mce_log(&m); - return; - } - } - } -} - -/* - * Sysfs Interface - */ - -struct threshold_attr { - struct attribute attr; - ssize_t (*show) (struct threshold_block *, char *); - ssize_t (*store) (struct threshold_block *, const char *, size_t count); -}; - -#define SHOW_FIELDS(name) \ -static ssize_t show_ ## name(struct threshold_block *b, char *buf) \ -{ \ - return sprintf(buf, "%lx\n", (unsigned long) b->name); \ -} -SHOW_FIELDS(interrupt_enable) -SHOW_FIELDS(threshold_limit) - -static ssize_t -store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size) -{ - struct thresh_restart tr; - unsigned long new; - - if (!b->interrupt_capable) - return -EINVAL; - - if (strict_strtoul(buf, 0, &new) < 0) - return -EINVAL; - - b->interrupt_enable = !!new; - - memset(&tr, 0, sizeof(tr)); - tr.b = b; - - smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); - - return size; -} - -static ssize_t -store_threshold_limit(struct threshold_block *b, const char *buf, size_t size) -{ - struct thresh_restart tr; - unsigned long new; - - if (strict_strtoul(buf, 0, &new) < 0) - return -EINVAL; - - if (new > THRESHOLD_MAX) - new = THRESHOLD_MAX; - if (new < 1) - new = 1; - - memset(&tr, 0, sizeof(tr)); - tr.old_limit = b->threshold_limit; - b->threshold_limit = new; - tr.b = b; - - smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); - - return size; -} - -struct threshold_block_cross_cpu { - struct threshold_block *tb; - long retval; -}; - -static void local_error_count_handler(void *_tbcc) -{ - struct threshold_block_cross_cpu *tbcc = _tbcc; - struct threshold_block *b = tbcc->tb; - u32 low, high; - - rdmsr(b->address, low, high); - tbcc->retval = (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit); -} - -static ssize_t show_error_count(struct threshold_block *b, char *buf) -{ - struct threshold_block_cross_cpu tbcc = { .tb = b, }; - - smp_call_function_single(b->cpu, local_error_count_handler, &tbcc, 1); - return sprintf(buf, "%lx\n", tbcc.retval); -} - -static ssize_t store_error_count(struct threshold_block *b, - const char *buf, size_t count) -{ - struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 }; - - 
smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); - return 1; -} - -#define RW_ATTR(val) \ -static struct threshold_attr val = { \ - .attr = {.name = __stringify(val), .mode = 0644 }, \ - .show = show_## val, \ - .store = store_## val, \ -}; - -RW_ATTR(interrupt_enable); -RW_ATTR(threshold_limit); -RW_ATTR(error_count); - -static struct attribute *default_attrs[] = { - &interrupt_enable.attr, - &threshold_limit.attr, - &error_count.attr, - NULL -}; - -#define to_block(k) container_of(k, struct threshold_block, kobj) -#define to_attr(a) container_of(a, struct threshold_attr, attr) - -static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) -{ - struct threshold_block *b = to_block(kobj); - struct threshold_attr *a = to_attr(attr); - ssize_t ret; - - ret = a->show ? a->show(b, buf) : -EIO; - - return ret; -} - -static ssize_t store(struct kobject *kobj, struct attribute *attr, - const char *buf, size_t count) -{ - struct threshold_block *b = to_block(kobj); - struct threshold_attr *a = to_attr(attr); - ssize_t ret; - - ret = a->store ? a->store(b, buf, count) : -EIO; - - return ret; -} - -static const struct sysfs_ops threshold_ops = { - .show = show, - .store = store, -}; - -static struct kobj_type threshold_ktype = { - .sysfs_ops = &threshold_ops, - .default_attrs = default_attrs, -}; - -static __cpuinit int allocate_threshold_blocks(unsigned int cpu, - unsigned int bank, - unsigned int block, - u32 address) -{ - struct threshold_block *b = NULL; - u32 low, high; - int err; - - if ((bank >= NR_BANKS) || (block >= NR_BLOCKS)) - return 0; - - if (rdmsr_safe_on_cpu(cpu, address, &low, &high)) - return 0; - - if (!(high & MASK_VALID_HI)) { - if (block) - goto recurse; - else - return 0; - } - - if (!(high & MASK_CNTP_HI) || - (high & MASK_LOCKED_HI)) - goto recurse; - - b = kzalloc(sizeof(struct threshold_block), GFP_KERNEL); - if (!b) - return -ENOMEM; - - b->block = block; - b->bank = bank; - b->cpu = cpu; - b->address = address; - b->interrupt_enable = 0; - b->interrupt_capable = lvt_interrupt_supported(bank, high); - b->threshold_limit = THRESHOLD_MAX; - - INIT_LIST_HEAD(&b->miscj); - - if (per_cpu(threshold_banks, cpu)[bank]->blocks) { - list_add(&b->miscj, - &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj); - } else { - per_cpu(threshold_banks, cpu)[bank]->blocks = b; - } - - err = kobject_init_and_add(&b->kobj, &threshold_ktype, - per_cpu(threshold_banks, cpu)[bank]->kobj, - "misc%i", block); - if (err) - goto out_free; -recurse: - if (!block) { - address = (low & MASK_BLKPTR_LO) >> 21; - if (!address) - return 0; - address += MCG_XBLK_ADDR; - } else { - ++address; - } - - err = allocate_threshold_blocks(cpu, bank, ++block, address); - if (err) - goto out_free; - - if (b) - kobject_uevent(&b->kobj, KOBJ_ADD); - - return err; - -out_free: - if (b) { - kobject_put(&b->kobj); - list_del(&b->miscj); - kfree(b); - } - return err; -} - -static __cpuinit long -local_allocate_threshold_blocks(int cpu, unsigned int bank) -{ - return allocate_threshold_blocks(cpu, bank, 0, - MSR_IA32_MC0_MISC + bank * 4); -} - -/* symlinks sibling shared banks to first core. first core owns dir/files. 
*/ -static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) -{ - int i, err = 0; - struct threshold_bank *b = NULL; - struct device *dev = per_cpu(mce_device, cpu); - char name[32]; - - sprintf(name, "threshold_bank%i", bank); - -#ifdef CONFIG_SMP - if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ - i = cpumask_first(cpu_llc_shared_mask(cpu)); - - /* first core not up yet */ - if (cpu_data(i).cpu_core_id) - goto out; - - /* already linked */ - if (per_cpu(threshold_banks, cpu)[bank]) - goto out; - - b = per_cpu(threshold_banks, i)[bank]; - - if (!b) - goto out; - - err = sysfs_create_link(&dev->kobj, b->kobj, name); - if (err) - goto out; - - cpumask_copy(b->cpus, cpu_llc_shared_mask(cpu)); - per_cpu(threshold_banks, cpu)[bank] = b; - - goto out; - } -#endif - - b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL); - if (!b) { - err = -ENOMEM; - goto out; - } - if (!zalloc_cpumask_var(&b->cpus, GFP_KERNEL)) { - kfree(b); - err = -ENOMEM; - goto out; - } - - b->kobj = kobject_create_and_add(name, &dev->kobj); - if (!b->kobj) - goto out_free; - -#ifndef CONFIG_SMP - cpumask_setall(b->cpus); -#else - cpumask_set_cpu(cpu, b->cpus); -#endif - - per_cpu(threshold_banks, cpu)[bank] = b; - - err = local_allocate_threshold_blocks(cpu, bank); - if (err) - goto out_free; - - for_each_cpu(i, b->cpus) { - if (i == cpu) - continue; - - dev = per_cpu(mce_device, i); - if (dev) - err = sysfs_create_link(&dev->kobj,b->kobj, name); - if (err) - goto out; - - per_cpu(threshold_banks, i)[bank] = b; - } - - goto out; - -out_free: - per_cpu(threshold_banks, cpu)[bank] = NULL; - free_cpumask_var(b->cpus); - kfree(b); -out: - return err; -} - -/* create dir/files for all valid threshold banks */ -static __cpuinit int threshold_create_device(unsigned int cpu) -{ - unsigned int bank; - int err = 0; - - for (bank = 0; bank < NR_BANKS; ++bank) { - if (!(per_cpu(bank_map, cpu) & (1 << bank))) - continue; - err = threshold_create_bank(cpu, bank); - if (err) - return err; - } - - return err; -} - -/* - * let's be hotplug friendly. - * in case of multiple core processors, the first core always takes ownership - * of shared sysfs dir/files, and rest of the cores will be symlinked to it. 
- */ - -static void deallocate_threshold_block(unsigned int cpu, - unsigned int bank) -{ - struct threshold_block *pos = NULL; - struct threshold_block *tmp = NULL; - struct threshold_bank *head = per_cpu(threshold_banks, cpu)[bank]; - - if (!head) - return; - - list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) { - kobject_put(&pos->kobj); - list_del(&pos->miscj); - kfree(pos); - } - - kfree(per_cpu(threshold_banks, cpu)[bank]->blocks); - per_cpu(threshold_banks, cpu)[bank]->blocks = NULL; -} - -static void threshold_remove_bank(unsigned int cpu, int bank) -{ - struct threshold_bank *b; - struct device *dev; - char name[32]; - int i = 0; - - b = per_cpu(threshold_banks, cpu)[bank]; - if (!b) - return; - if (!b->blocks) - goto free_out; - - sprintf(name, "threshold_bank%i", bank); - -#ifdef CONFIG_SMP - /* sibling symlink */ - if (shared_bank[bank] && b->blocks->cpu != cpu) { - dev = per_cpu(mce_device, cpu); - sysfs_remove_link(&dev->kobj, name); - per_cpu(threshold_banks, cpu)[bank] = NULL; - - return; - } -#endif - - /* remove all sibling symlinks before unregistering */ - for_each_cpu(i, b->cpus) { - if (i == cpu) - continue; - - dev = per_cpu(mce_device, i); - if (dev) - sysfs_remove_link(&dev->kobj, name); - per_cpu(threshold_banks, i)[bank] = NULL; - } - - deallocate_threshold_block(cpu, bank); - -free_out: - kobject_del(b->kobj); - kobject_put(b->kobj); - free_cpumask_var(b->cpus); - kfree(b); - per_cpu(threshold_banks, cpu)[bank] = NULL; -} - -static void threshold_remove_device(unsigned int cpu) -{ - unsigned int bank; - - for (bank = 0; bank < NR_BANKS; ++bank) { - if (!(per_cpu(bank_map, cpu) & (1 << bank))) - continue; - threshold_remove_bank(cpu, bank); - } -} - -/* get notified when a cpu comes on/off */ -static void __cpuinit -amd_64_threshold_cpu_callback(unsigned long action, unsigned int cpu) -{ - switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - threshold_create_device(cpu); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - threshold_remove_device(cpu); - break; - default: - break; - } -} - -static __init int threshold_init_device(void) -{ - unsigned lcpu = 0; - - /* to hit CPUs online before the notifier is up */ - for_each_online_cpu(lcpu) { - int err = threshold_create_device(lcpu); - - if (err) - return err; - } - threshold_cpu_callback = amd_64_threshold_cpu_callback; - - return 0; -} -device_initcall(threshold_init_device); diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/mce_intel.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/mce_intel.c deleted file mode 100644 index 38e49bc9..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ /dev/null @@ -1,229 +0,0 @@ -/* - * Intel specific MCE features. - * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca> - * Copyright (C) 2008, 2009 Intel Corporation - * Author: Andi Kleen - */ - -#include <linux/gfp.h> -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/percpu.h> -#include <linux/sched.h> -#include <asm/apic.h> -#include <asm/processor.h> -#include <asm/msr.h> -#include <asm/mce.h> - -/* - * Support for Intel Correct Machine Check Interrupts. This allows - * the CPU to raise an interrupt when a corrected machine check happened. - * Normally we pick those up using a regular polling timer. - * Also supports reliable discovery of shared banks. - */ - -static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); - -/* - * cmci_discover_lock protects against parallel discovery attempts - * which could race against each other. 
- */ -static DEFINE_RAW_SPINLOCK(cmci_discover_lock); - -#define CMCI_THRESHOLD 1 - -static int cmci_supported(int *banks) -{ - u64 cap; - - if (mce_cmci_disabled || mce_ignore_ce) - return 0; - - /* - * Vendor check is not strictly needed, but the initial - * initialization is vendor keyed and this - * makes sure none of the backdoors are entered otherwise. - */ - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) - return 0; - if (!cpu_has_apic || lapic_get_maxlvt() < 6) - return 0; - rdmsrl(MSR_IA32_MCG_CAP, cap); - *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff); - return !!(cap & MCG_CMCI_P); -} - -/* - * The interrupt handler. This is called on every event. - * Just call the poller directly to log any events. - * This could in theory increase the threshold under high load, - * but doesn't for now. - */ -static void intel_threshold_interrupt(void) -{ - machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); - mce_notify_irq(); -} - -static void print_update(char *type, int *hdr, int num) -{ - if (*hdr == 0) - printk(KERN_INFO "CPU %d MCA banks", smp_processor_id()); - *hdr = 1; - printk(KERN_CONT " %s:%d", type, num); -} - -/* - * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks - * on this CPU. Use the algorithm recommended in the SDM to discover shared - * banks. - */ -static void cmci_discover(int banks, int boot) -{ - unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); - unsigned long flags; - int hdr = 0; - int i; - - raw_spin_lock_irqsave(&cmci_discover_lock, flags); - for (i = 0; i < banks; i++) { - u64 val; - - if (test_bit(i, owned)) - continue; - - rdmsrl(MSR_IA32_MCx_CTL2(i), val); - - /* Already owned by someone else? */ - if (val & MCI_CTL2_CMCI_EN) { - if (test_and_clear_bit(i, owned) && !boot) - print_update("SHD", &hdr, i); - __clear_bit(i, __get_cpu_var(mce_poll_banks)); - continue; - } - - val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; - val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD; - wrmsrl(MSR_IA32_MCx_CTL2(i), val); - rdmsrl(MSR_IA32_MCx_CTL2(i), val); - - /* Did the enable bit stick? -- the bank supports CMCI */ - if (val & MCI_CTL2_CMCI_EN) { - if (!test_and_set_bit(i, owned) && !boot) - print_update("CMCI", &hdr, i); - __clear_bit(i, __get_cpu_var(mce_poll_banks)); - } else { - WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); - } - } - raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); - if (hdr) - printk(KERN_CONT "\n"); -} - -/* - * Just in case we missed an event during initialization check - * all the CMCI owned banks. - */ -void cmci_recheck(void) -{ - unsigned long flags; - int banks; - - if (!mce_available(__this_cpu_ptr(&cpu_info)) || !cmci_supported(&banks)) - return; - local_irq_save(flags); - machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); - local_irq_restore(flags); -} - -/* - * Disable CMCI on this CPU for all banks it owns when it goes down. - * This allows other CPUs to claim the banks on rediscovery. 
- */ -void cmci_clear(void) -{ - unsigned long flags; - int i; - int banks; - u64 val; - - if (!cmci_supported(&banks)) - return; - raw_spin_lock_irqsave(&cmci_discover_lock, flags); - for (i = 0; i < banks; i++) { - if (!test_bit(i, __get_cpu_var(mce_banks_owned))) - continue; - /* Disable CMCI */ - rdmsrl(MSR_IA32_MCx_CTL2(i), val); - val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK); - wrmsrl(MSR_IA32_MCx_CTL2(i), val); - __clear_bit(i, __get_cpu_var(mce_banks_owned)); - } - raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); -} - -/* - * After a CPU went down cycle through all the others and rediscover - * Must run in process context. - */ -void cmci_rediscover(int dying) -{ - int banks; - int cpu; - cpumask_var_t old; - - if (!cmci_supported(&banks)) - return; - if (!alloc_cpumask_var(&old, GFP_KERNEL)) - return; - cpumask_copy(old, ¤t->cpus_allowed); - - for_each_online_cpu(cpu) { - if (cpu == dying) - continue; - if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) - continue; - /* Recheck banks in case CPUs don't all have the same */ - if (cmci_supported(&banks)) - cmci_discover(banks, 0); - } - - set_cpus_allowed_ptr(current, old); - free_cpumask_var(old); -} - -/* - * Reenable CMCI on this CPU in case a CPU down failed. - */ -void cmci_reenable(void) -{ - int banks; - if (cmci_supported(&banks)) - cmci_discover(banks, 0); -} - -static void intel_init_cmci(void) -{ - int banks; - - if (!cmci_supported(&banks)) - return; - - mce_threshold_vector = intel_threshold_interrupt; - cmci_discover(banks, 1); - /* - * For CPU #0 this runs with still disabled APIC, but that's - * ok because only the vector is set up. We still do another - * check for the banks later for CPU #0 just to make sure - * to not miss any events. - */ - apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED); - cmci_recheck(); -} - -void mce_intel_feature_init(struct cpuinfo_x86 *c) -{ - intel_init_thermal(c); - intel_init_cmci(); -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/p5.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/p5.c deleted file mode 100644 index 2d5454cd..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/p5.c +++ /dev/null @@ -1,67 +0,0 @@ -/* - * P5 specific Machine Check Exception Reporting - * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk> - */ -#include <linux/interrupt.h> -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/init.h> -#include <linux/smp.h> - -#include <asm/processor.h> -#include <asm/mce.h> -#include <asm/msr.h> - -/* By default disabled */ -int mce_p5_enabled __read_mostly; - -/* Machine check handler for Pentium class Intel CPUs: */ -static void pentium_machine_check(struct pt_regs *regs, long error_code) -{ - u32 loaddr, hi, lotype; - - rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); - rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); - - printk(KERN_EMERG - "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", - smp_processor_id(), loaddr, lotype); - - if (lotype & (1<<5)) { - printk(KERN_EMERG - "CPU#%d: Possible thermal failure (CPU on fire ?).\n", - smp_processor_id()); - } - - add_taint(TAINT_MACHINE_CHECK); -} - -/* Set up machine check reporting for processors with Intel style MCE: */ -void intel_p5_mcheck_init(struct cpuinfo_x86 *c) -{ - u32 l, h; - - /* Default P5 to off as its often misconnected: */ - if (!mce_p5_enabled) - return; - - /* Check for MCE support: */ - if (!cpu_has(c, X86_FEATURE_MCE)) - return; - - machine_check_vector = pentium_machine_check; - /* Make sure the vector pointer is visible before we enable MCEs: */ 
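/*
 * Illustrative note: the vector pointer is stored first and the wmb()
 * below orders that store before the later steps that switch machine
 * check delivery on, so nothing can dispatch through a stale
 * machine_check_vector.  A minimal sketch of the same publish-then-enable
 * pattern (hypothetical names):
 *
 *     handler = new_handler;     // publish the pointer first
 *     smp_wmb();                 // order the pointer store before the flag
 *     enabled = 1;               // only now let consumers dispatch
 */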
- wmb(); - - /* Read registers before enabling: */ - rdmsr(MSR_IA32_P5_MC_ADDR, l, h); - rdmsr(MSR_IA32_P5_MC_TYPE, l, h); - printk(KERN_INFO - "Intel old style machine check architecture supported.\n"); - - /* Enable MCE: */ - set_in_cr4(X86_CR4_MCE); - printk(KERN_INFO - "Intel old style machine check reporting enabled on CPU#%d.\n", - smp_processor_id()); -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/therm_throt.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/therm_throt.c deleted file mode 100644 index 47a18702..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ /dev/null @@ -1,508 +0,0 @@ -/* - * Thermal throttle event support code (such as syslog messaging and rate - * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c). - * - * This allows consistent reporting of CPU thermal throttle events. - * - * Maintains a counter in /sys that keeps track of the number of thermal - * events, such that the user knows how bad the thermal problem might be - * (since the logging to syslog and mcelog is rate limited). - * - * Author: Dmitriy Zavin (dmitriyz@google.com) - * - * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. - * Inspired by Ross Biro's and Al Borchers' counter code. - */ -#include <linux/interrupt.h> -#include <linux/notifier.h> -#include <linux/jiffies.h> -#include <linux/kernel.h> -#include <linux/percpu.h> -#include <linux/export.h> -#include <linux/types.h> -#include <linux/init.h> -#include <linux/smp.h> -#include <linux/cpu.h> - -#include <asm/processor.h> -#include <asm/apic.h> -#include <asm/idle.h> -#include <asm/mce.h> -#include <asm/msr.h> - -/* How long to wait between reporting thermal events */ -#define CHECK_INTERVAL (300 * HZ) - -#define THERMAL_THROTTLING_EVENT 0 -#define POWER_LIMIT_EVENT 1 - -/* - * Current thermal event state: - */ -struct _thermal_state { - bool new_event; - int event; - u64 next_check; - unsigned long count; - unsigned long last_count; -}; - -struct thermal_state { - struct _thermal_state core_throttle; - struct _thermal_state core_power_limit; - struct _thermal_state package_throttle; - struct _thermal_state package_power_limit; - struct _thermal_state core_thresh0; - struct _thermal_state core_thresh1; -}; - -/* Callback to handle core threshold interrupts */ -int (*platform_thermal_notify)(__u64 msr_val); -EXPORT_SYMBOL(platform_thermal_notify); - -static DEFINE_PER_CPU(struct thermal_state, thermal_state); - -static atomic_t therm_throt_en = ATOMIC_INIT(0); - -static u32 lvtthmr_init __read_mostly; - -#ifdef CONFIG_SYSFS -#define define_therm_throt_device_one_ro(_name) \ - static DEVICE_ATTR(_name, 0444, \ - therm_throt_device_show_##_name, \ - NULL) \ - -#define define_therm_throt_device_show_func(event, name) \ - \ -static ssize_t therm_throt_device_show_##event##_##name( \ - struct device *dev, \ - struct device_attribute *attr, \ - char *buf) \ -{ \ - unsigned int cpu = dev->id; \ - ssize_t ret; \ - \ - preempt_disable(); /* CPU hotplug */ \ - if (cpu_online(cpu)) { \ - ret = sprintf(buf, "%lu\n", \ - per_cpu(thermal_state, cpu).event.name); \ - } else \ - ret = 0; \ - preempt_enable(); \ - \ - return ret; \ -} - -define_therm_throt_device_show_func(core_throttle, count); -define_therm_throt_device_one_ro(core_throttle_count); - -define_therm_throt_device_show_func(core_power_limit, count); -define_therm_throt_device_one_ro(core_power_limit_count); - -define_therm_throt_device_show_func(package_throttle, count); 
-define_therm_throt_device_one_ro(package_throttle_count); - -define_therm_throt_device_show_func(package_power_limit, count); -define_therm_throt_device_one_ro(package_power_limit_count); - -static struct attribute *thermal_throttle_attrs[] = { - &dev_attr_core_throttle_count.attr, - NULL -}; - -static struct attribute_group thermal_attr_group = { - .attrs = thermal_throttle_attrs, - .name = "thermal_throttle" -}; -#endif /* CONFIG_SYSFS */ - -#define CORE_LEVEL 0 -#define PACKAGE_LEVEL 1 - -/*** - * therm_throt_process - Process thermal throttling event from interrupt - * @curr: Whether the condition is current or not (boolean), since the - * thermal interrupt normally gets called both when the thermal - * event begins and once the event has ended. - * - * This function is called by the thermal interrupt after the - * IRQ has been acknowledged. - * - * It will take care of rate limiting and printing messages to the syslog. - * - * Returns: 0 : Event should NOT be further logged, i.e. still in - * "timeout" from previous log message. - * 1 : Event should be logged further, and a message has been - * printed to the syslog. - */ -static int therm_throt_process(bool new_event, int event, int level) -{ - struct _thermal_state *state; - unsigned int this_cpu = smp_processor_id(); - bool old_event; - u64 now; - struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu); - - now = get_jiffies_64(); - if (level == CORE_LEVEL) { - if (event == THERMAL_THROTTLING_EVENT) - state = &pstate->core_throttle; - else if (event == POWER_LIMIT_EVENT) - state = &pstate->core_power_limit; - else - return 0; - } else if (level == PACKAGE_LEVEL) { - if (event == THERMAL_THROTTLING_EVENT) - state = &pstate->package_throttle; - else if (event == POWER_LIMIT_EVENT) - state = &pstate->package_power_limit; - else - return 0; - } else - return 0; - - old_event = state->new_event; - state->new_event = new_event; - - if (new_event) - state->count++; - - if (time_before64(now, state->next_check) && - state->count != state->last_count) - return 0; - - state->next_check = now + CHECK_INTERVAL; - state->last_count = state->count; - - /* if we just entered the thermal event */ - if (new_event) { - if (event == THERMAL_THROTTLING_EVENT) - printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n", - this_cpu, - level == CORE_LEVEL ? "Core" : "Package", - state->count); - else - printk(KERN_CRIT "CPU%d: %s power limit notification (total events = %lu)\n", - this_cpu, - level == CORE_LEVEL ? "Core" : "Package", - state->count); - return 1; - } - if (old_event) { - if (event == THERMAL_THROTTLING_EVENT) - printk(KERN_INFO "CPU%d: %s temperature/speed normal\n", - this_cpu, - level == CORE_LEVEL ? "Core" : "Package"); - else - printk(KERN_INFO "CPU%d: %s power limit normal\n", - this_cpu, - level == CORE_LEVEL ? "Core" : "Package"); - return 1; - } - - return 0; -} - -static int thresh_event_valid(int event) -{ - struct _thermal_state *state; - unsigned int this_cpu = smp_processor_id(); - struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu); - u64 now = get_jiffies_64(); - - state = (event == 0) ? 
&pstate->core_thresh0 : &pstate->core_thresh1; - - if (time_before64(now, state->next_check)) - return 0; - - state->next_check = now + CHECK_INTERVAL; - return 1; -} - -#ifdef CONFIG_SYSFS -/* Add/Remove thermal_throttle interface for CPU device: */ -static __cpuinit int thermal_throttle_add_dev(struct device *dev, - unsigned int cpu) -{ - int err; - struct cpuinfo_x86 *c = &cpu_data(cpu); - - err = sysfs_create_group(&dev->kobj, &thermal_attr_group); - if (err) - return err; - - if (cpu_has(c, X86_FEATURE_PLN)) - err = sysfs_add_file_to_group(&dev->kobj, - &dev_attr_core_power_limit_count.attr, - thermal_attr_group.name); - if (cpu_has(c, X86_FEATURE_PTS)) { - err = sysfs_add_file_to_group(&dev->kobj, - &dev_attr_package_throttle_count.attr, - thermal_attr_group.name); - if (cpu_has(c, X86_FEATURE_PLN)) - err = sysfs_add_file_to_group(&dev->kobj, - &dev_attr_package_power_limit_count.attr, - thermal_attr_group.name); - } - - return err; -} - -static __cpuinit void thermal_throttle_remove_dev(struct device *dev) -{ - sysfs_remove_group(&dev->kobj, &thermal_attr_group); -} - -/* Mutex protecting device creation against CPU hotplug: */ -static DEFINE_MUTEX(therm_cpu_lock); - -/* Get notified when a cpu comes on/off. Be hotplug friendly. */ -static __cpuinit int -thermal_throttle_cpu_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - struct device *dev; - int err = 0; - - dev = get_cpu_device(cpu); - - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - mutex_lock(&therm_cpu_lock); - err = thermal_throttle_add_dev(dev, cpu); - mutex_unlock(&therm_cpu_lock); - WARN_ON(err); - break; - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - case CPU_DEAD: - case CPU_DEAD_FROZEN: - mutex_lock(&therm_cpu_lock); - thermal_throttle_remove_dev(dev); - mutex_unlock(&therm_cpu_lock); - break; - } - return notifier_from_errno(err); -} - -static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata = -{ - .notifier_call = thermal_throttle_cpu_callback, -}; - -static __init int thermal_throttle_init_device(void) -{ - unsigned int cpu = 0; - int err; - - if (!atomic_read(&therm_throt_en)) - return 0; - - register_hotcpu_notifier(&thermal_throttle_cpu_notifier); - -#ifdef CONFIG_HOTPLUG_CPU - mutex_lock(&therm_cpu_lock); -#endif - /* connect live CPUs to sysfs */ - for_each_online_cpu(cpu) { - err = thermal_throttle_add_dev(get_cpu_device(cpu), cpu); - WARN_ON(err); - } -#ifdef CONFIG_HOTPLUG_CPU - mutex_unlock(&therm_cpu_lock); -#endif - - return 0; -} -device_initcall(thermal_throttle_init_device); - -#endif /* CONFIG_SYSFS */ - -static void notify_thresholds(__u64 msr_val) -{ - /* check whether the interrupt handler is defined; - * otherwise simply return - */ - if (!platform_thermal_notify) - return; - - /* lower threshold reached */ - if ((msr_val & THERM_LOG_THRESHOLD0) && thresh_event_valid(0)) - platform_thermal_notify(msr_val); - /* higher threshold reached */ - if ((msr_val & THERM_LOG_THRESHOLD1) && thresh_event_valid(1)) - platform_thermal_notify(msr_val); -} - -/* Thermal transition interrupt handler */ -static void intel_thermal_interrupt(void) -{ - __u64 msr_val; - - rdmsrl(MSR_IA32_THERM_STATUS, msr_val); - - /* Check for violation of core thermal thresholds*/ - notify_thresholds(msr_val); - - if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, - THERMAL_THROTTLING_EVENT, - CORE_LEVEL) != 0) - mce_log_therm_throt_event(msr_val); - - if (this_cpu_has(X86_FEATURE_PLN)) - 
therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, - POWER_LIMIT_EVENT, - CORE_LEVEL); - - if (this_cpu_has(X86_FEATURE_PTS)) { - rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); - therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, - THERMAL_THROTTLING_EVENT, - PACKAGE_LEVEL); - if (this_cpu_has(X86_FEATURE_PLN)) - therm_throt_process(msr_val & - PACKAGE_THERM_STATUS_POWER_LIMIT, - POWER_LIMIT_EVENT, - PACKAGE_LEVEL); - } -} - -static void unexpected_thermal_interrupt(void) -{ - printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n", - smp_processor_id()); -} - -static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; - -asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) -{ - irq_enter(); - exit_idle(); - inc_irq_stat(irq_thermal_count); - smp_thermal_vector(); - irq_exit(); - /* Ack only at the end to avoid potential reentry */ - ack_APIC_irq(); -} - -/* Thermal monitoring depends on APIC, ACPI and clock modulation */ -static int intel_thermal_supported(struct cpuinfo_x86 *c) -{ - if (!cpu_has_apic) - return 0; - if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) - return 0; - return 1; -} - -void __init mcheck_intel_therm_init(void) -{ - /* - * This function is only called on boot CPU. Save the init thermal - * LVT value on BSP and use that value to restore APs' thermal LVT - * entry BIOS programmed later - */ - if (intel_thermal_supported(&boot_cpu_data)) - lvtthmr_init = apic_read(APIC_LVTTHMR); -} - -void intel_init_thermal(struct cpuinfo_x86 *c) -{ - unsigned int cpu = smp_processor_id(); - int tm2 = 0; - u32 l, h; - - if (!intel_thermal_supported(c)) - return; - - /* - * First check if its enabled already, in which case there might - * be some SMM goo which handles it, so we can't even put a handler - * since it might be delivered via SMI already: - */ - rdmsr(MSR_IA32_MISC_ENABLE, l, h); - - h = lvtthmr_init; - /* - * The initial value of thermal LVT entries on all APs always reads - * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI - * sequence to them and LVT registers are reset to 0s except for - * the mask bits which are set to 1s when APs receive INIT IPI. - * If BIOS takes over the thermal interrupt and sets its interrupt - * delivery mode to SMI (not fixed), it restores the value that the - * BIOS has programmed on AP based on BSP's info we saved since BIOS - * is always setting the same value for all threads/cores. 
- */ - if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED) - apic_write(APIC_LVTTHMR, lvtthmr_init); - - - if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { - printk(KERN_DEBUG - "CPU%d: Thermal monitoring handled by SMI\n", cpu); - return; - } - - /* Check whether a vector already exists */ - if (h & APIC_VECTOR_MASK) { - printk(KERN_DEBUG - "CPU%d: Thermal LVT vector (%#x) already installed\n", - cpu, (h & APIC_VECTOR_MASK)); - return; - } - - /* early Pentium M models use different method for enabling TM2 */ - if (cpu_has(c, X86_FEATURE_TM2)) { - if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) { - rdmsr(MSR_THERM2_CTL, l, h); - if (l & MSR_THERM2_CTL_TM_SELECT) - tm2 = 1; - } else if (l & MSR_IA32_MISC_ENABLE_TM2) - tm2 = 1; - } - - /* We'll mask the thermal vector in the lapic till we're ready: */ - h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; - apic_write(APIC_LVTTHMR, h); - - rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); - if (cpu_has(c, X86_FEATURE_PLN)) - wrmsr(MSR_IA32_THERM_INTERRUPT, - l | (THERM_INT_LOW_ENABLE - | THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h); - else - wrmsr(MSR_IA32_THERM_INTERRUPT, - l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); - - if (cpu_has(c, X86_FEATURE_PTS)) { - rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); - if (cpu_has(c, X86_FEATURE_PLN)) - wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, - l | (PACKAGE_THERM_INT_LOW_ENABLE - | PACKAGE_THERM_INT_HIGH_ENABLE - | PACKAGE_THERM_INT_PLN_ENABLE), h); - else - wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, - l | (PACKAGE_THERM_INT_LOW_ENABLE - | PACKAGE_THERM_INT_HIGH_ENABLE), h); - } - - smp_thermal_vector = intel_thermal_interrupt; - - rdmsr(MSR_IA32_MISC_ENABLE, l, h); - wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); - - /* Unmask the thermal vector: */ - l = apic_read(APIC_LVTTHMR); - apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); - - printk_once(KERN_INFO "CPU0: Thermal monitoring enabled (%s)\n", - tm2 ? 
"TM2" : "TM1"); - - /* enable thermal throttle processing */ - atomic_set(&therm_throt_en, 1); -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/threshold.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/threshold.c deleted file mode 100644 index aa578cad..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/threshold.c +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Common corrected MCE threshold handler code: - */ -#include <linux/interrupt.h> -#include <linux/kernel.h> - -#include <asm/irq_vectors.h> -#include <asm/apic.h> -#include <asm/idle.h> -#include <asm/mce.h> - -static void default_threshold_interrupt(void) -{ - printk(KERN_ERR "Unexpected threshold interrupt at vector %x\n", - THRESHOLD_APIC_VECTOR); -} - -void (*mce_threshold_vector)(void) = default_threshold_interrupt; - -asmlinkage void smp_threshold_interrupt(void) -{ - irq_enter(); - exit_idle(); - inc_irq_stat(irq_threshold_count); - mce_threshold_vector(); - irq_exit(); - /* Ack only at the end to avoid potential reentry */ - ack_APIC_irq(); -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/winchip.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/winchip.c deleted file mode 100644 index 2d7998fb..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/mcheck/winchip.c +++ /dev/null @@ -1,39 +0,0 @@ -/* - * IDT Winchip specific Machine Check Exception Reporting - * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk> - */ -#include <linux/interrupt.h> -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/init.h> - -#include <asm/processor.h> -#include <asm/mce.h> -#include <asm/msr.h> - -/* Machine check handler for WinChip C6: */ -static void winchip_machine_check(struct pt_regs *regs, long error_code) -{ - printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); - add_taint(TAINT_MACHINE_CHECK); -} - -/* Set up machine check reporting on the Winchip C6 series */ -void winchip_mcheck_init(struct cpuinfo_x86 *c) -{ - u32 lo, hi; - - machine_check_vector = winchip_machine_check; - /* Make sure the vector pointer is visible before we enable MCEs: */ - wmb(); - - rdmsr(MSR_IDT_FCR1, lo, hi); - lo |= (1<<2); /* Enable EIERRINT (int 18 MCE) */ - lo &= ~(1<<4); /* Enable MCE */ - wrmsr(MSR_IDT_FCR1, lo, hi); - - set_in_cr4(X86_CR4_MCE); - - printk(KERN_INFO - "Winchip machine check reporting enabled on CPU#0.\n"); -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/mkcapflags.pl b/ANDROID_3.4.5/arch/x86/kernel/cpu/mkcapflags.pl deleted file mode 100644 index dfea390e..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/mkcapflags.pl +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/perl -# -# Generate the x86_cap_flags[] array from include/asm-x86/cpufeature.h -# - -($in, $out) = @ARGV; - -open(IN, "< $in\0") or die "$0: cannot open: $in: $!\n"; -open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n"; - -print OUT "#include <asm/cpufeature.h>\n\n"; -print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n"; - -while (defined($line = <IN>)) { - if ($line =~ /^\s*\#\s*define\s+(X86_FEATURE_(\S+))\s+(.*)$/) { - $macro = $1; - $feature = $2; - $tail = $3; - if ($tail =~ /\/\*\s*\"([^"]*)\".*\*\//) { - $feature = $1; - } - - if ($feature ne '') { - printf OUT "\t%-32s = \"%s\",\n", - "[$macro]", "\L$feature"; - } - } -} -print OUT "};\n"; - -close(IN); -close(OUT); diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/mshyperv.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/mshyperv.c deleted file mode 100644 index 0a630dd4..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/mshyperv.c +++ /dev/null @@ -1,79 +0,0 @@ -/* - * HyperV Detection 
code. - * - * Copyright (C) 2010, Novell, Inc. - * Author : K. Y. Srinivasan <ksrinivasan@novell.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - * - */ - -#include <linux/types.h> -#include <linux/time.h> -#include <linux/clocksource.h> -#include <linux/module.h> -#include <asm/processor.h> -#include <asm/hypervisor.h> -#include <asm/hyperv.h> -#include <asm/mshyperv.h> - -struct ms_hyperv_info ms_hyperv; -EXPORT_SYMBOL_GPL(ms_hyperv); - -static bool __init ms_hyperv_platform(void) -{ - u32 eax; - u32 hyp_signature[3]; - - if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) - return false; - - cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, - &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]); - - return eax >= HYPERV_CPUID_MIN && - eax <= HYPERV_CPUID_MAX && - !memcmp("Microsoft Hv", hyp_signature, 12); -} - -static cycle_t read_hv_clock(struct clocksource *arg) -{ - cycle_t current_tick; - /* - * Read the partition counter to get the current tick count. This count - * is set to 0 when the partition is created and is incremented in - * 100 nanosecond units. - */ - rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick); - return current_tick; -} - -static struct clocksource hyperv_cs = { - .name = "hyperv_clocksource", - .rating = 400, /* use this when running on Hyperv*/ - .read = read_hv_clock, - .mask = CLOCKSOURCE_MASK(64), -}; - -static void __init ms_hyperv_init_platform(void) -{ - /* - * Extract the features and hints - */ - ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES); - ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO); - - printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n", - ms_hyperv.features, ms_hyperv.hints); - - clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100); -} - -const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { - .name = "Microsoft HyperV", - .detect = ms_hyperv_platform, - .init_platform = ms_hyperv_init_platform, -}; -EXPORT_SYMBOL(x86_hyper_ms_hyperv); diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/mtrr/Makefile b/ANDROID_3.4.5/arch/x86/kernel/cpu/mtrr/Makefile deleted file mode 100644 index ad9e5ed8..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/mtrr/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -obj-y := main.o if.o generic.o cleanup.o -obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o - diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/mtrr/amd.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/mtrr/amd.c deleted file mode 100644 index 92ba9cd3..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/mtrr/amd.c +++ /dev/null @@ -1,124 +0,0 @@ -#include <linux/init.h> -#include <linux/mm.h> -#include <asm/mtrr.h> -#include <asm/msr.h> - -#include "mtrr.h" - -static void -amd_get_mtrr(unsigned int reg, unsigned long *base, - unsigned long *size, mtrr_type *type) -{ - unsigned long low, high; - - rdmsr(MSR_K6_UWCCR, low, high); - /* Upper dword is region 1, lower is region 0 */ - if (reg == 1) - low = high; - /* The base masks off on the right alignment */ - *base = (low & 0xFFFE0000) >> PAGE_SHIFT; - *type = 0; - if (low & 1) - *type = MTRR_TYPE_UNCACHABLE; - if (low & 2) - *type = MTRR_TYPE_WRCOMB; - if (!(low & 3)) { - *size = 0; - return; - } - /* - * This needs a little explaining. The size is stored as an - * inverted mask of bits of 128K granularity 15 bits long offset - * 2 bits. - * - * So to get a size we do invert the mask and add 1 to the lowest - * mask bit (4 as its 2 bits in). 
This gives us a size we then shift - * to turn into 128K blocks. - * - * eg 111 1111 1111 1100 is 512K - * - * invert 000 0000 0000 0011 - * +1 000 0000 0000 0100 - * *128K ... - */ - low = (~low) & 0x1FFFC; - *size = (low + 4) << (15 - PAGE_SHIFT); -} - -/** - * amd_set_mtrr - Set variable MTRR register on the local CPU. - * - * @reg The register to set. - * @base The base address of the region. - * @size The size of the region. If this is 0 the region is disabled. - * @type The type of the region. - * - * Returns nothing. - */ -static void -amd_set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type) -{ - u32 regs[2]; - - /* - * Low is MTRR0, High MTRR 1 - */ - rdmsr(MSR_K6_UWCCR, regs[0], regs[1]); - /* - * Blank to disable - */ - if (size == 0) { - regs[reg] = 0; - } else { - /* - * Set the register to the base, the type (off by one) and an - * inverted bitmask of the size The size is the only odd - * bit. We are fed say 512K We invert this and we get 111 1111 - * 1111 1011 but if you subtract one and invert you get the - * desired 111 1111 1111 1100 mask - * - * But ~(x - 1) == ~x + 1 == -x. Two's complement rocks! - */ - regs[reg] = (-size >> (15 - PAGE_SHIFT) & 0x0001FFFC) - | (base << PAGE_SHIFT) | (type + 1); - } - - /* - * The writeback rule is quite specific. See the manual. Its - * disable local interrupts, write back the cache, set the mtrr - */ - wbinvd(); - wrmsr(MSR_K6_UWCCR, regs[0], regs[1]); -} - -static int -amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type) -{ - /* - * Apply the K6 block alignment and size rules - * In order - * o Uncached or gathering only - * o 128K or bigger block - * o Power of 2 block - * o base suitably aligned to the power - */ - if (type > MTRR_TYPE_WRCOMB || size < (1 << (17 - PAGE_SHIFT)) - || (size & ~(size - 1)) - size || (base & (size - 1))) - return -EINVAL; - return 0; -} - -static const struct mtrr_ops amd_mtrr_ops = { - .vendor = X86_VENDOR_AMD, - .set = amd_set_mtrr, - .get = amd_get_mtrr, - .get_free_region = generic_get_free_region, - .validate_add_page = amd_validate_add_page, - .have_wrcomb = positive_have_wrcomb, -}; - -int __init amd_init_mtrr(void) -{ - set_mtrr_ops(&amd_mtrr_ops); - return 0; -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/mtrr/centaur.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/mtrr/centaur.c deleted file mode 100644 index 316fe3e6..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/mtrr/centaur.c +++ /dev/null @@ -1,126 +0,0 @@ -#include <linux/init.h> -#include <linux/mm.h> - -#include <asm/mtrr.h> -#include <asm/msr.h> - -#include "mtrr.h" - -static struct { - unsigned long high; - unsigned long low; -} centaur_mcr[8]; - -static u8 centaur_mcr_reserved; -static u8 centaur_mcr_type; /* 0 for winchip, 1 for winchip2 */ - -/** - * centaur_get_free_region - Get a free MTRR. - * - * @base: The starting (base) address of the region. - * @size: The size (in bytes) of the region. - * - * Returns: the index of the region on success, else -1 on error. 
- */ -static int -centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg) -{ - unsigned long lbase, lsize; - mtrr_type ltype; - int i, max; - - max = num_var_ranges; - if (replace_reg >= 0 && replace_reg < max) - return replace_reg; - - for (i = 0; i < max; ++i) { - if (centaur_mcr_reserved & (1 << i)) - continue; - mtrr_if->get(i, &lbase, &lsize, <ype); - if (lsize == 0) - return i; - } - - return -ENOSPC; -} - -/* - * Report boot time MCR setups - */ -void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) -{ - centaur_mcr[mcr].low = lo; - centaur_mcr[mcr].high = hi; -} - -static void -centaur_get_mcr(unsigned int reg, unsigned long *base, - unsigned long *size, mtrr_type * type) -{ - *base = centaur_mcr[reg].high >> PAGE_SHIFT; - *size = -(centaur_mcr[reg].low & 0xfffff000) >> PAGE_SHIFT; - *type = MTRR_TYPE_WRCOMB; /* write-combining */ - - if (centaur_mcr_type == 1 && ((centaur_mcr[reg].low & 31) & 2)) - *type = MTRR_TYPE_UNCACHABLE; - if (centaur_mcr_type == 1 && (centaur_mcr[reg].low & 31) == 25) - *type = MTRR_TYPE_WRBACK; - if (centaur_mcr_type == 0 && (centaur_mcr[reg].low & 31) == 31) - *type = MTRR_TYPE_WRBACK; -} - -static void -centaur_set_mcr(unsigned int reg, unsigned long base, - unsigned long size, mtrr_type type) -{ - unsigned long low, high; - - if (size == 0) { - /* Disable */ - high = low = 0; - } else { - high = base << PAGE_SHIFT; - if (centaur_mcr_type == 0) { - /* Only support write-combining... */ - low = -size << PAGE_SHIFT | 0x1f; - } else { - if (type == MTRR_TYPE_UNCACHABLE) - low = -size << PAGE_SHIFT | 0x02; /* NC */ - else - low = -size << PAGE_SHIFT | 0x09; /* WWO, WC */ - } - } - centaur_mcr[reg].high = high; - centaur_mcr[reg].low = low; - wrmsr(MSR_IDT_MCR0 + reg, low, high); -} - -static int -centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int type) -{ - /* - * FIXME: Winchip2 supports uncached - */ - if (type != MTRR_TYPE_WRCOMB && - (centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) { - pr_warning("mtrr: only write-combining%s supported\n", - centaur_mcr_type ? " and uncacheable are" : " is"); - return -EINVAL; - } - return 0; -} - -static const struct mtrr_ops centaur_mtrr_ops = { - .vendor = X86_VENDOR_CENTAUR, - .set = centaur_set_mcr, - .get = centaur_get_mcr, - .get_free_region = centaur_get_free_region, - .validate_add_page = centaur_validate_add_page, - .have_wrcomb = positive_have_wrcomb, -}; - -int __init centaur_init_mtrr(void) -{ - set_mtrr_ops(¢aur_mtrr_ops); - return 0; -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/mtrr/cleanup.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/mtrr/cleanup.c deleted file mode 100644 index ac140c7b..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/mtrr/cleanup.c +++ /dev/null @@ -1,980 +0,0 @@ -/* - * MTRR (Memory Type Range Register) cleanup - * - * Copyright (C) 2009 Yinghai Lu - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. 
- * - * You should have received a copy of the GNU Library General Public - * License along with this library; if not, write to the Free - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ -#include <linux/module.h> -#include <linux/init.h> -#include <linux/pci.h> -#include <linux/smp.h> -#include <linux/cpu.h> -#include <linux/mutex.h> -#include <linux/uaccess.h> -#include <linux/kvm_para.h> -#include <linux/range.h> - -#include <asm/processor.h> -#include <asm/e820.h> -#include <asm/mtrr.h> -#include <asm/msr.h> - -#include "mtrr.h" - -struct var_mtrr_range_state { - unsigned long base_pfn; - unsigned long size_pfn; - mtrr_type type; -}; - -struct var_mtrr_state { - unsigned long range_startk; - unsigned long range_sizek; - unsigned long chunk_sizek; - unsigned long gran_sizek; - unsigned int reg; -}; - -/* Should be related to MTRR_VAR_RANGES nums */ -#define RANGE_NUM 256 - -static struct range __initdata range[RANGE_NUM]; -static int __initdata nr_range; - -static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; - -static int __initdata debug_print; -#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0) - -#define BIOS_BUG_MSG KERN_WARNING \ - "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" - -static int __init -x86_get_mtrr_mem_range(struct range *range, int nr_range, - unsigned long extra_remove_base, - unsigned long extra_remove_size) -{ - unsigned long base, size; - mtrr_type type; - int i; - - for (i = 0; i < num_var_ranges; i++) { - type = range_state[i].type; - if (type != MTRR_TYPE_WRBACK) - continue; - base = range_state[i].base_pfn; - size = range_state[i].size_pfn; - nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, - base, base + size); - } - if (debug_print) { - printk(KERN_DEBUG "After WB checking\n"); - for (i = 0; i < nr_range; i++) - printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n", - range[i].start, range[i].end); - } - - /* Take out UC ranges: */ - for (i = 0; i < num_var_ranges; i++) { - type = range_state[i].type; - if (type != MTRR_TYPE_UNCACHABLE && - type != MTRR_TYPE_WRPROT) - continue; - size = range_state[i].size_pfn; - if (!size) - continue; - base = range_state[i].base_pfn; - if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed && - (mtrr_state.enabled & 1)) { - /* Var MTRR contains UC entry below 1M? 
Skip it: */ - printk(BIOS_BUG_MSG, i); - if (base + size <= (1<<(20-PAGE_SHIFT))) - continue; - size -= (1<<(20-PAGE_SHIFT)) - base; - base = 1<<(20-PAGE_SHIFT); - } - subtract_range(range, RANGE_NUM, base, base + size); - } - if (extra_remove_size) - subtract_range(range, RANGE_NUM, extra_remove_base, - extra_remove_base + extra_remove_size); - - if (debug_print) { - printk(KERN_DEBUG "After UC checking\n"); - for (i = 0; i < RANGE_NUM; i++) { - if (!range[i].end) - continue; - printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n", - range[i].start, range[i].end); - } - } - - /* sort the ranges */ - nr_range = clean_sort_range(range, RANGE_NUM); - if (debug_print) { - printk(KERN_DEBUG "After sorting\n"); - for (i = 0; i < nr_range; i++) - printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n", - range[i].start, range[i].end); - } - - return nr_range; -} - -#ifdef CONFIG_MTRR_SANITIZER - -static unsigned long __init sum_ranges(struct range *range, int nr_range) -{ - unsigned long sum = 0; - int i; - - for (i = 0; i < nr_range; i++) - sum += range[i].end - range[i].start; - - return sum; -} - -static int enable_mtrr_cleanup __initdata = - CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT; - -static int __init disable_mtrr_cleanup_setup(char *str) -{ - enable_mtrr_cleanup = 0; - return 0; -} -early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); - -static int __init enable_mtrr_cleanup_setup(char *str) -{ - enable_mtrr_cleanup = 1; - return 0; -} -early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup); - -static int __init mtrr_cleanup_debug_setup(char *str) -{ - debug_print = 1; - return 0; -} -early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup); - -static void __init -set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, - unsigned char type, unsigned int address_bits) -{ - u32 base_lo, base_hi, mask_lo, mask_hi; - u64 base, mask; - - if (!sizek) { - fill_mtrr_var_range(reg, 0, 0, 0, 0); - return; - } - - mask = (1ULL << address_bits) - 1; - mask &= ~((((u64)sizek) << 10) - 1); - - base = ((u64)basek) << 10; - - base |= type; - mask |= 0x800; - - base_lo = base & ((1ULL<<32) - 1); - base_hi = base >> 32; - - mask_lo = mask & ((1ULL<<32) - 1); - mask_hi = mask >> 32; - - fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi); -} - -static void __init -save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, - unsigned char type) -{ - range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10); - range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10); - range_state[reg].type = type; -} - -static void __init set_var_mtrr_all(unsigned int address_bits) -{ - unsigned long basek, sizek; - unsigned char type; - unsigned int reg; - - for (reg = 0; reg < num_var_ranges; reg++) { - basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10); - sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10); - type = range_state[reg].type; - - set_var_mtrr(reg, basek, sizek, type, address_bits); - } -} - -static unsigned long to_size_factor(unsigned long sizek, char *factorp) -{ - unsigned long base = sizek; - char factor; - - if (base & ((1<<10) - 1)) { - /* Not MB-aligned: */ - factor = 'K'; - } else if (base & ((1<<20) - 1)) { - factor = 'M'; - base >>= 10; - } else { - factor = 'G'; - base >>= 20; - } - - *factorp = factor; - - return base; -} - -static unsigned int __init -range_to_mtrr(unsigned int reg, unsigned long range_startk, - unsigned long range_sizek, unsigned char type) -{ - if (!range_sizek || (reg >= num_var_ranges)) - return reg; - - while 
(range_sizek) { - unsigned long max_align, align; - unsigned long sizek; - - /* Compute the maximum size with which we can make a range: */ - if (range_startk) - max_align = ffs(range_startk) - 1; - else - max_align = 32; - - align = fls(range_sizek) - 1; - if (align > max_align) - align = max_align; - - sizek = 1 << align; - if (debug_print) { - char start_factor = 'K', size_factor = 'K'; - unsigned long start_base, size_base; - - start_base = to_size_factor(range_startk, &start_factor); - size_base = to_size_factor(sizek, &size_factor); - - Dprintk("Setting variable MTRR %d, " - "base: %ld%cB, range: %ld%cB, type %s\n", - reg, start_base, start_factor, - size_base, size_factor, - (type == MTRR_TYPE_UNCACHABLE) ? "UC" : - ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other") - ); - } - save_var_mtrr(reg++, range_startk, sizek, type); - range_startk += sizek; - range_sizek -= sizek; - if (reg >= num_var_ranges) - break; - } - return reg; -} - -static unsigned __init -range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, - unsigned long sizek) -{ - unsigned long hole_basek, hole_sizek; - unsigned long second_basek, second_sizek; - unsigned long range0_basek, range0_sizek; - unsigned long range_basek, range_sizek; - unsigned long chunk_sizek; - unsigned long gran_sizek; - - hole_basek = 0; - hole_sizek = 0; - second_basek = 0; - second_sizek = 0; - chunk_sizek = state->chunk_sizek; - gran_sizek = state->gran_sizek; - - /* Align with gran size, prevent small block used up MTRRs: */ - range_basek = ALIGN(state->range_startk, gran_sizek); - if ((range_basek > basek) && basek) - return second_sizek; - - state->range_sizek -= (range_basek - state->range_startk); - range_sizek = ALIGN(state->range_sizek, gran_sizek); - - while (range_sizek > state->range_sizek) { - range_sizek -= gran_sizek; - if (!range_sizek) - return 0; - } - state->range_sizek = range_sizek; - - /* Try to append some small hole: */ - range0_basek = state->range_startk; - range0_sizek = ALIGN(state->range_sizek, chunk_sizek); - - /* No increase: */ - if (range0_sizek == state->range_sizek) { - Dprintk("rangeX: %016lx - %016lx\n", - range0_basek<<10, - (range0_basek + state->range_sizek)<<10); - state->reg = range_to_mtrr(state->reg, range0_basek, - state->range_sizek, MTRR_TYPE_WRBACK); - return 0; - } - - /* Only cut back when it is not the last: */ - if (sizek) { - while (range0_basek + range0_sizek > (basek + sizek)) { - if (range0_sizek >= chunk_sizek) - range0_sizek -= chunk_sizek; - else - range0_sizek = 0; - - if (!range0_sizek) - break; - } - } - -second_try: - range_basek = range0_basek + range0_sizek; - - /* One hole in the middle: */ - if (range_basek > basek && range_basek <= (basek + sizek)) - second_sizek = range_basek - basek; - - if (range0_sizek > state->range_sizek) { - - /* One hole in middle or at the end: */ - hole_sizek = range0_sizek - state->range_sizek - second_sizek; - - /* Hole size should be less than half of range0 size: */ - if (hole_sizek >= (range0_sizek >> 1) && - range0_sizek >= chunk_sizek) { - range0_sizek -= chunk_sizek; - second_sizek = 0; - hole_sizek = 0; - - goto second_try; - } - } - - if (range0_sizek) { - Dprintk("range0: %016lx - %016lx\n", - range0_basek<<10, - (range0_basek + range0_sizek)<<10); - state->reg = range_to_mtrr(state->reg, range0_basek, - range0_sizek, MTRR_TYPE_WRBACK); - } - - if (range0_sizek < state->range_sizek) { - /* Need to handle left over range: */ - range_sizek = state->range_sizek - range0_sizek; - - Dprintk("range: %016lx - %016lx\n", - 
range_basek<<10, - (range_basek + range_sizek)<<10); - - state->reg = range_to_mtrr(state->reg, range_basek, - range_sizek, MTRR_TYPE_WRBACK); - } - - if (hole_sizek) { - hole_basek = range_basek - hole_sizek - second_sizek; - Dprintk("hole: %016lx - %016lx\n", - hole_basek<<10, - (hole_basek + hole_sizek)<<10); - state->reg = range_to_mtrr(state->reg, hole_basek, - hole_sizek, MTRR_TYPE_UNCACHABLE); - } - - return second_sizek; -} - -static void __init -set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, - unsigned long size_pfn) -{ - unsigned long basek, sizek; - unsigned long second_sizek = 0; - - if (state->reg >= num_var_ranges) - return; - - basek = base_pfn << (PAGE_SHIFT - 10); - sizek = size_pfn << (PAGE_SHIFT - 10); - - /* See if I can merge with the last range: */ - if ((basek <= 1024) || - (state->range_startk + state->range_sizek == basek)) { - unsigned long endk = basek + sizek; - state->range_sizek = endk - state->range_startk; - return; - } - /* Write the range mtrrs: */ - if (state->range_sizek != 0) - second_sizek = range_to_mtrr_with_hole(state, basek, sizek); - - /* Allocate an msr: */ - state->range_startk = basek + second_sizek; - state->range_sizek = sizek - second_sizek; -} - -/* Mininum size of mtrr block that can take hole: */ -static u64 mtrr_chunk_size __initdata = (256ULL<<20); - -static int __init parse_mtrr_chunk_size_opt(char *p) -{ - if (!p) - return -EINVAL; - mtrr_chunk_size = memparse(p, &p); - return 0; -} -early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt); - -/* Granularity of mtrr of block: */ -static u64 mtrr_gran_size __initdata; - -static int __init parse_mtrr_gran_size_opt(char *p) -{ - if (!p) - return -EINVAL; - mtrr_gran_size = memparse(p, &p); - return 0; -} -early_param("mtrr_gran_size", parse_mtrr_gran_size_opt); - -static unsigned long nr_mtrr_spare_reg __initdata = - CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT; - -static int __init parse_mtrr_spare_reg(char *arg) -{ - if (arg) - nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0); - return 0; -} -early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); - -static int __init -x86_setup_var_mtrrs(struct range *range, int nr_range, - u64 chunk_size, u64 gran_size) -{ - struct var_mtrr_state var_state; - int num_reg; - int i; - - var_state.range_startk = 0; - var_state.range_sizek = 0; - var_state.reg = 0; - var_state.chunk_sizek = chunk_size >> 10; - var_state.gran_sizek = gran_size >> 10; - - memset(range_state, 0, sizeof(range_state)); - - /* Write the range: */ - for (i = 0; i < nr_range; i++) { - set_var_mtrr_range(&var_state, range[i].start, - range[i].end - range[i].start); - } - - /* Write the last range: */ - if (var_state.range_sizek != 0) - range_to_mtrr_with_hole(&var_state, 0, 0); - - num_reg = var_state.reg; - /* Clear out the extra MTRR's: */ - while (var_state.reg < num_var_ranges) { - save_var_mtrr(var_state.reg, 0, 0, 0); - var_state.reg++; - } - - return num_reg; -} - -struct mtrr_cleanup_result { - unsigned long gran_sizek; - unsigned long chunk_sizek; - unsigned long lose_cover_sizek; - unsigned int num_reg; - int bad; -}; - -/* - * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G - * chunk size: gran_size, ..., 2G - * so we need (1+16)*8 - */ -#define NUM_RESULT 136 -#define PSHIFT (PAGE_SHIFT - 10) - -static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; -static unsigned long __initdata min_loss_pfn[RANGE_NUM]; - -static void __init print_out_mtrr_range_state(void) -{ - char start_factor = 'K', size_factor = 'K'; - unsigned long 
start_base, size_base; - mtrr_type type; - int i; - - for (i = 0; i < num_var_ranges; i++) { - - size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10); - if (!size_base) - continue; - - size_base = to_size_factor(size_base, &size_factor), - start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10); - start_base = to_size_factor(start_base, &start_factor), - type = range_state[i].type; - - printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n", - i, start_base, start_factor, - size_base, size_factor, - (type == MTRR_TYPE_UNCACHABLE) ? "UC" : - ((type == MTRR_TYPE_WRPROT) ? "WP" : - ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")) - ); - } -} - -static int __init mtrr_need_cleanup(void) -{ - int i; - mtrr_type type; - unsigned long size; - /* Extra one for all 0: */ - int num[MTRR_NUM_TYPES + 1]; - - /* Check entries number: */ - memset(num, 0, sizeof(num)); - for (i = 0; i < num_var_ranges; i++) { - type = range_state[i].type; - size = range_state[i].size_pfn; - if (type >= MTRR_NUM_TYPES) - continue; - if (!size) - type = MTRR_NUM_TYPES; - num[type]++; - } - - /* Check if we got UC entries: */ - if (!num[MTRR_TYPE_UNCACHABLE]) - return 0; - - /* Check if we only had WB and UC */ - if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != - num_var_ranges - num[MTRR_NUM_TYPES]) - return 0; - - return 1; -} - -static unsigned long __initdata range_sums; - -static void __init -mtrr_calc_range_state(u64 chunk_size, u64 gran_size, - unsigned long x_remove_base, - unsigned long x_remove_size, int i) -{ - static struct range range_new[RANGE_NUM]; - unsigned long range_sums_new; - static int nr_range_new; - int num_reg; - - /* Convert ranges to var ranges state: */ - num_reg = x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); - - /* We got new setting in range_state, check it: */ - memset(range_new, 0, sizeof(range_new)); - nr_range_new = x86_get_mtrr_mem_range(range_new, 0, - x_remove_base, x_remove_size); - range_sums_new = sum_ranges(range_new, nr_range_new); - - result[i].chunk_sizek = chunk_size >> 10; - result[i].gran_sizek = gran_size >> 10; - result[i].num_reg = num_reg; - - if (range_sums < range_sums_new) { - result[i].lose_cover_sizek = (range_sums_new - range_sums) << PSHIFT; - result[i].bad = 1; - } else { - result[i].lose_cover_sizek = (range_sums - range_sums_new) << PSHIFT; - } - - /* Double check it: */ - if (!result[i].bad && !result[i].lose_cover_sizek) { - if (nr_range_new != nr_range || memcmp(range, range_new, sizeof(range))) - result[i].bad = 1; - } - - if (!result[i].bad && (range_sums - range_sums_new < min_loss_pfn[num_reg])) - min_loss_pfn[num_reg] = range_sums - range_sums_new; -} - -static void __init mtrr_print_out_one_result(int i) -{ - unsigned long gran_base, chunk_base, lose_base; - char gran_factor, chunk_factor, lose_factor; - - gran_base = to_size_factor(result[i].gran_sizek, &gran_factor); - chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor); - lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor); - - pr_info("%sgran_size: %ld%c \tchunk_size: %ld%c \t", - result[i].bad ? "*BAD*" : " ", - gran_base, gran_factor, chunk_base, chunk_factor); - pr_cont("num_reg: %d \tlose cover RAM: %s%ld%c\n", - result[i].num_reg, result[i].bad ? 
"-" : "", - lose_base, lose_factor); -} - -static int __init mtrr_search_optimal_index(void) -{ - int num_reg_good; - int index_good; - int i; - - if (nr_mtrr_spare_reg >= num_var_ranges) - nr_mtrr_spare_reg = num_var_ranges - 1; - - num_reg_good = -1; - for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { - if (!min_loss_pfn[i]) - num_reg_good = i; - } - - index_good = -1; - if (num_reg_good != -1) { - for (i = 0; i < NUM_RESULT; i++) { - if (!result[i].bad && - result[i].num_reg == num_reg_good && - !result[i].lose_cover_sizek) { - index_good = i; - break; - } - } - } - - return index_good; -} - -int __init mtrr_cleanup(unsigned address_bits) -{ - unsigned long x_remove_base, x_remove_size; - unsigned long base, size, def, dummy; - u64 chunk_size, gran_size; - mtrr_type type; - int index_good; - int i; - - if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) - return 0; - - rdmsr(MSR_MTRRdefType, def, dummy); - def &= 0xff; - if (def != MTRR_TYPE_UNCACHABLE) - return 0; - - /* Get it and store it aside: */ - memset(range_state, 0, sizeof(range_state)); - for (i = 0; i < num_var_ranges; i++) { - mtrr_if->get(i, &base, &size, &type); - range_state[i].base_pfn = base; - range_state[i].size_pfn = size; - range_state[i].type = type; - } - - /* Check if we need handle it and can handle it: */ - if (!mtrr_need_cleanup()) - return 0; - - /* Print original var MTRRs at first, for debugging: */ - printk(KERN_DEBUG "original variable MTRRs\n"); - print_out_mtrr_range_state(); - - memset(range, 0, sizeof(range)); - x_remove_size = 0; - x_remove_base = 1 << (32 - PAGE_SHIFT); - if (mtrr_tom2) - x_remove_size = (mtrr_tom2 >> PAGE_SHIFT) - x_remove_base; - - nr_range = x86_get_mtrr_mem_range(range, 0, x_remove_base, x_remove_size); - /* - * [0, 1M) should always be covered by var mtrr with WB - * and fixed mtrrs should take effect before var mtrr for it: - */ - nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, 0, - 1ULL<<(20 - PAGE_SHIFT)); - /* Sort the ranges: */ - sort_range(range, nr_range); - - range_sums = sum_ranges(range, nr_range); - printk(KERN_INFO "total RAM covered: %ldM\n", - range_sums >> (20 - PAGE_SHIFT)); - - if (mtrr_chunk_size && mtrr_gran_size) { - i = 0; - mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size, - x_remove_base, x_remove_size, i); - - mtrr_print_out_one_result(i); - - if (!result[i].bad) { - set_var_mtrr_all(address_bits); - printk(KERN_DEBUG "New variable MTRRs\n"); - print_out_mtrr_range_state(); - return 1; - } - printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " - "will find optimal one\n"); - } - - i = 0; - memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); - memset(result, 0, sizeof(result)); - for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) { - - for (chunk_size = gran_size; chunk_size < (1ULL<<32); - chunk_size <<= 1) { - - if (i >= NUM_RESULT) - continue; - - mtrr_calc_range_state(chunk_size, gran_size, - x_remove_base, x_remove_size, i); - if (debug_print) { - mtrr_print_out_one_result(i); - printk(KERN_INFO "\n"); - } - - i++; - } - } - - /* Try to find the optimal index: */ - index_good = mtrr_search_optimal_index(); - - if (index_good != -1) { - printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); - i = index_good; - mtrr_print_out_one_result(i); - - /* Convert ranges to var ranges state: */ - chunk_size = result[i].chunk_sizek; - chunk_size <<= 10; - gran_size = result[i].gran_sizek; - gran_size <<= 10; - x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); - set_var_mtrr_all(address_bits); - 
printk(KERN_DEBUG "New variable MTRRs\n"); - print_out_mtrr_range_state(); - return 1; - } else { - /* print out all */ - for (i = 0; i < NUM_RESULT; i++) - mtrr_print_out_one_result(i); - } - - printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n"); - printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n"); - - return 0; -} -#else -int __init mtrr_cleanup(unsigned address_bits) -{ - return 0; -} -#endif - -static int disable_mtrr_trim; - -static int __init disable_mtrr_trim_setup(char *str) -{ - disable_mtrr_trim = 1; - return 0; -} -early_param("disable_mtrr_trim", disable_mtrr_trim_setup); - -/* - * Newer AMD K8s and later CPUs have a special magic MSR way to force WB - * for memory >4GB. Check for that here. - * Note this won't check if the MTRRs < 4GB where the magic bit doesn't - * apply to are wrong, but so far we don't know of any such case in the wild. - */ -#define Tom2Enabled (1U << 21) -#define Tom2ForceMemTypeWB (1U << 22) - -int __init amd_special_default_mtrr(void) -{ - u32 l, h; - - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) - return 0; - if (boot_cpu_data.x86 < 0xf) - return 0; - /* In case some hypervisor doesn't pass SYSCFG through: */ - if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0) - return 0; - /* - * Memory between 4GB and top of mem is forced WB by this magic bit. - * Reserved before K8RevF, but should be zero there. - */ - if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) == - (Tom2Enabled | Tom2ForceMemTypeWB)) - return 1; - return 0; -} - -static u64 __init -real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn) -{ - u64 trim_start, trim_size; - - trim_start = start_pfn; - trim_start <<= PAGE_SHIFT; - - trim_size = limit_pfn; - trim_size <<= PAGE_SHIFT; - trim_size -= trim_start; - - return e820_update_range(trim_start, trim_size, E820_RAM, E820_RESERVED); -} - -/** - * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs - * @end_pfn: ending page frame number - * - * Some buggy BIOSes don't setup the MTRRs properly for systems with certain - * memory configurations. This routine checks that the highest MTRR matches - * the end of memory, to make sure the MTRRs having a write back type cover - * all of the memory the kernel is intending to use. If not, it'll trim any - * memory off the end by adjusting end_pfn, removing it from the kernel's - * allocation pools, warning the user with an obnoxious message. 
- */ -int __init mtrr_trim_uncached_memory(unsigned long end_pfn) -{ - unsigned long i, base, size, highest_pfn = 0, def, dummy; - mtrr_type type; - u64 total_trim_size; - /* extra one for all 0 */ - int num[MTRR_NUM_TYPES + 1]; - - /* - * Make sure we only trim uncachable memory on machines that - * support the Intel MTRR architecture: - */ - if (!is_cpu(INTEL) || disable_mtrr_trim) - return 0; - - rdmsr(MSR_MTRRdefType, def, dummy); - def &= 0xff; - if (def != MTRR_TYPE_UNCACHABLE) - return 0; - - /* Get it and store it aside: */ - memset(range_state, 0, sizeof(range_state)); - for (i = 0; i < num_var_ranges; i++) { - mtrr_if->get(i, &base, &size, &type); - range_state[i].base_pfn = base; - range_state[i].size_pfn = size; - range_state[i].type = type; - } - - /* Find highest cached pfn: */ - for (i = 0; i < num_var_ranges; i++) { - type = range_state[i].type; - if (type != MTRR_TYPE_WRBACK) - continue; - base = range_state[i].base_pfn; - size = range_state[i].size_pfn; - if (highest_pfn < base + size) - highest_pfn = base + size; - } - - /* kvm/qemu doesn't have mtrr set right, don't trim them all: */ - if (!highest_pfn) { - printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n"); - return 0; - } - - /* Check entries number: */ - memset(num, 0, sizeof(num)); - for (i = 0; i < num_var_ranges; i++) { - type = range_state[i].type; - if (type >= MTRR_NUM_TYPES) - continue; - size = range_state[i].size_pfn; - if (!size) - type = MTRR_NUM_TYPES; - num[type]++; - } - - /* No entry for WB? */ - if (!num[MTRR_TYPE_WRBACK]) - return 0; - - /* Check if we only had WB and UC: */ - if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != - num_var_ranges - num[MTRR_NUM_TYPES]) - return 0; - - memset(range, 0, sizeof(range)); - nr_range = 0; - if (mtrr_tom2) { - range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT)); - range[nr_range].end = mtrr_tom2 >> PAGE_SHIFT; - if (highest_pfn < range[nr_range].end) - highest_pfn = range[nr_range].end; - nr_range++; - } - nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); - - /* Check the head: */ - total_trim_size = 0; - if (range[0].start) - total_trim_size += real_trim_memory(0, range[0].start); - - /* Check the holes: */ - for (i = 0; i < nr_range - 1; i++) { - if (range[i].end < range[i+1].start) - total_trim_size += real_trim_memory(range[i].end, - range[i+1].start); - } - - /* Check the top: */ - i = nr_range - 1; - if (range[i].end < end_pfn) - total_trim_size += real_trim_memory(range[i].end, - end_pfn); - - if (total_trim_size) { - pr_warning("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n", total_trim_size >> 20); - - if (!changed_by_mtrr_cleanup) - WARN_ON(1); - - pr_info("update e820 for mtrr\n"); - update_e820(); - - return 1; - } - - return 0; -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/mtrr/cyrix.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/mtrr/cyrix.c deleted file mode 100644 index 68a3343e..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/mtrr/cyrix.c +++ /dev/null @@ -1,282 +0,0 @@ -#include <linux/init.h> -#include <linux/io.h> -#include <linux/mm.h> - -#include <asm/processor-cyrix.h> -#include <asm/processor-flags.h> -#include <asm/mtrr.h> -#include <asm/msr.h> - -#include "mtrr.h" - -static void -cyrix_get_arr(unsigned int reg, unsigned long *base, - unsigned long *size, mtrr_type * type) -{ - unsigned char arr, ccr3, rcr, shift; - unsigned long flags; - - arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ - - local_irq_save(flags); - - ccr3 = getCx86(CX86_CCR3); - 
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - ((unsigned char *)base)[3] = getCx86(arr); - ((unsigned char *)base)[2] = getCx86(arr + 1); - ((unsigned char *)base)[1] = getCx86(arr + 2); - rcr = getCx86(CX86_RCR_BASE + reg); - setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ - - local_irq_restore(flags); - - shift = ((unsigned char *) base)[1] & 0x0f; - *base >>= PAGE_SHIFT; - - /* - * Power of two, at least 4K on ARR0-ARR6, 256K on ARR7 - * Note: shift==0xf means 4G, this is unsupported. - */ - if (shift) - *size = (reg < 7 ? 0x1UL : 0x40UL) << (shift - 1); - else - *size = 0; - - /* Bit 0 is Cache Enable on ARR7, Cache Disable on ARR0-ARR6 */ - if (reg < 7) { - switch (rcr) { - case 1: - *type = MTRR_TYPE_UNCACHABLE; - break; - case 8: - *type = MTRR_TYPE_WRBACK; - break; - case 9: - *type = MTRR_TYPE_WRCOMB; - break; - case 24: - default: - *type = MTRR_TYPE_WRTHROUGH; - break; - } - } else { - switch (rcr) { - case 0: - *type = MTRR_TYPE_UNCACHABLE; - break; - case 8: - *type = MTRR_TYPE_WRCOMB; - break; - case 9: - *type = MTRR_TYPE_WRBACK; - break; - case 25: - default: - *type = MTRR_TYPE_WRTHROUGH; - break; - } - } -} - -/* - * cyrix_get_free_region - get a free ARR. - * - * @base: the starting (base) address of the region. - * @size: the size (in bytes) of the region. - * - * Returns: the index of the region on success, else -1 on error. -*/ -static int -cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg) -{ - unsigned long lbase, lsize; - mtrr_type ltype; - int i; - - switch (replace_reg) { - case 7: - if (size < 0x40) - break; - case 6: - case 5: - case 4: - return replace_reg; - case 3: - case 2: - case 1: - case 0: - return replace_reg; - } - /* If we are to set up a region >32M then look at ARR7 immediately */ - if (size > 0x2000) { - cyrix_get_arr(7, &lbase, &lsize, <ype); - if (lsize == 0) - return 7; - /* Else try ARR0-ARR6 first */ - } else { - for (i = 0; i < 7; i++) { - cyrix_get_arr(i, &lbase, &lsize, <ype); - if (lsize == 0) - return i; - } - /* - * ARR0-ARR6 isn't free - * try ARR7 but its size must be at least 256K - */ - cyrix_get_arr(i, &lbase, &lsize, <ype); - if ((lsize == 0) && (size >= 0x40)) - return i; - } - return -ENOSPC; -} - -static u32 cr4, ccr3; - -static void prepare_set(void) -{ - u32 cr0; - - /* Save value of CR4 and clear Page Global Enable (bit 7) */ - if (cpu_has_pge) { - cr4 = read_cr4(); - write_cr4(cr4 & ~X86_CR4_PGE); - } - - /* - * Disable and flush caches. 
- * Note that wbinvd flushes the TLBs as a side-effect - */ - cr0 = read_cr0() | X86_CR0_CD; - wbinvd(); - write_cr0(cr0); - wbinvd(); - - /* Cyrix ARRs - everything else was excluded at the top */ - ccr3 = getCx86(CX86_CCR3); - - /* Cyrix ARRs - everything else was excluded at the top */ - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); -} - -static void post_set(void) -{ - /* Flush caches and TLBs */ - wbinvd(); - - /* Cyrix ARRs - everything else was excluded at the top */ - setCx86(CX86_CCR3, ccr3); - - /* Enable caches */ - write_cr0(read_cr0() & 0xbfffffff); - - /* Restore value of CR4 */ - if (cpu_has_pge) - write_cr4(cr4); -} - -static void cyrix_set_arr(unsigned int reg, unsigned long base, - unsigned long size, mtrr_type type) -{ - unsigned char arr, arr_type, arr_size; - - arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ - - /* count down from 32M (ARR0-ARR6) or from 2G (ARR7) */ - if (reg >= 7) - size >>= 6; - - size &= 0x7fff; /* make sure arr_size <= 14 */ - for (arr_size = 0; size; arr_size++, size >>= 1) - ; - - if (reg < 7) { - switch (type) { - case MTRR_TYPE_UNCACHABLE: - arr_type = 1; - break; - case MTRR_TYPE_WRCOMB: - arr_type = 9; - break; - case MTRR_TYPE_WRTHROUGH: - arr_type = 24; - break; - default: - arr_type = 8; - break; - } - } else { - switch (type) { - case MTRR_TYPE_UNCACHABLE: - arr_type = 0; - break; - case MTRR_TYPE_WRCOMB: - arr_type = 8; - break; - case MTRR_TYPE_WRTHROUGH: - arr_type = 25; - break; - default: - arr_type = 9; - break; - } - } - - prepare_set(); - - base <<= PAGE_SHIFT; - setCx86(arr + 0, ((unsigned char *)&base)[3]); - setCx86(arr + 1, ((unsigned char *)&base)[2]); - setCx86(arr + 2, (((unsigned char *)&base)[1]) | arr_size); - setCx86(CX86_RCR_BASE + reg, arr_type); - - post_set(); -} - -typedef struct { - unsigned long base; - unsigned long size; - mtrr_type type; -} arr_state_t; - -static arr_state_t arr_state[8] = { - {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, - {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL} -}; - -static unsigned char ccr_state[7] = { 0, 0, 0, 0, 0, 0, 0 }; - -static void cyrix_set_all(void) -{ - int i; - - prepare_set(); - - /* the CCRs are not contiguous */ - for (i = 0; i < 4; i++) - setCx86(CX86_CCR0 + i, ccr_state[i]); - for (; i < 7; i++) - setCx86(CX86_CCR4 + i, ccr_state[i]); - - for (i = 0; i < 8; i++) { - cyrix_set_arr(i, arr_state[i].base, - arr_state[i].size, arr_state[i].type); - } - - post_set(); -} - -static const struct mtrr_ops cyrix_mtrr_ops = { - .vendor = X86_VENDOR_CYRIX, - .set_all = cyrix_set_all, - .set = cyrix_set_arr, - .get = cyrix_get_arr, - .get_free_region = cyrix_get_free_region, - .validate_add_page = generic_validate_add_page, - .have_wrcomb = positive_have_wrcomb, -}; - -int __init cyrix_init_mtrr(void) -{ - set_mtrr_ops(&cyrix_mtrr_ops); - return 0; -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/mtrr/generic.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/mtrr/generic.c deleted file mode 100644 index 75772ae6..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/mtrr/generic.c +++ /dev/null @@ -1,846 +0,0 @@ -/* - * This only handles 32bit MTRR on 32bit hosts. 
This is strictly wrong - * because MTRRs can span up to 40 bits (36bits on most modern x86) - */ -#define DEBUG - -#include <linux/module.h> -#include <linux/init.h> -#include <linux/io.h> -#include <linux/mm.h> - -#include <asm/processor-flags.h> -#include <asm/cpufeature.h> -#include <asm/tlbflush.h> -#include <asm/mtrr.h> -#include <asm/msr.h> -#include <asm/pat.h> - -#include "mtrr.h" - -struct fixed_range_block { - int base_msr; /* start address of an MTRR block */ - int ranges; /* number of MTRRs in this block */ -}; - -static struct fixed_range_block fixed_range_blocks[] = { - { MSR_MTRRfix64K_00000, 1 }, /* one 64k MTRR */ - { MSR_MTRRfix16K_80000, 2 }, /* two 16k MTRRs */ - { MSR_MTRRfix4K_C0000, 8 }, /* eight 4k MTRRs */ - {} -}; - -static unsigned long smp_changes_mask; -static int mtrr_state_set; -u64 mtrr_tom2; - -struct mtrr_state_type mtrr_state; -EXPORT_SYMBOL_GPL(mtrr_state); - -/* - * BIOS is expected to clear MtrrFixDramModEn bit, see for example - * "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD - * Opteron Processors" (26094 Rev. 3.30 February 2006), section - * "13.2.1.2 SYSCFG Register": "The MtrrFixDramModEn bit should be set - * to 1 during BIOS initalization of the fixed MTRRs, then cleared to - * 0 for operation." - */ -static inline void k8_check_syscfg_dram_mod_en(void) -{ - u32 lo, hi; - - if (!((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && - (boot_cpu_data.x86 >= 0x0f))) - return; - - rdmsr(MSR_K8_SYSCFG, lo, hi); - if (lo & K8_MTRRFIXRANGE_DRAM_MODIFY) { - printk(KERN_ERR FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]" - " not cleared by BIOS, clearing this bit\n", - smp_processor_id()); - lo &= ~K8_MTRRFIXRANGE_DRAM_MODIFY; - mtrr_wrmsr(MSR_K8_SYSCFG, lo, hi); - } -} - -/* Get the size of contiguous MTRR range */ -static u64 get_mtrr_size(u64 mask) -{ - u64 size; - - mask >>= PAGE_SHIFT; - mask |= size_or_mask; - size = -mask; - size <<= PAGE_SHIFT; - return size; -} - -/* - * Check and return the effective type for MTRR-MTRR type overlap. - * Returns 1 if the effective type is UNCACHEABLE, else returns 0 - */ -static int check_type_overlap(u8 *prev, u8 *curr) -{ - if (*prev == MTRR_TYPE_UNCACHABLE || *curr == MTRR_TYPE_UNCACHABLE) { - *prev = MTRR_TYPE_UNCACHABLE; - *curr = MTRR_TYPE_UNCACHABLE; - return 1; - } - - if ((*prev == MTRR_TYPE_WRBACK && *curr == MTRR_TYPE_WRTHROUGH) || - (*prev == MTRR_TYPE_WRTHROUGH && *curr == MTRR_TYPE_WRBACK)) { - *prev = MTRR_TYPE_WRTHROUGH; - *curr = MTRR_TYPE_WRTHROUGH; - } - - if (*prev != *curr) { - *prev = MTRR_TYPE_UNCACHABLE; - *curr = MTRR_TYPE_UNCACHABLE; - return 1; - } - - return 0; -} - -/* - * Error/Semi-error returns: - * 0xFF - when MTRR is not enabled - * *repeat == 1 implies [start:end] spanned across MTRR range and type returned - * corresponds only to [start:*partial_end]. - * Caller has to lookup again for [*partial_end:end]. - */ -static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat) -{ - int i; - u64 base, mask; - u8 prev_match, curr_match; - - *repeat = 0; - if (!mtrr_state_set) - return 0xFF; - - if (!mtrr_state.enabled) - return 0xFF; - - /* Make end inclusive end, instead of exclusive */ - end--; - - /* Look in fixed ranges. 
Just return the type as per start */ - if (mtrr_state.have_fixed && (start < 0x100000)) { - int idx; - - if (start < 0x80000) { - idx = 0; - idx += (start >> 16); - return mtrr_state.fixed_ranges[idx]; - } else if (start < 0xC0000) { - idx = 1 * 8; - idx += ((start - 0x80000) >> 14); - return mtrr_state.fixed_ranges[idx]; - } else if (start < 0x1000000) { - idx = 3 * 8; - idx += ((start - 0xC0000) >> 12); - return mtrr_state.fixed_ranges[idx]; - } - } - - /* - * Look in variable ranges - * Look of multiple ranges matching this address and pick type - * as per MTRR precedence - */ - if (!(mtrr_state.enabled & 2)) - return mtrr_state.def_type; - - prev_match = 0xFF; - for (i = 0; i < num_var_ranges; ++i) { - unsigned short start_state, end_state; - - if (!(mtrr_state.var_ranges[i].mask_lo & (1 << 11))) - continue; - - base = (((u64)mtrr_state.var_ranges[i].base_hi) << 32) + - (mtrr_state.var_ranges[i].base_lo & PAGE_MASK); - mask = (((u64)mtrr_state.var_ranges[i].mask_hi) << 32) + - (mtrr_state.var_ranges[i].mask_lo & PAGE_MASK); - - start_state = ((start & mask) == (base & mask)); - end_state = ((end & mask) == (base & mask)); - - if (start_state != end_state) { - /* - * We have start:end spanning across an MTRR. - * We split the region into - * either - * (start:mtrr_end) (mtrr_end:end) - * or - * (start:mtrr_start) (mtrr_start:end) - * depending on kind of overlap. - * Return the type for first region and a pointer to - * the start of second region so that caller will - * lookup again on the second region. - * Note: This way we handle multiple overlaps as well. - */ - if (start_state) - *partial_end = base + get_mtrr_size(mask); - else - *partial_end = base; - - if (unlikely(*partial_end <= start)) { - WARN_ON(1); - *partial_end = start + PAGE_SIZE; - } - - end = *partial_end - 1; /* end is inclusive */ - *repeat = 1; - } - - if ((start & mask) != (base & mask)) - continue; - - curr_match = mtrr_state.var_ranges[i].base_lo & 0xff; - if (prev_match == 0xFF) { - prev_match = curr_match; - continue; - } - - if (check_type_overlap(&prev_match, &curr_match)) - return curr_match; - } - - if (mtrr_tom2) { - if (start >= (1ULL<<32) && (end < mtrr_tom2)) - return MTRR_TYPE_WRBACK; - } - - if (prev_match != 0xFF) - return prev_match; - - return mtrr_state.def_type; -} - -/* - * Returns the effective MTRR type for the region - * Error return: - * 0xFF - when MTRR is not enabled - */ -u8 mtrr_type_lookup(u64 start, u64 end) -{ - u8 type, prev_type; - int repeat; - u64 partial_end; - - type = __mtrr_type_lookup(start, end, &partial_end, &repeat); - - /* - * Common path is with repeat = 0. - * However, we can have cases where [start:end] spans across some - * MTRR range. Do repeated lookups for that case here. 
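The fixed-range lookup above maps a sub-1M physical address onto one of the 88 fixed MTRR type bytes. A standalone sketch of that index computation, assuming the same 64K/16K/4K layout (the helper name is hypothetical):

#include <stdio.h>

/* Map a physical address below 1M onto the 88 fixed-range type bytes:
 * eight 64K ranges, then sixteen 16K ranges, then sixty-four 4K ranges. */
static int fixed_mtrr_index(unsigned int addr)
{
        if (addr < 0x80000)                     /* 0 - 512K, 64K granularity */
                return addr >> 16;
        if (addr < 0xC0000)                     /* 512K - 768K, 16K granularity */
                return 8 + ((addr - 0x80000) >> 14);
        if (addr < 0x100000)                    /* 768K - 1M, 4K granularity */
                return 24 + ((addr - 0xC0000) >> 12);
        return -1;                              /* outside the fixed-range window */
}

int main(void)
{
        printf("0xA0000 -> index %d\n", fixed_mtrr_index(0xA0000));     /* 16 */
        printf("0xC8000 -> index %d\n", fixed_mtrr_index(0xC8000));     /* 32 */
        return 0;
}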
- */ - while (repeat) { - prev_type = type; - start = partial_end; - type = __mtrr_type_lookup(start, end, &partial_end, &repeat); - - if (check_type_overlap(&prev_type, &type)) - return type; - } - - return type; -} - -/* Get the MSR pair relating to a var range */ -static void -get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) -{ - rdmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); - rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); -} - -/* Fill the MSR pair relating to a var range */ -void fill_mtrr_var_range(unsigned int index, - u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi) -{ - struct mtrr_var_range *vr; - - vr = mtrr_state.var_ranges; - - vr[index].base_lo = base_lo; - vr[index].base_hi = base_hi; - vr[index].mask_lo = mask_lo; - vr[index].mask_hi = mask_hi; -} - -static void get_fixed_ranges(mtrr_type *frs) -{ - unsigned int *p = (unsigned int *)frs; - int i; - - k8_check_syscfg_dram_mod_en(); - - rdmsr(MSR_MTRRfix64K_00000, p[0], p[1]); - - for (i = 0; i < 2; i++) - rdmsr(MSR_MTRRfix16K_80000 + i, p[2 + i * 2], p[3 + i * 2]); - for (i = 0; i < 8; i++) - rdmsr(MSR_MTRRfix4K_C0000 + i, p[6 + i * 2], p[7 + i * 2]); -} - -void mtrr_save_fixed_ranges(void *info) -{ - if (cpu_has_mtrr) - get_fixed_ranges(mtrr_state.fixed_ranges); -} - -static unsigned __initdata last_fixed_start; -static unsigned __initdata last_fixed_end; -static mtrr_type __initdata last_fixed_type; - -static void __init print_fixed_last(void) -{ - if (!last_fixed_end) - return; - - pr_debug(" %05X-%05X %s\n", last_fixed_start, - last_fixed_end - 1, mtrr_attrib_to_str(last_fixed_type)); - - last_fixed_end = 0; -} - -static void __init update_fixed_last(unsigned base, unsigned end, - mtrr_type type) -{ - last_fixed_start = base; - last_fixed_end = end; - last_fixed_type = type; -} - -static void __init -print_fixed(unsigned base, unsigned step, const mtrr_type *types) -{ - unsigned i; - - for (i = 0; i < 8; ++i, ++types, base += step) { - if (last_fixed_end == 0) { - update_fixed_last(base, base + step, *types); - continue; - } - if (last_fixed_end == base && last_fixed_type == *types) { - last_fixed_end = base + step; - continue; - } - /* new segments: gap or different type */ - print_fixed_last(); - update_fixed_last(base, base + step, *types); - } -} - -static void prepare_set(void); -static void post_set(void); - -static void __init print_mtrr_state(void) -{ - unsigned int i; - int high_width; - - pr_debug("MTRR default type: %s\n", - mtrr_attrib_to_str(mtrr_state.def_type)); - if (mtrr_state.have_fixed) { - pr_debug("MTRR fixed ranges %sabled:\n", - mtrr_state.enabled & 1 ? "en" : "dis"); - print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0); - for (i = 0; i < 2; ++i) - print_fixed(0x80000 + i * 0x20000, 0x04000, - mtrr_state.fixed_ranges + (i + 1) * 8); - for (i = 0; i < 8; ++i) - print_fixed(0xC0000 + i * 0x08000, 0x01000, - mtrr_state.fixed_ranges + (i + 3) * 8); - - /* tail */ - print_fixed_last(); - } - pr_debug("MTRR variable ranges %sabled:\n", - mtrr_state.enabled & 2 ? 
"en" : "dis"); - if (size_or_mask & 0xffffffffUL) - high_width = ffs(size_or_mask & 0xffffffffUL) - 1; - else - high_width = ffs(size_or_mask>>32) + 32 - 1; - high_width = (high_width - (32 - PAGE_SHIFT) + 3) / 4; - - for (i = 0; i < num_var_ranges; ++i) { - if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) - pr_debug(" %u base %0*X%05X000 mask %0*X%05X000 %s\n", - i, - high_width, - mtrr_state.var_ranges[i].base_hi, - mtrr_state.var_ranges[i].base_lo >> 12, - high_width, - mtrr_state.var_ranges[i].mask_hi, - mtrr_state.var_ranges[i].mask_lo >> 12, - mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff)); - else - pr_debug(" %u disabled\n", i); - } - if (mtrr_tom2) - pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20); -} - -/* Grab all of the MTRR state for this CPU into *state */ -void __init get_mtrr_state(void) -{ - struct mtrr_var_range *vrs; - unsigned long flags; - unsigned lo, dummy; - unsigned int i; - - vrs = mtrr_state.var_ranges; - - rdmsr(MSR_MTRRcap, lo, dummy); - mtrr_state.have_fixed = (lo >> 8) & 1; - - for (i = 0; i < num_var_ranges; i++) - get_mtrr_var_range(i, &vrs[i]); - if (mtrr_state.have_fixed) - get_fixed_ranges(mtrr_state.fixed_ranges); - - rdmsr(MSR_MTRRdefType, lo, dummy); - mtrr_state.def_type = (lo & 0xff); - mtrr_state.enabled = (lo & 0xc00) >> 10; - - if (amd_special_default_mtrr()) { - unsigned low, high; - - /* TOP_MEM2 */ - rdmsr(MSR_K8_TOP_MEM2, low, high); - mtrr_tom2 = high; - mtrr_tom2 <<= 32; - mtrr_tom2 |= low; - mtrr_tom2 &= 0xffffff800000ULL; - } - - print_mtrr_state(); - - mtrr_state_set = 1; - - /* PAT setup for BP. We need to go through sync steps here */ - local_irq_save(flags); - prepare_set(); - - pat_init(); - - post_set(); - local_irq_restore(flags); -} - -/* Some BIOS's are messed up and don't set all MTRRs the same! */ -void __init mtrr_state_warn(void) -{ - unsigned long mask = smp_changes_mask; - - if (!mask) - return; - if (mask & MTRR_CHANGE_MASK_FIXED) - pr_warning("mtrr: your CPUs had inconsistent fixed MTRR settings\n"); - if (mask & MTRR_CHANGE_MASK_VARIABLE) - pr_warning("mtrr: your CPUs had inconsistent variable MTRR settings\n"); - if (mask & MTRR_CHANGE_MASK_DEFTYPE) - pr_warning("mtrr: your CPUs had inconsistent MTRRdefType settings\n"); - - printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n"); - printk(KERN_INFO "mtrr: corrected configuration.\n"); -} - -/* - * Doesn't attempt to pass an error out to MTRR users - * because it's quite complicated in some cases and probably not - * worth it because the best error handling is to ignore it. - */ -void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b) -{ - if (wrmsr_safe(msr, a, b) < 0) { - printk(KERN_ERR - "MTRR: CPU %u: Writing MSR %x to %x:%x failed\n", - smp_processor_id(), msr, a, b); - } -} - -/** - * set_fixed_range - checks & updates a fixed-range MTRR if it - * differs from the value it should have - * @msr: MSR address of the MTTR which should be checked and updated - * @changed: pointer which indicates whether the MTRR needed to be changed - * @msrwords: pointer to the MSR values which the MSR should have - */ -static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords) -{ - unsigned lo, hi; - - rdmsr(msr, lo, hi); - - if (lo != msrwords[0] || hi != msrwords[1]) { - mtrr_wrmsr(msr, msrwords[0], msrwords[1]); - *changed = true; - } -} - -/** - * generic_get_free_region - Get a free MTRR. - * @base: The starting (base) address of the region. - * @size: The size (in bytes) of the region. 
- * @replace_reg: mtrr index to be replaced; set to invalid value if none. - * - * Returns: The index of the region on success, else negative on error. - */ -int -generic_get_free_region(unsigned long base, unsigned long size, int replace_reg) -{ - unsigned long lbase, lsize; - mtrr_type ltype; - int i, max; - - max = num_var_ranges; - if (replace_reg >= 0 && replace_reg < max) - return replace_reg; - - for (i = 0; i < max; ++i) { - mtrr_if->get(i, &lbase, &lsize, <ype); - if (lsize == 0) - return i; - } - - return -ENOSPC; -} - -static void generic_get_mtrr(unsigned int reg, unsigned long *base, - unsigned long *size, mtrr_type *type) -{ - unsigned int mask_lo, mask_hi, base_lo, base_hi; - unsigned int tmp, hi; - - /* - * get_mtrr doesn't need to update mtrr_state, also it could be called - * from any cpu, so try to print it out directly. - */ - get_cpu(); - - rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); - - if ((mask_lo & 0x800) == 0) { - /* Invalid (i.e. free) range */ - *base = 0; - *size = 0; - *type = 0; - goto out_put_cpu; - } - - rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi); - - /* Work out the shifted address mask: */ - tmp = mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT; - mask_lo = size_or_mask | tmp; - - /* Expand tmp with high bits to all 1s: */ - hi = fls(tmp); - if (hi > 0) { - tmp |= ~((1<<(hi - 1)) - 1); - - if (tmp != mask_lo) { - printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n"); - add_taint(TAINT_FIRMWARE_WORKAROUND); - mask_lo = tmp; - } - } - - /* - * This works correctly if size is a power of two, i.e. a - * contiguous range: - */ - *size = -mask_lo; - *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT; - *type = base_lo & 0xff; - -out_put_cpu: - put_cpu(); -} - -/** - * set_fixed_ranges - checks & updates the fixed-range MTRRs if they - * differ from the saved set - * @frs: pointer to fixed-range MTRR values, saved by get_fixed_ranges() - */ -static int set_fixed_ranges(mtrr_type *frs) -{ - unsigned long long *saved = (unsigned long long *)frs; - bool changed = false; - int block = -1, range; - - k8_check_syscfg_dram_mod_en(); - - while (fixed_range_blocks[++block].ranges) { - for (range = 0; range < fixed_range_blocks[block].ranges; range++) - set_fixed_range(fixed_range_blocks[block].base_msr + range, - &changed, (unsigned int *)saved++); - } - - return changed; -} - -/* - * Set the MSR pair relating to a var range. - * Returns true if changes are made. - */ -static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) -{ - unsigned int lo, hi; - bool changed = false; - - rdmsr(MTRRphysBase_MSR(index), lo, hi); - if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL) - || (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != - (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { - - mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); - changed = true; - } - - rdmsr(MTRRphysMask_MSR(index), lo, hi); - - if ((vr->mask_lo & 0xfffff800UL) != (lo & 0xfffff800UL) - || (vr->mask_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != - (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { - mtrr_wrmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); - changed = true; - } - return changed; -} - -static u32 deftype_lo, deftype_hi; - -/** - * set_mtrr_state - Set the MTRR state for this CPU. - * - * NOTE: The CPU must already be in a safe state for MTRR changes. - * RETURNS: 0 if no changes made, else a mask indicating what was changed. 
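The mask-to-size conversion in generic_get_mtrr() above relies on a power-of-two region giving a contiguous mask: fill the bits above the physical address width with 1s and the size is simply the two's complement of the mask. A standalone sketch under the assumption of 36 physical address bits and 4K pages:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT      12
#define PHYS_BITS       36      /* assumed physical address width */

int main(void)
{
        uint64_t size = 256ULL << 20;           /* a 256MB example region */
        /* PHYSMASK as it would be programmed: ~(size - 1) within PHYS_BITS */
        uint64_t physmask = ~(size - 1) & ((1ULL << PHYS_BITS) - 1);

        /* Mirror the decode: shift to pfn units, fill the bits above
         * PHYS_BITS with 1s, then negate (for a power of two, -x == ~(x - 1)). */
        uint64_t size_or_mask = ~((1ULL << (PHYS_BITS - PAGE_SHIFT)) - 1);
        uint64_t size_pfn = -((physmask >> PAGE_SHIFT) | size_or_mask);

        printf("decoded size: %llu MB\n",
               (unsigned long long)(size_pfn >> (20 - PAGE_SHIFT)));
        return 0;
}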
- */ -static unsigned long set_mtrr_state(void) -{ - unsigned long change_mask = 0; - unsigned int i; - - for (i = 0; i < num_var_ranges; i++) { - if (set_mtrr_var_ranges(i, &mtrr_state.var_ranges[i])) - change_mask |= MTRR_CHANGE_MASK_VARIABLE; - } - - if (mtrr_state.have_fixed && set_fixed_ranges(mtrr_state.fixed_ranges)) - change_mask |= MTRR_CHANGE_MASK_FIXED; - - /* - * Set_mtrr_restore restores the old value of MTRRdefType, - * so to set it we fiddle with the saved value: - */ - if ((deftype_lo & 0xff) != mtrr_state.def_type - || ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) { - - deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type | - (mtrr_state.enabled << 10); - change_mask |= MTRR_CHANGE_MASK_DEFTYPE; - } - - return change_mask; -} - - -static unsigned long cr4; -static DEFINE_RAW_SPINLOCK(set_atomicity_lock); - -/* - * Since we are disabling the cache don't allow any interrupts, - * they would run extremely slow and would only increase the pain. - * - * The caller must ensure that local interrupts are disabled and - * are reenabled after post_set() has been called. - */ -static void prepare_set(void) __acquires(set_atomicity_lock) -{ - unsigned long cr0; - - /* - * Note that this is not ideal - * since the cache is only flushed/disabled for this CPU while the - * MTRRs are changed, but changing this requires more invasive - * changes to the way the kernel boots - */ - - raw_spin_lock(&set_atomicity_lock); - - /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ - cr0 = read_cr0() | X86_CR0_CD; - write_cr0(cr0); - wbinvd(); - - /* Save value of CR4 and clear Page Global Enable (bit 7) */ - if (cpu_has_pge) { - cr4 = read_cr4(); - write_cr4(cr4 & ~X86_CR4_PGE); - } - - /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ - __flush_tlb(); - - /* Save MTRR state */ - rdmsr(MSR_MTRRdefType, deftype_lo, deftype_hi); - - /* Disable MTRRs, and set the default type to uncached */ - mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi); - wbinvd(); -} - -static void post_set(void) __releases(set_atomicity_lock) -{ - /* Flush TLBs (no need to flush caches - they are disabled) */ - __flush_tlb(); - - /* Intel (P6) standard MTRRs */ - mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi); - - /* Enable caches */ - write_cr0(read_cr0() & 0xbfffffff); - - /* Restore value of CR4 */ - if (cpu_has_pge) - write_cr4(cr4); - raw_spin_unlock(&set_atomicity_lock); -} - -static void generic_set_all(void) -{ - unsigned long mask, count; - unsigned long flags; - - local_irq_save(flags); - prepare_set(); - - /* Actually set the state */ - mask = set_mtrr_state(); - - /* also set PAT */ - pat_init(); - - post_set(); - local_irq_restore(flags); - - /* Use the atomic bitops to update the global mask */ - for (count = 0; count < sizeof mask * 8; ++count) { - if (mask & 0x01) - set_bit(count, &smp_changes_mask); - mask >>= 1; - } - -} - -/** - * generic_set_mtrr - set variable MTRR register on the local CPU. - * - * @reg: The register to set. - * @base: The base address of the region. - * @size: The size of the region. If this is 0 the region is disabled. - * @type: The type of the region. - * - * Returns nothing. 
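For reference, a standalone sketch of the PHYSBASE/PHYSMASK pair that generic_set_mtrr() below programs, built from a page-frame base and size. The 36-bit address width, the 256MB write-back example, and the simplified stand-in for size_and_mask are assumptions for illustration only.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT       12
#define PHYS_BITS        36                     /* assumed physical address width */
#define MTRR_TYPE_WRBACK 6

int main(void)
{
        uint64_t base_pfn = 0;                                  /* region starts at 0 */
        uint64_t size_pfn = (256ULL << 20) >> PAGE_SHIFT;       /* 256MB, power of two */
        /* stands in for size_and_mask: pfn bits representable with PHYS_BITS */
        uint64_t pfn_mask = (1ULL << (PHYS_BITS - PAGE_SHIFT)) - 1;

        uint32_t base_lo = (uint32_t)(base_pfn << PAGE_SHIFT) | MTRR_TYPE_WRBACK;
        uint32_t base_hi = (uint32_t)((base_pfn & pfn_mask) >> (32 - PAGE_SHIFT));
        uint32_t mask_lo = (uint32_t)(-size_pfn << PAGE_SHIFT) | 0x800;  /* bit 11 = valid */
        uint32_t mask_hi = (uint32_t)((-size_pfn & pfn_mask) >> (32 - PAGE_SHIFT));

        printf("PHYSBASE = 0x%08x%08x\n", base_hi, base_lo);    /* 0x0000000000000006 */
        printf("PHYSMASK = 0x%08x%08x\n", mask_hi, mask_lo);    /* 0x0000000ff0000800 */
        return 0;
}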
- */ -static void generic_set_mtrr(unsigned int reg, unsigned long base, - unsigned long size, mtrr_type type) -{ - unsigned long flags; - struct mtrr_var_range *vr; - - vr = &mtrr_state.var_ranges[reg]; - - local_irq_save(flags); - prepare_set(); - - if (size == 0) { - /* - * The invalid bit is kept in the mask, so we simply - * clear the relevant mask register to disable a range. - */ - mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0); - memset(vr, 0, sizeof(struct mtrr_var_range)); - } else { - vr->base_lo = base << PAGE_SHIFT | type; - vr->base_hi = (base & size_and_mask) >> (32 - PAGE_SHIFT); - vr->mask_lo = -size << PAGE_SHIFT | 0x800; - vr->mask_hi = (-size & size_and_mask) >> (32 - PAGE_SHIFT); - - mtrr_wrmsr(MTRRphysBase_MSR(reg), vr->base_lo, vr->base_hi); - mtrr_wrmsr(MTRRphysMask_MSR(reg), vr->mask_lo, vr->mask_hi); - } - - post_set(); - local_irq_restore(flags); -} - -int generic_validate_add_page(unsigned long base, unsigned long size, - unsigned int type) -{ - unsigned long lbase, last; - - /* - * For Intel PPro stepping <= 7 - * must be 4 MiB aligned and not touch 0x70000000 -> 0x7003FFFF - */ - if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 && - boot_cpu_data.x86_model == 1 && - boot_cpu_data.x86_mask <= 7) { - if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) { - pr_warning("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); - return -EINVAL; - } - if (!(base + size < 0x70000 || base > 0x7003F) && - (type == MTRR_TYPE_WRCOMB - || type == MTRR_TYPE_WRBACK)) { - pr_warning("mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n"); - return -EINVAL; - } - } - - /* - * Check upper bits of base and last are equal and lower bits are 0 - * for base and 1 for last - */ - last = base + size - 1; - for (lbase = base; !(lbase & 1) && (last & 1); - lbase = lbase >> 1, last = last >> 1) - ; - if (lbase != last) { - pr_warning("mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", base, size); - return -EINVAL; - } - return 0; -} - -static int generic_have_wrcomb(void) -{ - unsigned long config, dummy; - rdmsr(MSR_MTRRcap, config, dummy); - return config & (1 << 10); -} - -int positive_have_wrcomb(void) -{ - return 1; -} - -/* - * Generic structure... - */ -const struct mtrr_ops generic_mtrr_ops = { - .use_intel_if = 1, - .set_all = generic_set_all, - .get = generic_get_mtrr, - .get_free_region = generic_get_free_region, - .set = generic_set_mtrr, - .validate_add_page = generic_validate_add_page, - .have_wrcomb = generic_have_wrcomb, -}; diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/mtrr/if.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/mtrr/if.c deleted file mode 100644 index a041e094..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/mtrr/if.c +++ /dev/null @@ -1,451 +0,0 @@ -#include <linux/capability.h> -#include <linux/seq_file.h> -#include <linux/uaccess.h> -#include <linux/proc_fs.h> -#include <linux/module.h> -#include <linux/ctype.h> -#include <linux/string.h> -#include <linux/slab.h> -#include <linux/init.h> - -#define LINE_SIZE 80 - -#include <asm/mtrr.h> - -#include "mtrr.h" - -#define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private) - -static const char *const mtrr_strings[MTRR_NUM_TYPES] = -{ - "uncachable", /* 0 */ - "write-combining", /* 1 */ - "?", /* 2 */ - "?", /* 3 */ - "write-through", /* 4 */ - "write-protect", /* 5 */ - "write-back", /* 6 */ -}; - -const char *mtrr_attrib_to_str(int x) -{ - return (x <= 6) ? 
mtrr_strings[x] : "?"; -} - -#ifdef CONFIG_PROC_FS - -static int -mtrr_file_add(unsigned long base, unsigned long size, - unsigned int type, bool increment, struct file *file, int page) -{ - unsigned int *fcount = FILE_FCOUNT(file); - int reg, max; - - max = num_var_ranges; - if (fcount == NULL) { - fcount = kzalloc(max * sizeof *fcount, GFP_KERNEL); - if (!fcount) - return -ENOMEM; - FILE_FCOUNT(file) = fcount; - } - if (!page) { - if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) - return -EINVAL; - base >>= PAGE_SHIFT; - size >>= PAGE_SHIFT; - } - reg = mtrr_add_page(base, size, type, true); - if (reg >= 0) - ++fcount[reg]; - return reg; -} - -static int -mtrr_file_del(unsigned long base, unsigned long size, - struct file *file, int page) -{ - unsigned int *fcount = FILE_FCOUNT(file); - int reg; - - if (!page) { - if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) - return -EINVAL; - base >>= PAGE_SHIFT; - size >>= PAGE_SHIFT; - } - reg = mtrr_del_page(-1, base, size); - if (reg < 0) - return reg; - if (fcount == NULL) - return reg; - if (fcount[reg] < 1) - return -EINVAL; - --fcount[reg]; - return reg; -} - -/* - * seq_file can seek but we ignore it. - * - * Format of control line: - * "base=%Lx size=%Lx type=%s" or "disable=%d" - */ -static ssize_t -mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) -{ - int i, err; - unsigned long reg; - unsigned long long base, size; - char *ptr; - char line[LINE_SIZE]; - int length; - size_t linelen; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - memset(line, 0, LINE_SIZE); - - length = len; - length--; - - if (length > LINE_SIZE - 1) - length = LINE_SIZE - 1; - - if (length < 0) - return -EINVAL; - - if (copy_from_user(line, buf, length)) - return -EFAULT; - - linelen = strlen(line); - ptr = line + linelen - 1; - if (linelen && *ptr == '\n') - *ptr = '\0'; - - if (!strncmp(line, "disable=", 8)) { - reg = simple_strtoul(line + 8, &ptr, 0); - err = mtrr_del_page(reg, 0, 0); - if (err < 0) - return err; - return len; - } - - if (strncmp(line, "base=", 5)) - return -EINVAL; - - base = simple_strtoull(line + 5, &ptr, 0); - ptr = skip_spaces(ptr); - - if (strncmp(ptr, "size=", 5)) - return -EINVAL; - - size = simple_strtoull(ptr + 5, &ptr, 0); - if ((base & 0xfff) || (size & 0xfff)) - return -EINVAL; - ptr = skip_spaces(ptr); - - if (strncmp(ptr, "type=", 5)) - return -EINVAL; - ptr = skip_spaces(ptr + 5); - - for (i = 0; i < MTRR_NUM_TYPES; ++i) { - if (strcmp(ptr, mtrr_strings[i])) - continue; - base >>= PAGE_SHIFT; - size >>= PAGE_SHIFT; - err = mtrr_add_page((unsigned long)base, (unsigned long)size, i, true); - if (err < 0) - return err; - return len; - } - return -EINVAL; -} - -static long -mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) -{ - int err = 0; - mtrr_type type; - unsigned long base; - unsigned long size; - struct mtrr_sentry sentry; - struct mtrr_gentry gentry; - void __user *arg = (void __user *) __arg; - - switch (cmd) { - case MTRRIOC_ADD_ENTRY: - case MTRRIOC_SET_ENTRY: - case MTRRIOC_DEL_ENTRY: - case MTRRIOC_KILL_ENTRY: - case MTRRIOC_ADD_PAGE_ENTRY: - case MTRRIOC_SET_PAGE_ENTRY: - case MTRRIOC_DEL_PAGE_ENTRY: - case MTRRIOC_KILL_PAGE_ENTRY: - if (copy_from_user(&sentry, arg, sizeof sentry)) - return -EFAULT; - break; - case MTRRIOC_GET_ENTRY: - case MTRRIOC_GET_PAGE_ENTRY: - if (copy_from_user(&gentry, arg, sizeof gentry)) - return -EFAULT; - break; -#ifdef CONFIG_COMPAT - case MTRRIOC32_ADD_ENTRY: - case MTRRIOC32_SET_ENTRY: - case MTRRIOC32_DEL_ENTRY: - 
case MTRRIOC32_KILL_ENTRY: - case MTRRIOC32_ADD_PAGE_ENTRY: - case MTRRIOC32_SET_PAGE_ENTRY: - case MTRRIOC32_DEL_PAGE_ENTRY: - case MTRRIOC32_KILL_PAGE_ENTRY: { - struct mtrr_sentry32 __user *s32; - - s32 = (struct mtrr_sentry32 __user *)__arg; - err = get_user(sentry.base, &s32->base); - err |= get_user(sentry.size, &s32->size); - err |= get_user(sentry.type, &s32->type); - if (err) - return err; - break; - } - case MTRRIOC32_GET_ENTRY: - case MTRRIOC32_GET_PAGE_ENTRY: { - struct mtrr_gentry32 __user *g32; - - g32 = (struct mtrr_gentry32 __user *)__arg; - err = get_user(gentry.regnum, &g32->regnum); - err |= get_user(gentry.base, &g32->base); - err |= get_user(gentry.size, &g32->size); - err |= get_user(gentry.type, &g32->type); - if (err) - return err; - break; - } -#endif - } - - switch (cmd) { - default: - return -ENOTTY; - case MTRRIOC_ADD_ENTRY: -#ifdef CONFIG_COMPAT - case MTRRIOC32_ADD_ENTRY: -#endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - err = - mtrr_file_add(sentry.base, sentry.size, sentry.type, true, - file, 0); - break; - case MTRRIOC_SET_ENTRY: -#ifdef CONFIG_COMPAT - case MTRRIOC32_SET_ENTRY: -#endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - err = mtrr_add(sentry.base, sentry.size, sentry.type, false); - break; - case MTRRIOC_DEL_ENTRY: -#ifdef CONFIG_COMPAT - case MTRRIOC32_DEL_ENTRY: -#endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - err = mtrr_file_del(sentry.base, sentry.size, file, 0); - break; - case MTRRIOC_KILL_ENTRY: -#ifdef CONFIG_COMPAT - case MTRRIOC32_KILL_ENTRY: -#endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - err = mtrr_del(-1, sentry.base, sentry.size); - break; - case MTRRIOC_GET_ENTRY: -#ifdef CONFIG_COMPAT - case MTRRIOC32_GET_ENTRY: -#endif - if (gentry.regnum >= num_var_ranges) - return -EINVAL; - mtrr_if->get(gentry.regnum, &base, &size, &type); - - /* Hide entries that go above 4GB */ - if (base + size - 1 >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT)) - || size >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT))) - gentry.base = gentry.size = gentry.type = 0; - else { - gentry.base = base << PAGE_SHIFT; - gentry.size = size << PAGE_SHIFT; - gentry.type = type; - } - - break; - case MTRRIOC_ADD_PAGE_ENTRY: -#ifdef CONFIG_COMPAT - case MTRRIOC32_ADD_PAGE_ENTRY: -#endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - err = - mtrr_file_add(sentry.base, sentry.size, sentry.type, true, - file, 1); - break; - case MTRRIOC_SET_PAGE_ENTRY: -#ifdef CONFIG_COMPAT - case MTRRIOC32_SET_PAGE_ENTRY: -#endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - err = - mtrr_add_page(sentry.base, sentry.size, sentry.type, false); - break; - case MTRRIOC_DEL_PAGE_ENTRY: -#ifdef CONFIG_COMPAT - case MTRRIOC32_DEL_PAGE_ENTRY: -#endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - err = mtrr_file_del(sentry.base, sentry.size, file, 1); - break; - case MTRRIOC_KILL_PAGE_ENTRY: -#ifdef CONFIG_COMPAT - case MTRRIOC32_KILL_PAGE_ENTRY: -#endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - err = mtrr_del_page(-1, sentry.base, sentry.size); - break; - case MTRRIOC_GET_PAGE_ENTRY: -#ifdef CONFIG_COMPAT - case MTRRIOC32_GET_PAGE_ENTRY: -#endif - if (gentry.regnum >= num_var_ranges) - return -EINVAL; - mtrr_if->get(gentry.regnum, &base, &size, &type); - /* Hide entries that would overflow */ - if (size != (__typeof__(gentry.size))size) - gentry.base = gentry.size = gentry.type = 0; - else { - gentry.base = base; - gentry.size = size; - gentry.type = type; - } - break; - } - - if (err) - return err; - - switch (cmd) { - case 
MTRRIOC_GET_ENTRY: - case MTRRIOC_GET_PAGE_ENTRY: - if (copy_to_user(arg, &gentry, sizeof gentry)) - err = -EFAULT; - break; -#ifdef CONFIG_COMPAT - case MTRRIOC32_GET_ENTRY: - case MTRRIOC32_GET_PAGE_ENTRY: { - struct mtrr_gentry32 __user *g32; - - g32 = (struct mtrr_gentry32 __user *)__arg; - err = put_user(gentry.base, &g32->base); - err |= put_user(gentry.size, &g32->size); - err |= put_user(gentry.regnum, &g32->regnum); - err |= put_user(gentry.type, &g32->type); - break; - } -#endif - } - return err; -} - -static int mtrr_close(struct inode *ino, struct file *file) -{ - unsigned int *fcount = FILE_FCOUNT(file); - int i, max; - - if (fcount != NULL) { - max = num_var_ranges; - for (i = 0; i < max; ++i) { - while (fcount[i] > 0) { - mtrr_del(i, 0, 0); - --fcount[i]; - } - } - kfree(fcount); - FILE_FCOUNT(file) = NULL; - } - return single_release(ino, file); -} - -static int mtrr_seq_show(struct seq_file *seq, void *offset); - -static int mtrr_open(struct inode *inode, struct file *file) -{ - if (!mtrr_if) - return -EIO; - if (!mtrr_if->get) - return -ENXIO; - return single_open(file, mtrr_seq_show, NULL); -} - -static const struct file_operations mtrr_fops = { - .owner = THIS_MODULE, - .open = mtrr_open, - .read = seq_read, - .llseek = seq_lseek, - .write = mtrr_write, - .unlocked_ioctl = mtrr_ioctl, - .compat_ioctl = mtrr_ioctl, - .release = mtrr_close, -}; - -static int mtrr_seq_show(struct seq_file *seq, void *offset) -{ - char factor; - int i, max, len; - mtrr_type type; - unsigned long base, size; - - len = 0; - max = num_var_ranges; - for (i = 0; i < max; i++) { - mtrr_if->get(i, &base, &size, &type); - if (size == 0) { - mtrr_usage_table[i] = 0; - continue; - } - if (size < (0x100000 >> PAGE_SHIFT)) { - /* less than 1MB */ - factor = 'K'; - size <<= PAGE_SHIFT - 10; - } else { - factor = 'M'; - size >>= 20 - PAGE_SHIFT; - } - /* Base can be > 32bit */ - len += seq_printf(seq, "reg%02i: base=0x%06lx000 " - "(%5luMB), size=%5lu%cB, count=%d: %s\n", - i, base, base >> (20 - PAGE_SHIFT), size, - factor, mtrr_usage_table[i], - mtrr_attrib_to_str(type)); - } - return 0; -} - -static int __init mtrr_if_init(void) -{ - struct cpuinfo_x86 *c = &boot_cpu_data; - - if ((!cpu_has(c, X86_FEATURE_MTRR)) && - (!cpu_has(c, X86_FEATURE_K6_MTRR)) && - (!cpu_has(c, X86_FEATURE_CYRIX_ARR)) && - (!cpu_has(c, X86_FEATURE_CENTAUR_MCR))) - return -ENODEV; - - proc_create("mtrr", S_IWUSR | S_IRUGO, NULL, &mtrr_fops); - return 0; -} -arch_initcall(mtrr_if_init); -#endif /* CONFIG_PROC_FS */ diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/mtrr/main.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/mtrr/main.c deleted file mode 100644 index 6b96110b..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/mtrr/main.c +++ /dev/null @@ -1,764 +0,0 @@ -/* Generic MTRR (Memory Type Range Register) driver. - - Copyright (C) 1997-2000 Richard Gooch - Copyright (c) 2002 Patrick Mochel - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public - License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. 
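/*
 * Illustrative sketch (not part of the original file): the /proc/mtrr
 * interface implemented by mtrr_write() above accepts either
 * "disable=<reg>" or "base=<hex> size=<hex> type=<string>", where base and
 * size must be 4 KiB multiples and <string> is one of mtrr_strings[].
 * A minimal user-space use of it might look like the following; the
 * address, size and error handling are assumptions for the example only.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* Request write-combining over a hypothetical 16 MiB aperture. */
	const char *req = "base=0xd0000000 size=0x1000000 type=write-combining\n";
	int fd = open("/proc/mtrr", O_WRONLY);

	if (fd < 0) {
		perror("open /proc/mtrr");
		return 1;
	}
	/* mtrr_write() returns the full length on success, or a negative
	 * errno (e.g. EINVAL for a malformed line, EPERM without
	 * CAP_SYS_ADMIN). */
	if (write(fd, req, strlen(req)) < 0)
		perror("write");
	close(fd);
	return 0;
}
/*
 * Reading /proc/mtrr afterwards shows the new register via mtrr_seq_show();
 * writing "disable=<reg>" releases it again.
 */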
- - You should have received a copy of the GNU Library General Public - License along with this library; if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - - Richard Gooch may be reached by email at rgooch@atnf.csiro.au - The postal address is: - Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia. - - Source: "Pentium Pro Family Developer's Manual, Volume 3: - Operating System Writer's Guide" (Intel document number 242692), - section 11.11.7 - - This was cleaned and made readable by Patrick Mochel <mochel@osdl.org> - on 6-7 March 2002. - Source: Intel Architecture Software Developers Manual, Volume 3: - System Programming Guide; Section 9.11. (1997 edition - PPro). -*/ - -#define DEBUG - -#include <linux/types.h> /* FIXME: kvm_para.h needs this */ - -#include <linux/stop_machine.h> -#include <linux/kvm_para.h> -#include <linux/uaccess.h> -#include <linux/module.h> -#include <linux/mutex.h> -#include <linux/init.h> -#include <linux/sort.h> -#include <linux/cpu.h> -#include <linux/pci.h> -#include <linux/smp.h> -#include <linux/syscore_ops.h> - -#include <asm/processor.h> -#include <asm/e820.h> -#include <asm/mtrr.h> -#include <asm/msr.h> - -#include "mtrr.h" - -u32 num_var_ranges; - -unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; -static DEFINE_MUTEX(mtrr_mutex); - -u64 size_or_mask, size_and_mask; -static bool mtrr_aps_delayed_init; - -static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; - -const struct mtrr_ops *mtrr_if; - -static void set_mtrr(unsigned int reg, unsigned long base, - unsigned long size, mtrr_type type); - -void set_mtrr_ops(const struct mtrr_ops *ops) -{ - if (ops->vendor && ops->vendor < X86_VENDOR_NUM) - mtrr_ops[ops->vendor] = ops; -} - -/* Returns non-zero if we have the write-combining memory type */ -static int have_wrcomb(void) -{ - struct pci_dev *dev; - - dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL); - if (dev != NULL) { - /* - * ServerWorks LE chipsets < rev 6 have problems with - * write-combining. Don't allow it and leave room for other - * chipsets to be tagged - */ - if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS && - dev->device == PCI_DEVICE_ID_SERVERWORKS_LE && - dev->revision <= 5) { - pr_info("mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n"); - pci_dev_put(dev); - return 0; - } - /* - * Intel 450NX errata # 23. Non ascending cacheline evictions to - * write combining memory may resulting in data corruption - */ - if (dev->vendor == PCI_VENDOR_ID_INTEL && - dev->device == PCI_DEVICE_ID_INTEL_82451NX) { - pr_info("mtrr: Intel 450NX MMC detected. Write-combining disabled.\n"); - pci_dev_put(dev); - return 0; - } - pci_dev_put(dev); - } - return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0; -} - -/* This function returns the number of variable MTRRs */ -static void __init set_num_var_ranges(void) -{ - unsigned long config = 0, dummy; - - if (use_intel()) - rdmsr(MSR_MTRRcap, config, dummy); - else if (is_cpu(AMD)) - config = 2; - else if (is_cpu(CYRIX) || is_cpu(CENTAUR)) - config = 8; - - num_var_ranges = config & 0xff; -} - -static void __init init_table(void) -{ - int i, max; - - max = num_var_ranges; - for (i = 0; i < max; i++) - mtrr_usage_table[i] = 1; -} - -struct set_mtrr_data { - unsigned long smp_base; - unsigned long smp_size; - unsigned int smp_reg; - mtrr_type smp_type; -}; - -/** - * mtrr_rendezvous_handler - Work done in the synchronization handler. Executed - * by all the CPUs. 
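/*
 * Worked example (not from the original file) of the MTRRcap fields used by
 * set_num_var_ranges() above and by generic_have_wrcomb() in generic.c: per
 * the Intel SDM, bits 7:0 of MSR_MTRRcap give the number of variable ranges
 * (VCNT), bit 8 indicates fixed-range support and bit 10 indicates
 * write-combining support.  A CPU reporting the purely illustrative value
 * MTRRcap = 0x508 would therefore have
 *
 *   VCNT        = 0x508 & 0xff = 8   (becomes num_var_ranges)
 *   fixed MTRRs : bit 8 set
 *   WC support  : bit 10 set         (generic_have_wrcomb() returns non-zero)
 */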
- * @info: pointer to mtrr configuration data - * - * Returns nothing. - */ -static int mtrr_rendezvous_handler(void *info) -{ - struct set_mtrr_data *data = info; - - /* - * We use this same function to initialize the mtrrs during boot, - * resume, runtime cpu online and on an explicit request to set a - * specific MTRR. - * - * During boot or suspend, the state of the boot cpu's mtrrs has been - * saved, and we want to replicate that across all the cpus that come - * online (either at the end of boot or resume or during a runtime cpu - * online). If we're doing that, @reg is set to something special and on - * all the cpu's we do mtrr_if->set_all() (On the logical cpu that - * started the boot/resume sequence, this might be a duplicate - * set_all()). - */ - if (data->smp_reg != ~0U) { - mtrr_if->set(data->smp_reg, data->smp_base, - data->smp_size, data->smp_type); - } else if (mtrr_aps_delayed_init || !cpu_online(smp_processor_id())) { - mtrr_if->set_all(); - } - return 0; -} - -static inline int types_compatible(mtrr_type type1, mtrr_type type2) -{ - return type1 == MTRR_TYPE_UNCACHABLE || - type2 == MTRR_TYPE_UNCACHABLE || - (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) || - (type1 == MTRR_TYPE_WRBACK && type2 == MTRR_TYPE_WRTHROUGH); -} - -/** - * set_mtrr - update mtrrs on all processors - * @reg: mtrr in question - * @base: mtrr base - * @size: mtrr size - * @type: mtrr type - * - * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly: - * - * 1. Queue work to do the following on all processors: - * 2. Disable Interrupts - * 3. Wait for all procs to do so - * 4. Enter no-fill cache mode - * 5. Flush caches - * 6. Clear PGE bit - * 7. Flush all TLBs - * 8. Disable all range registers - * 9. Update the MTRRs - * 10. Enable all range registers - * 11. Flush all TLBs and caches again - * 12. Enter normal cache mode and reenable caching - * 13. Set PGE - * 14. Wait for buddies to catch up - * 15. Enable interrupts. - * - * What does that mean for us? Well, stop_machine() will ensure that - * the rendezvous handler is started on each CPU. And in lockstep they - * do the state transition of disabling interrupts, updating MTRR's - * (the CPU vendors may each do it differently, so we call mtrr_if->set() - * callback and let them take care of it.) and enabling interrupts. - * - * Note that the mechanism is the same for UP systems, too; all the SMP stuff - * becomes nops. - */ -static void -set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type) -{ - struct set_mtrr_data data = { .smp_reg = reg, - .smp_base = base, - .smp_size = size, - .smp_type = type - }; - - stop_machine(mtrr_rendezvous_handler, &data, cpu_online_mask); -} - -static void set_mtrr_from_inactive_cpu(unsigned int reg, unsigned long base, - unsigned long size, mtrr_type type) -{ - struct set_mtrr_data data = { .smp_reg = reg, - .smp_base = base, - .smp_size = size, - .smp_type = type - }; - - stop_machine_from_inactive_cpu(mtrr_rendezvous_handler, &data, - cpu_callout_mask); -} - -/** - * mtrr_add_page - Add a memory type region - * @base: Physical base address of region in pages (in units of 4 kB!) - * @size: Physical size of region in pages (4 kB) - * @type: Type of MTRR desired - * @increment: If this is true do usage counting on the region - * - * Memory type region registers control the caching on newer Intel and - * non Intel processors. This function allows drivers to request an - * MTRR is added. 
The details and hardware specifics of each processor's - * implementation are hidden from the caller, but nevertheless the - * caller should expect to need to provide a power of two size on an - * equivalent power of two boundary. - * - * If the region cannot be added either because all regions are in use - * or the CPU cannot support it a negative value is returned. On success - * the register number for this entry is returned, but should be treated - * as a cookie only. - * - * On a multiprocessor machine the changes are made to all processors. - * This is required on x86 by the Intel processors. - * - * The available types are - * - * %MTRR_TYPE_UNCACHABLE - No caching - * - * %MTRR_TYPE_WRBACK - Write data back in bursts whenever - * - * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts - * - * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes - * - * BUGS: Needs a quiet flag for the cases where drivers do not mind - * failures and do not wish system log messages to be sent. - */ -int mtrr_add_page(unsigned long base, unsigned long size, - unsigned int type, bool increment) -{ - unsigned long lbase, lsize; - int i, replace, error; - mtrr_type ltype; - - if (!mtrr_if) - return -ENXIO; - - error = mtrr_if->validate_add_page(base, size, type); - if (error) - return error; - - if (type >= MTRR_NUM_TYPES) { - pr_warning("mtrr: type: %u invalid\n", type); - return -EINVAL; - } - - /* If the type is WC, check that this processor supports it */ - if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) { - pr_warning("mtrr: your processor doesn't support write-combining\n"); - return -ENOSYS; - } - - if (!size) { - pr_warning("mtrr: zero sized request\n"); - return -EINVAL; - } - - if (base & size_or_mask || size & size_or_mask) { - pr_warning("mtrr: base or size exceeds the MTRR width\n"); - return -EINVAL; - } - - error = -EINVAL; - replace = -1; - - /* No CPU hotplug when we change MTRR entries */ - get_online_cpus(); - - /* Search for existing MTRR */ - mutex_lock(&mtrr_mutex); - for (i = 0; i < num_var_ranges; ++i) { - mtrr_if->get(i, &lbase, &lsize, <ype); - if (!lsize || base > lbase + lsize - 1 || - base + size - 1 < lbase) - continue; - /* - * At this point we know there is some kind of - * overlap/enclosure - */ - if (base < lbase || base + size - 1 > lbase + lsize - 1) { - if (base <= lbase && - base + size - 1 >= lbase + lsize - 1) { - /* New region encloses an existing region */ - if (type == ltype) { - replace = replace == -1 ? 
i : -2; - continue; - } else if (types_compatible(type, ltype)) - continue; - } - pr_warning("mtrr: 0x%lx000,0x%lx000 overlaps existing" - " 0x%lx000,0x%lx000\n", base, size, lbase, - lsize); - goto out; - } - /* New region is enclosed by an existing region */ - if (ltype != type) { - if (types_compatible(type, ltype)) - continue; - pr_warning("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n", - base, size, mtrr_attrib_to_str(ltype), - mtrr_attrib_to_str(type)); - goto out; - } - if (increment) - ++mtrr_usage_table[i]; - error = i; - goto out; - } - /* Search for an empty MTRR */ - i = mtrr_if->get_free_region(base, size, replace); - if (i >= 0) { - set_mtrr(i, base, size, type); - if (likely(replace < 0)) { - mtrr_usage_table[i] = 1; - } else { - mtrr_usage_table[i] = mtrr_usage_table[replace]; - if (increment) - mtrr_usage_table[i]++; - if (unlikely(replace != i)) { - set_mtrr(replace, 0, 0, 0); - mtrr_usage_table[replace] = 0; - } - } - } else { - pr_info("mtrr: no more MTRRs available\n"); - } - error = i; - out: - mutex_unlock(&mtrr_mutex); - put_online_cpus(); - return error; -} - -static int mtrr_check(unsigned long base, unsigned long size) -{ - if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { - pr_warning("mtrr: size and base must be multiples of 4 kiB\n"); - pr_debug("mtrr: size: 0x%lx base: 0x%lx\n", size, base); - dump_stack(); - return -1; - } - return 0; -} - -/** - * mtrr_add - Add a memory type region - * @base: Physical base address of region - * @size: Physical size of region - * @type: Type of MTRR desired - * @increment: If this is true do usage counting on the region - * - * Memory type region registers control the caching on newer Intel and - * non Intel processors. This function allows drivers to request an - * MTRR is added. The details and hardware specifics of each processor's - * implementation are hidden from the caller, but nevertheless the - * caller should expect to need to provide a power of two size on an - * equivalent power of two boundary. - * - * If the region cannot be added either because all regions are in use - * or the CPU cannot support it a negative value is returned. On success - * the register number for this entry is returned, but should be treated - * as a cookie only. - * - * On a multiprocessor machine the changes are made to all processors. - * This is required on x86 by the Intel processors. - * - * The available types are - * - * %MTRR_TYPE_UNCACHABLE - No caching - * - * %MTRR_TYPE_WRBACK - Write data back in bursts whenever - * - * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts - * - * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes - * - * BUGS: Needs a quiet flag for the cases where drivers do not mind - * failures and do not wish system log messages to be sent. - */ -int mtrr_add(unsigned long base, unsigned long size, unsigned int type, - bool increment) -{ - if (mtrr_check(base, size)) - return -EINVAL; - return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, - increment); -} -EXPORT_SYMBOL(mtrr_add); - -/** - * mtrr_del_page - delete a memory type region - * @reg: Register returned by mtrr_add - * @base: Physical base address - * @size: Size of region - * - * If register is supplied then base and size are ignored. This is - * how drivers should call it. - * - * Releases an MTRR region. If the usage count drops to zero the - * register is freed and the region returns to default state. - * On success the register is returned, on failure a negative error - * code. 
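/*
 * Illustrative sketch (not part of the original file) of the driver-facing
 * pattern described in the mtrr_add()/mtrr_del() kernel-doc above: a driver
 * asks for write-combining over a power-of-two aperture and keeps the
 * returned register number only as a cookie for the matching delete.  The
 * example_fb structure and field names are assumptions for the example.
 */
#include <linux/kernel.h>
#include <asm/mtrr.h>

struct example_fb {
	unsigned long aperture_base;	/* physical, 4 KiB aligned */
	unsigned long aperture_size;	/* power of two, in bytes */
	int mtrr_reg;
};

static void example_enable_wc(struct example_fb *fb)
{
	/* base and size must be 4 KiB multiples (see mtrr_check()); the
	 * generic driver additionally wants a power-of-two size on an
	 * equivalent boundary. */
	fb->mtrr_reg = mtrr_add(fb->aperture_base, fb->aperture_size,
				MTRR_TYPE_WRCOMB, true);
	if (fb->mtrr_reg < 0)
		pr_warn("example_fb: write-combining not enabled\n");
}

static void example_disable_wc(struct example_fb *fb)
{
	if (fb->mtrr_reg >= 0)
		mtrr_del(fb->mtrr_reg, fb->aperture_base, fb->aperture_size);
}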
- */ -int mtrr_del_page(int reg, unsigned long base, unsigned long size) -{ - int i, max; - mtrr_type ltype; - unsigned long lbase, lsize; - int error = -EINVAL; - - if (!mtrr_if) - return -ENXIO; - - max = num_var_ranges; - /* No CPU hotplug when we change MTRR entries */ - get_online_cpus(); - mutex_lock(&mtrr_mutex); - if (reg < 0) { - /* Search for existing MTRR */ - for (i = 0; i < max; ++i) { - mtrr_if->get(i, &lbase, &lsize, <ype); - if (lbase == base && lsize == size) { - reg = i; - break; - } - } - if (reg < 0) { - pr_debug("mtrr: no MTRR for %lx000,%lx000 found\n", - base, size); - goto out; - } - } - if (reg >= max) { - pr_warning("mtrr: register: %d too big\n", reg); - goto out; - } - mtrr_if->get(reg, &lbase, &lsize, <ype); - if (lsize < 1) { - pr_warning("mtrr: MTRR %d not used\n", reg); - goto out; - } - if (mtrr_usage_table[reg] < 1) { - pr_warning("mtrr: reg: %d has count=0\n", reg); - goto out; - } - if (--mtrr_usage_table[reg] < 1) - set_mtrr(reg, 0, 0, 0); - error = reg; - out: - mutex_unlock(&mtrr_mutex); - put_online_cpus(); - return error; -} - -/** - * mtrr_del - delete a memory type region - * @reg: Register returned by mtrr_add - * @base: Physical base address - * @size: Size of region - * - * If register is supplied then base and size are ignored. This is - * how drivers should call it. - * - * Releases an MTRR region. If the usage count drops to zero the - * register is freed and the region returns to default state. - * On success the register is returned, on failure a negative error - * code. - */ -int mtrr_del(int reg, unsigned long base, unsigned long size) -{ - if (mtrr_check(base, size)) - return -EINVAL; - return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); -} -EXPORT_SYMBOL(mtrr_del); - -/* - * HACK ALERT! - * These should be called implicitly, but we can't yet until all the initcall - * stuff is done... - */ -static void __init init_ifs(void) -{ -#ifndef CONFIG_X86_64 - amd_init_mtrr(); - cyrix_init_mtrr(); - centaur_init_mtrr(); -#endif -} - -/* The suspend/resume methods are only for CPU without MTRR. CPU using generic - * MTRR driver doesn't require this - */ -struct mtrr_value { - mtrr_type ltype; - unsigned long lbase; - unsigned long lsize; -}; - -static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES]; - -static int mtrr_save(void) -{ - int i; - - for (i = 0; i < num_var_ranges; i++) { - mtrr_if->get(i, &mtrr_value[i].lbase, - &mtrr_value[i].lsize, - &mtrr_value[i].ltype); - } - return 0; -} - -static void mtrr_restore(void) -{ - int i; - - for (i = 0; i < num_var_ranges; i++) { - if (mtrr_value[i].lsize) { - set_mtrr(i, mtrr_value[i].lbase, - mtrr_value[i].lsize, - mtrr_value[i].ltype); - } - } -} - - - -static struct syscore_ops mtrr_syscore_ops = { - .suspend = mtrr_save, - .resume = mtrr_restore, -}; - -int __initdata changed_by_mtrr_cleanup; - -/** - * mtrr_bp_init - initialize mtrrs on the boot CPU - * - * This needs to be called early; before any of the other CPUs are - * initialized (i.e. before smp_init()). - * - */ -void __init mtrr_bp_init(void) -{ - u32 phys_addr; - - init_ifs(); - - phys_addr = 32; - - if (cpu_has_mtrr) { - mtrr_if = &generic_mtrr_ops; - size_or_mask = 0xff000000; /* 36 bits */ - size_and_mask = 0x00f00000; - phys_addr = 36; - - /* - * This is an AMD specific MSR, but we assume(hope?) that - * Intel will implement it to when they extend the address - * bus of the Xeon. 
- */ - if (cpuid_eax(0x80000000) >= 0x80000008) { - phys_addr = cpuid_eax(0x80000008) & 0xff; - /* CPUID workaround for Intel 0F33/0F34 CPU */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && - boot_cpu_data.x86 == 0xF && - boot_cpu_data.x86_model == 0x3 && - (boot_cpu_data.x86_mask == 0x3 || - boot_cpu_data.x86_mask == 0x4)) - phys_addr = 36; - - size_or_mask = ~((1ULL << (phys_addr - PAGE_SHIFT)) - 1); - size_and_mask = ~size_or_mask & 0xfffff00000ULL; - } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && - boot_cpu_data.x86 == 6) { - /* - * VIA C* family have Intel style MTRRs, - * but don't support PAE - */ - size_or_mask = 0xfff00000; /* 32 bits */ - size_and_mask = 0; - phys_addr = 32; - } - } else { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - if (cpu_has_k6_mtrr) { - /* Pre-Athlon (K6) AMD CPU MTRRs */ - mtrr_if = mtrr_ops[X86_VENDOR_AMD]; - size_or_mask = 0xfff00000; /* 32 bits */ - size_and_mask = 0; - } - break; - case X86_VENDOR_CENTAUR: - if (cpu_has_centaur_mcr) { - mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR]; - size_or_mask = 0xfff00000; /* 32 bits */ - size_and_mask = 0; - } - break; - case X86_VENDOR_CYRIX: - if (cpu_has_cyrix_arr) { - mtrr_if = mtrr_ops[X86_VENDOR_CYRIX]; - size_or_mask = 0xfff00000; /* 32 bits */ - size_and_mask = 0; - } - break; - default: - break; - } - } - - if (mtrr_if) { - set_num_var_ranges(); - init_table(); - if (use_intel()) { - get_mtrr_state(); - - if (mtrr_cleanup(phys_addr)) { - changed_by_mtrr_cleanup = 1; - mtrr_if->set_all(); - } - } - } -} - -void mtrr_ap_init(void) -{ - if (!use_intel() || mtrr_aps_delayed_init) - return; - /* - * Ideally we should hold mtrr_mutex here to avoid mtrr entries - * changed, but this routine will be called in cpu boot time, - * holding the lock breaks it. - * - * This routine is called in two cases: - * - * 1. very earily time of software resume, when there absolutely - * isn't mtrr entry changes; - * - * 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug - * lock to prevent mtrr entry changes - */ - set_mtrr_from_inactive_cpu(~0U, 0, 0, 0); -} - -/** - * Save current fixed-range MTRR state of the BSP - */ -void mtrr_save_state(void) -{ - smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1); -} - -void set_mtrr_aps_delayed_init(void) -{ - if (!use_intel()) - return; - - mtrr_aps_delayed_init = true; -} - -/* - * Delayed MTRR initialization for all AP's - */ -void mtrr_aps_init(void) -{ - if (!use_intel()) - return; - - /* - * Check if someone has requested the delay of AP MTRR initialization, - * by doing set_mtrr_aps_delayed_init(), prior to this point. If not, - * then we are done. - */ - if (!mtrr_aps_delayed_init) - return; - - set_mtrr(~0U, 0, 0, 0); - mtrr_aps_delayed_init = false; -} - -void mtrr_bp_restore(void) -{ - if (!use_intel()) - return; - - mtrr_if->set_all(); -} - -static int __init mtrr_init_finialize(void) -{ - if (!mtrr_if) - return 0; - - if (use_intel()) { - if (!changed_by_mtrr_cleanup) - mtrr_state_warn(); - return 0; - } - - /* - * The CPU has no MTRR and seems to not support SMP. They have - * specific drivers, we use a tricky method to support - * suspend/resume for them. - * - * TBD: is there any system with such CPU which supports - * suspend/resume? If no, we should remove the code. 
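/*
 * Worked example (not from the original file) of the masks computed in
 * mtrr_bp_init() above for the CPUID 0x80000008 path, assuming
 * PAGE_SHIFT = 12 and a reported physical address width of 36 bits:
 *
 *   size_or_mask  = ~((1ULL << (36 - 12)) - 1)      = 0xffffffffff000000
 *   size_and_mask = ~size_or_mask & 0xfffff00000ULL = 0x0000000000f00000
 *
 * Since base and size are kept in 4 KiB pages, any page number with a bit
 * set in size_or_mask lies above the 36-bit limit, which is exactly what
 * the "base or size exceeds the MTRR width" check in mtrr_add_page() tests.
 * size_and_mask keeps the page-number bits that belong in the high half of
 * the MTRRphysBase/MTRRphysMask MSRs and matches the 0x00f00000 default
 * used for the plain 36-bit case.
 */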
- */ - register_syscore_ops(&mtrr_syscore_ops); - - return 0; -} -subsys_initcall(mtrr_init_finialize); diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/mtrr/mtrr.h b/ANDROID_3.4.5/arch/x86/kernel/cpu/mtrr/mtrr.h deleted file mode 100644 index df5e41f3..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/mtrr/mtrr.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * local MTRR defines. - */ - -#include <linux/types.h> -#include <linux/stddef.h> - -#define MTRR_CHANGE_MASK_FIXED 0x01 -#define MTRR_CHANGE_MASK_VARIABLE 0x02 -#define MTRR_CHANGE_MASK_DEFTYPE 0x04 - -extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; - -struct mtrr_ops { - u32 vendor; - u32 use_intel_if; - void (*set)(unsigned int reg, unsigned long base, - unsigned long size, mtrr_type type); - void (*set_all)(void); - - void (*get)(unsigned int reg, unsigned long *base, - unsigned long *size, mtrr_type *type); - int (*get_free_region)(unsigned long base, unsigned long size, - int replace_reg); - int (*validate_add_page)(unsigned long base, unsigned long size, - unsigned int type); - int (*have_wrcomb)(void); -}; - -extern int generic_get_free_region(unsigned long base, unsigned long size, - int replace_reg); -extern int generic_validate_add_page(unsigned long base, unsigned long size, - unsigned int type); - -extern const struct mtrr_ops generic_mtrr_ops; - -extern int positive_have_wrcomb(void); - -/* library functions for processor-specific routines */ -struct set_mtrr_context { - unsigned long flags; - unsigned long cr4val; - u32 deftype_lo; - u32 deftype_hi; - u32 ccr3; -}; - -void set_mtrr_done(struct set_mtrr_context *ctxt); -void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); -void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); - -void fill_mtrr_var_range(unsigned int index, - u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); -void get_mtrr_state(void); - -extern void set_mtrr_ops(const struct mtrr_ops *ops); - -extern u64 size_or_mask, size_and_mask; -extern const struct mtrr_ops *mtrr_if; - -#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) -#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) - -extern unsigned int num_var_ranges; -extern u64 mtrr_tom2; -extern struct mtrr_state_type mtrr_state; - -void mtrr_state_warn(void); -const char *mtrr_attrib_to_str(int x); -void mtrr_wrmsr(unsigned, unsigned, unsigned); - -/* CPU specific mtrr init functions */ -int amd_init_mtrr(void); -int cyrix_init_mtrr(void); -int centaur_init_mtrr(void); - -extern int changed_by_mtrr_cleanup; -extern int mtrr_cleanup(unsigned address_bits); diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/perf_event.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/perf_event.c deleted file mode 100644 index bb8e0340..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/perf_event.c +++ /dev/null @@ -1,1886 +0,0 @@ -/* - * Performance events x86 architecture code - * - * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> - * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar - * Copyright (C) 2009 Jaswinder Singh Rajput - * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter - * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> - * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> - * Copyright (C) 2009 Google, Inc., Stephane Eranian - * - * For licencing details see kernel-base/COPYING - */ - -#include <linux/perf_event.h> -#include <linux/capability.h> -#include <linux/notifier.h> -#include <linux/hardirq.h> -#include <linux/kprobes.h> -#include <linux/module.h> -#include 
<linux/kdebug.h> -#include <linux/sched.h> -#include <linux/uaccess.h> -#include <linux/slab.h> -#include <linux/cpu.h> -#include <linux/bitops.h> -#include <linux/device.h> - -#include <asm/apic.h> -#include <asm/stacktrace.h> -#include <asm/nmi.h> -#include <asm/smp.h> -#include <asm/alternative.h> -#include <asm/timer.h> - -#include "perf_event.h" - -#if 0 -#undef wrmsrl -#define wrmsrl(msr, val) \ -do { \ - trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\ - (unsigned long)(val)); \ - native_write_msr((msr), (u32)((u64)(val)), \ - (u32)((u64)(val) >> 32)); \ -} while (0) -#endif - -struct x86_pmu x86_pmu __read_mostly; - -DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { - .enabled = 1, -}; - -u64 __read_mostly hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX]; -u64 __read_mostly hw_cache_extra_regs - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX]; - -/* - * Propagate event elapsed time into the generic event. - * Can only be executed on the CPU where the event is active. - * Returns the delta events processed. - */ -u64 x86_perf_event_update(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - int shift = 64 - x86_pmu.cntval_bits; - u64 prev_raw_count, new_raw_count; - int idx = hwc->idx; - s64 delta; - - if (idx == X86_PMC_IDX_FIXED_BTS) - return 0; - - /* - * Careful: an NMI might modify the previous event value. - * - * Our tactic to handle this is to first atomically read and - * exchange a new raw count - then add that new-prev delta - * count to the generic event atomically: - */ -again: - prev_raw_count = local64_read(&hwc->prev_count); - rdmsrl(hwc->event_base, new_raw_count); - - if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, - new_raw_count) != prev_raw_count) - goto again; - - /* - * Now we have the new raw value and have updated the prev - * timestamp already. We can now calculate the elapsed delta - * (event-)time and add that to the generic event. - * - * Careful, not all hw sign-extends above the physical width - * of the count. - */ - delta = (new_raw_count << shift) - (prev_raw_count << shift); - delta >>= shift; - - local64_add(delta, &event->count); - local64_sub(delta, &hwc->period_left); - - return new_raw_count; -} - -/* - * Find and validate any extra registers to set up. 
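/*
 * Worked example (not from the original file) of the shift trick in
 * x86_perf_event_update() above, assuming a 48-bit counter
 * (x86_pmu.cntval_bits = 48, so shift = 16):
 *
 *   prev_raw_count = (u64)-100000    = 0xfffffffffffe7960  (full 64 bits)
 *   new_raw_count  = rdmsrl() result = 0x0000fffffffe79c4  (48 bits, not
 *                                      sign-extended by the CPU)
 *
 * A plain new - prev would give 0x1000000000064, i.e. 2^48 too much.
 * Shifting both values left by 16 first discards the bits above the counter
 * width, and the arithmetic right shift afterwards restores the sign, so
 *
 *   delta = ((new << 16) - (prev << 16)) >> 16 = 0x64 = 100 events,
 *
 * regardless of whether the MSR read sign-extends bit 47 or not.
 */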
- */ -static int x86_pmu_extra_regs(u64 config, struct perf_event *event) -{ - struct hw_perf_event_extra *reg; - struct extra_reg *er; - - reg = &event->hw.extra_reg; - - if (!x86_pmu.extra_regs) - return 0; - - for (er = x86_pmu.extra_regs; er->msr; er++) { - if (er->event != (config & er->config_mask)) - continue; - if (event->attr.config1 & ~er->valid_mask) - return -EINVAL; - - reg->idx = er->idx; - reg->config = event->attr.config1; - reg->reg = er->msr; - break; - } - return 0; -} - -static atomic_t active_events; -static DEFINE_MUTEX(pmc_reserve_mutex); - -#ifdef CONFIG_X86_LOCAL_APIC - -static bool reserve_pmc_hardware(void) -{ - int i; - - for (i = 0; i < x86_pmu.num_counters; i++) { - if (!reserve_perfctr_nmi(x86_pmu_event_addr(i))) - goto perfctr_fail; - } - - for (i = 0; i < x86_pmu.num_counters; i++) { - if (!reserve_evntsel_nmi(x86_pmu_config_addr(i))) - goto eventsel_fail; - } - - return true; - -eventsel_fail: - for (i--; i >= 0; i--) - release_evntsel_nmi(x86_pmu_config_addr(i)); - - i = x86_pmu.num_counters; - -perfctr_fail: - for (i--; i >= 0; i--) - release_perfctr_nmi(x86_pmu_event_addr(i)); - - return false; -} - -static void release_pmc_hardware(void) -{ - int i; - - for (i = 0; i < x86_pmu.num_counters; i++) { - release_perfctr_nmi(x86_pmu_event_addr(i)); - release_evntsel_nmi(x86_pmu_config_addr(i)); - } -} - -#else - -static bool reserve_pmc_hardware(void) { return true; } -static void release_pmc_hardware(void) {} - -#endif - -static bool check_hw_exists(void) -{ - u64 val, val_new = 0; - int i, reg, ret = 0; - - /* - * Check to see if the BIOS enabled any of the counters, if so - * complain and bail. - */ - for (i = 0; i < x86_pmu.num_counters; i++) { - reg = x86_pmu_config_addr(i); - ret = rdmsrl_safe(reg, &val); - if (ret) - goto msr_fail; - if (val & ARCH_PERFMON_EVENTSEL_ENABLE) - goto bios_fail; - } - - if (x86_pmu.num_counters_fixed) { - reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; - ret = rdmsrl_safe(reg, &val); - if (ret) - goto msr_fail; - for (i = 0; i < x86_pmu.num_counters_fixed; i++) { - if (val & (0x03 << i*4)) - goto bios_fail; - } - } - - /* - * Now write a value and read it back to see if it matches, - * this is needed to detect certain hardware emulators (qemu/kvm) - * that don't trap on the MSR access and always return 0s. 
- */ - val = 0xabcdUL; - ret = checking_wrmsrl(x86_pmu_event_addr(0), val); - ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new); - if (ret || val != val_new) - goto msr_fail; - - return true; - -bios_fail: - /* - * We still allow the PMU driver to operate: - */ - printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n"); - printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val); - - return true; - -msr_fail: - printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n"); - - return false; -} - -static void hw_perf_event_destroy(struct perf_event *event) -{ - if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { - release_pmc_hardware(); - release_ds_buffers(); - mutex_unlock(&pmc_reserve_mutex); - } -} - -static inline int x86_pmu_initialized(void) -{ - return x86_pmu.handle_irq != NULL; -} - -static inline int -set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event) -{ - struct perf_event_attr *attr = &event->attr; - unsigned int cache_type, cache_op, cache_result; - u64 config, val; - - config = attr->config; - - cache_type = (config >> 0) & 0xff; - if (cache_type >= PERF_COUNT_HW_CACHE_MAX) - return -EINVAL; - - cache_op = (config >> 8) & 0xff; - if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) - return -EINVAL; - - cache_result = (config >> 16) & 0xff; - if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) - return -EINVAL; - - val = hw_cache_event_ids[cache_type][cache_op][cache_result]; - - if (val == 0) - return -ENOENT; - - if (val == -1) - return -EINVAL; - - hwc->config |= val; - attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result]; - return x86_pmu_extra_regs(val, event); -} - -int x86_setup_perfctr(struct perf_event *event) -{ - struct perf_event_attr *attr = &event->attr; - struct hw_perf_event *hwc = &event->hw; - u64 config; - - if (!is_sampling_event(event)) { - hwc->sample_period = x86_pmu.max_period; - hwc->last_period = hwc->sample_period; - local64_set(&hwc->period_left, hwc->sample_period); - } else { - /* - * If we have a PMU initialized but no APIC - * interrupts, we cannot sample hardware - * events (user-space has to fall back and - * sample via a hrtimer based software event): - */ - if (!x86_pmu.apic) - return -EOPNOTSUPP; - } - - if (attr->type == PERF_TYPE_RAW) - return x86_pmu_extra_regs(event->attr.config, event); - - if (attr->type == PERF_TYPE_HW_CACHE) - return set_ext_hw_attr(hwc, event); - - if (attr->config >= x86_pmu.max_events) - return -EINVAL; - - /* - * The generic map: - */ - config = x86_pmu.event_map(attr->config); - - if (config == 0) - return -ENOENT; - - if (config == -1LL) - return -EINVAL; - - /* - * Branch tracing: - */ - if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS && - !attr->freq && hwc->sample_period == 1) { - /* BTS is not supported by this architecture. */ - if (!x86_pmu.bts_active) - return -EOPNOTSUPP; - - /* BTS is currently only allowed for user-mode. 
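/*
 * Worked example (not from the original file) of the PERF_TYPE_HW_CACHE
 * config layout decoded by set_ext_hw_attr() above: byte 0 selects the
 * cache, byte 1 the operation and byte 2 the result, using the generic perf
 * enums.  For L1 data-cache read misses:
 *
 *   cache_type   = PERF_COUNT_HW_CACHE_L1D         = 0
 *   cache_op     = PERF_COUNT_HW_CACHE_OP_READ     = 0
 *   cache_result = PERF_COUNT_HW_CACHE_RESULT_MISS = 1
 *
 *   attr.config  = (1 << 16) | (0 << 8) | 0        = 0x10000
 *
 * set_ext_hw_attr() then looks up hw_cache_event_ids[0][0][1] for the
 * vendor-specific event encoding, rejecting the combination if the table
 * holds 0 (-ENOENT) or -1 (-EINVAL).
 */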
*/ - if (!attr->exclude_kernel) - return -EOPNOTSUPP; - } - - hwc->config |= config; - - return 0; -} - -/* - * check that branch_sample_type is compatible with - * settings needed for precise_ip > 1 which implies - * using the LBR to capture ALL taken branches at the - * priv levels of the measurement - */ -static inline int precise_br_compat(struct perf_event *event) -{ - u64 m = event->attr.branch_sample_type; - u64 b = 0; - - /* must capture all branches */ - if (!(m & PERF_SAMPLE_BRANCH_ANY)) - return 0; - - m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER; - - if (!event->attr.exclude_user) - b |= PERF_SAMPLE_BRANCH_USER; - - if (!event->attr.exclude_kernel) - b |= PERF_SAMPLE_BRANCH_KERNEL; - - /* - * ignore PERF_SAMPLE_BRANCH_HV, not supported on x86 - */ - - return m == b; -} - -int x86_pmu_hw_config(struct perf_event *event) -{ - if (event->attr.precise_ip) { - int precise = 0; - - /* Support for constant skid */ - if (x86_pmu.pebs_active) { - precise++; - - /* Support for IP fixup */ - if (x86_pmu.lbr_nr) - precise++; - } - - if (event->attr.precise_ip > precise) - return -EOPNOTSUPP; - /* - * check that PEBS LBR correction does not conflict with - * whatever the user is asking with attr->branch_sample_type - */ - if (event->attr.precise_ip > 1) { - u64 *br_type = &event->attr.branch_sample_type; - - if (has_branch_stack(event)) { - if (!precise_br_compat(event)) - return -EOPNOTSUPP; - - /* branch_sample_type is compatible */ - - } else { - /* - * user did not specify branch_sample_type - * - * For PEBS fixups, we capture all - * the branches at the priv level of the - * event. - */ - *br_type = PERF_SAMPLE_BRANCH_ANY; - - if (!event->attr.exclude_user) - *br_type |= PERF_SAMPLE_BRANCH_USER; - - if (!event->attr.exclude_kernel) - *br_type |= PERF_SAMPLE_BRANCH_KERNEL; - } - } - } - - /* - * Generate PMC IRQs: - * (keep 'enabled' bit clear for now) - */ - event->hw.config = ARCH_PERFMON_EVENTSEL_INT; - - /* - * Count user and OS events unless requested not to - */ - if (!event->attr.exclude_user) - event->hw.config |= ARCH_PERFMON_EVENTSEL_USR; - if (!event->attr.exclude_kernel) - event->hw.config |= ARCH_PERFMON_EVENTSEL_OS; - - if (event->attr.type == PERF_TYPE_RAW) - event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK; - - return x86_setup_perfctr(event); -} - -/* - * Setup the hardware configuration for a given attr_type - */ -static int __x86_pmu_event_init(struct perf_event *event) -{ - int err; - - if (!x86_pmu_initialized()) - return -ENODEV; - - err = 0; - if (!atomic_inc_not_zero(&active_events)) { - mutex_lock(&pmc_reserve_mutex); - if (atomic_read(&active_events) == 0) { - if (!reserve_pmc_hardware()) - err = -EBUSY; - else - reserve_ds_buffers(); - } - if (!err) - atomic_inc(&active_events); - mutex_unlock(&pmc_reserve_mutex); - } - if (err) - return err; - - event->destroy = hw_perf_event_destroy; - - event->hw.idx = -1; - event->hw.last_cpu = -1; - event->hw.last_tag = ~0ULL; - - /* mark unused */ - event->hw.extra_reg.idx = EXTRA_REG_NONE; - - /* mark not used */ - event->hw.extra_reg.idx = EXTRA_REG_NONE; - event->hw.branch_reg.idx = EXTRA_REG_NONE; - - return x86_pmu.hw_config(event); -} - -void x86_pmu_disable_all(void) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - int idx; - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - u64 val; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - rdmsrl(x86_pmu_config_addr(idx), val); - if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE)) - continue; - val &= 
~ARCH_PERFMON_EVENTSEL_ENABLE; - wrmsrl(x86_pmu_config_addr(idx), val); - } -} - -static void x86_pmu_disable(struct pmu *pmu) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - - if (!x86_pmu_initialized()) - return; - - if (!cpuc->enabled) - return; - - cpuc->n_added = 0; - cpuc->enabled = 0; - barrier(); - - x86_pmu.disable_all(); -} - -void x86_pmu_enable_all(int added) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - int idx; - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - struct hw_perf_event *hwc = &cpuc->events[idx]->hw; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); - } -} - -static struct pmu pmu; - -static inline int is_x86_event(struct perf_event *event) -{ - return event->pmu == &pmu; -} - -/* - * Event scheduler state: - * - * Assign events iterating over all events and counters, beginning - * with events with least weights first. Keep the current iterator - * state in struct sched_state. - */ -struct sched_state { - int weight; - int event; /* event index */ - int counter; /* counter index */ - int unassigned; /* number of events to be assigned left */ - unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; -}; - -/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */ -#define SCHED_STATES_MAX 2 - -struct perf_sched { - int max_weight; - int max_events; - struct event_constraint **constraints; - struct sched_state state; - int saved_states; - struct sched_state saved[SCHED_STATES_MAX]; -}; - -/* - * Initialize interator that runs through all events and counters. - */ -static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c, - int num, int wmin, int wmax) -{ - int idx; - - memset(sched, 0, sizeof(*sched)); - sched->max_events = num; - sched->max_weight = wmax; - sched->constraints = c; - - for (idx = 0; idx < num; idx++) { - if (c[idx]->weight == wmin) - break; - } - - sched->state.event = idx; /* start with min weight */ - sched->state.weight = wmin; - sched->state.unassigned = num; -} - -static void perf_sched_save_state(struct perf_sched *sched) -{ - if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX)) - return; - - sched->saved[sched->saved_states] = sched->state; - sched->saved_states++; -} - -static bool perf_sched_restore_state(struct perf_sched *sched) -{ - if (!sched->saved_states) - return false; - - sched->saved_states--; - sched->state = sched->saved[sched->saved_states]; - - /* continue with next counter: */ - clear_bit(sched->state.counter++, sched->state.used); - - return true; -} - -/* - * Select a counter for the current event to schedule. Return true on - * success. 
- */ -static bool __perf_sched_find_counter(struct perf_sched *sched) -{ - struct event_constraint *c; - int idx; - - if (!sched->state.unassigned) - return false; - - if (sched->state.event >= sched->max_events) - return false; - - c = sched->constraints[sched->state.event]; - - /* Prefer fixed purpose counters */ - if (x86_pmu.num_counters_fixed) { - idx = X86_PMC_IDX_FIXED; - for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) { - if (!__test_and_set_bit(idx, sched->state.used)) - goto done; - } - } - /* Grab the first unused counter starting with idx */ - idx = sched->state.counter; - for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_FIXED) { - if (!__test_and_set_bit(idx, sched->state.used)) - goto done; - } - - return false; - -done: - sched->state.counter = idx; - - if (c->overlap) - perf_sched_save_state(sched); - - return true; -} - -static bool perf_sched_find_counter(struct perf_sched *sched) -{ - while (!__perf_sched_find_counter(sched)) { - if (!perf_sched_restore_state(sched)) - return false; - } - - return true; -} - -/* - * Go through all unassigned events and find the next one to schedule. - * Take events with the least weight first. Return true on success. - */ -static bool perf_sched_next_event(struct perf_sched *sched) -{ - struct event_constraint *c; - - if (!sched->state.unassigned || !--sched->state.unassigned) - return false; - - do { - /* next event */ - sched->state.event++; - if (sched->state.event >= sched->max_events) { - /* next weight */ - sched->state.event = 0; - sched->state.weight++; - if (sched->state.weight > sched->max_weight) - return false; - } - c = sched->constraints[sched->state.event]; - } while (c->weight != sched->state.weight); - - sched->state.counter = 0; /* start with first counter */ - - return true; -} - -/* - * Assign a counter for each event. 
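/*
 * Worked example (not from the original file) of why the scheduler above
 * walks events in ascending constraint weight.  Suppose two events are
 * collected: event A may only live on counter 0 (weight 1) and event B may
 * use counters 0 or 1 (weight 2).
 *
 *   weight-ordered:  A -> counter 0, then B -> counter 1   (both fit)
 *   arbitrary order: B -> counter 0 first would strand A
 *
 * perf_sched_next_event() therefore exhausts all weight-1 events before
 * moving on to weight 2, and __perf_sched_find_counter() saves a
 * backtracking point (perf_sched_save_state) only for overlapping
 * constraints, with at most SCHED_STATES_MAX saved states.
 */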
- */ -static int perf_assign_events(struct event_constraint **constraints, int n, - int wmin, int wmax, int *assign) -{ - struct perf_sched sched; - - perf_sched_init(&sched, constraints, n, wmin, wmax); - - do { - if (!perf_sched_find_counter(&sched)) - break; /* failed */ - if (assign) - assign[sched.state.event] = sched.state.counter; - } while (perf_sched_next_event(&sched)); - - return sched.state.unassigned; -} - -int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) -{ - struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; - unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; - int i, wmin, wmax, num = 0; - struct hw_perf_event *hwc; - - bitmap_zero(used_mask, X86_PMC_IDX_MAX); - - for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) { - c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); - constraints[i] = c; - wmin = min(wmin, c->weight); - wmax = max(wmax, c->weight); - } - - /* - * fastpath, try to reuse previous register - */ - for (i = 0; i < n; i++) { - hwc = &cpuc->event_list[i]->hw; - c = constraints[i]; - - /* never assigned */ - if (hwc->idx == -1) - break; - - /* constraint still honored */ - if (!test_bit(hwc->idx, c->idxmsk)) - break; - - /* not already used */ - if (test_bit(hwc->idx, used_mask)) - break; - - __set_bit(hwc->idx, used_mask); - if (assign) - assign[i] = hwc->idx; - } - - /* slow path */ - if (i != n) - num = perf_assign_events(constraints, n, wmin, wmax, assign); - - /* - * scheduling failed or is just a simulation, - * free resources if necessary - */ - if (!assign || num) { - for (i = 0; i < n; i++) { - if (x86_pmu.put_event_constraints) - x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]); - } - } - return num ? -EINVAL : 0; -} - -/* - * dogrp: true if must collect siblings events (group) - * returns total number of events and error code - */ -static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp) -{ - struct perf_event *event; - int n, max_count; - - max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed; - - /* current number of events already accepted */ - n = cpuc->n_events; - - if (is_x86_event(leader)) { - if (n >= max_count) - return -EINVAL; - cpuc->event_list[n] = leader; - n++; - } - if (!dogrp) - return n; - - list_for_each_entry(event, &leader->sibling_list, group_entry) { - if (!is_x86_event(event) || - event->state <= PERF_EVENT_STATE_OFF) - continue; - - if (n >= max_count) - return -EINVAL; - - cpuc->event_list[n] = event; - n++; - } - return n; -} - -static inline void x86_assign_hw_event(struct perf_event *event, - struct cpu_hw_events *cpuc, int i) -{ - struct hw_perf_event *hwc = &event->hw; - - hwc->idx = cpuc->assign[i]; - hwc->last_cpu = smp_processor_id(); - hwc->last_tag = ++cpuc->tags[i]; - - if (hwc->idx == X86_PMC_IDX_FIXED_BTS) { - hwc->config_base = 0; - hwc->event_base = 0; - } else if (hwc->idx >= X86_PMC_IDX_FIXED) { - hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; - hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - X86_PMC_IDX_FIXED); - } else { - hwc->config_base = x86_pmu_config_addr(hwc->idx); - hwc->event_base = x86_pmu_event_addr(hwc->idx); - } -} - -static inline int match_prev_assignment(struct hw_perf_event *hwc, - struct cpu_hw_events *cpuc, - int i) -{ - return hwc->idx == cpuc->assign[i] && - hwc->last_cpu == smp_processor_id() && - hwc->last_tag == cpuc->tags[i]; -} - -static void x86_pmu_start(struct perf_event *event, int flags); - -static void x86_pmu_enable(struct pmu *pmu) -{ - struct 
cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct perf_event *event; - struct hw_perf_event *hwc; - int i, added = cpuc->n_added; - - if (!x86_pmu_initialized()) - return; - - if (cpuc->enabled) - return; - - if (cpuc->n_added) { - int n_running = cpuc->n_events - cpuc->n_added; - /* - * apply assignment obtained either from - * hw_perf_group_sched_in() or x86_pmu_enable() - * - * step1: save events moving to new counters - * step2: reprogram moved events into new counters - */ - for (i = 0; i < n_running; i++) { - event = cpuc->event_list[i]; - hwc = &event->hw; - - /* - * we can avoid reprogramming counter if: - * - assigned same counter as last time - * - running on same CPU as last time - * - no other event has used the counter since - */ - if (hwc->idx == -1 || - match_prev_assignment(hwc, cpuc, i)) - continue; - - /* - * Ensure we don't accidentally enable a stopped - * counter simply because we rescheduled. - */ - if (hwc->state & PERF_HES_STOPPED) - hwc->state |= PERF_HES_ARCH; - - x86_pmu_stop(event, PERF_EF_UPDATE); - } - - for (i = 0; i < cpuc->n_events; i++) { - event = cpuc->event_list[i]; - hwc = &event->hw; - - if (!match_prev_assignment(hwc, cpuc, i)) - x86_assign_hw_event(event, cpuc, i); - else if (i < n_running) - continue; - - if (hwc->state & PERF_HES_ARCH) - continue; - - x86_pmu_start(event, PERF_EF_RELOAD); - } - cpuc->n_added = 0; - perf_events_lapic_init(); - } - - cpuc->enabled = 1; - barrier(); - - x86_pmu.enable_all(added); -} - -static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); - -/* - * Set the next IRQ period, based on the hwc->period_left value. - * To be called with the event disabled in hw: - */ -int x86_perf_event_set_period(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - s64 left = local64_read(&hwc->period_left); - s64 period = hwc->sample_period; - int ret = 0, idx = hwc->idx; - - if (idx == X86_PMC_IDX_FIXED_BTS) - return 0; - - /* - * If we are way outside a reasonable range then just skip forward: - */ - if (unlikely(left <= -period)) { - left = period; - local64_set(&hwc->period_left, left); - hwc->last_period = period; - ret = 1; - } - - if (unlikely(left <= 0)) { - left += period; - local64_set(&hwc->period_left, left); - hwc->last_period = period; - ret = 1; - } - /* - * Quirk: certain CPUs dont like it if just 1 hw_event is left: - */ - if (unlikely(left < 2)) - left = 2; - - if (left > x86_pmu.max_period) - left = x86_pmu.max_period; - - per_cpu(pmc_prev_left[idx], smp_processor_id()) = left; - - /* - * The hw event starts counting from this event offset, - * mark it to be able to extra future deltas: - */ - local64_set(&hwc->prev_count, (u64)-left); - - wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); - - /* - * Due to erratum on certan cpu we need - * a second write to be sure the register - * is updated properly - */ - if (x86_pmu.perfctr_second_write) { - wrmsrl(hwc->event_base, - (u64)(-left) & x86_pmu.cntval_mask); - } - - perf_event_update_userpage(event); - - return ret; -} - -void x86_pmu_enable_event(struct perf_event *event) -{ - if (__this_cpu_read(cpu_hw_events.enabled)) - __x86_pmu_enable_event(&event->hw, - ARCH_PERFMON_EVENTSEL_ENABLE); -} - -/* - * Add a single event to the PMU. - * - * The event is added to the group of enabled events - * but only if it can be scehduled with existing events. 
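/*
 * Worked example (not from the original file) of x86_perf_event_set_period()
 * above, assuming a 48-bit counter and left = 100000:
 *
 *   prev_count (64-bit)      = (u64)-100000              = 0xfffffffffffe7960
 *   value written to the MSR = (u64)-100000 & cntval_mask = 0x0000fffffffe7960
 *
 * The counter counts up from that value, so it reaches 2^48 and overflows
 * (raising the PMI) after exactly 100000 increments; the full 64-bit
 * prev_count is what lets x86_perf_event_update() recover the delta with
 * its shift trick.
 */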
- */ -static int x86_pmu_add(struct perf_event *event, int flags) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct hw_perf_event *hwc; - int assign[X86_PMC_IDX_MAX]; - int n, n0, ret; - - hwc = &event->hw; - - perf_pmu_disable(event->pmu); - n0 = cpuc->n_events; - ret = n = collect_events(cpuc, event, false); - if (ret < 0) - goto out; - - hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; - if (!(flags & PERF_EF_START)) - hwc->state |= PERF_HES_ARCH; - - /* - * If group events scheduling transaction was started, - * skip the schedulability test here, it will be performed - * at commit time (->commit_txn) as a whole - */ - if (cpuc->group_flag & PERF_EVENT_TXN) - goto done_collect; - - ret = x86_pmu.schedule_events(cpuc, n, assign); - if (ret) - goto out; - /* - * copy new assignment, now we know it is possible - * will be used by hw_perf_enable() - */ - memcpy(cpuc->assign, assign, n*sizeof(int)); - -done_collect: - cpuc->n_events = n; - cpuc->n_added += n - n0; - cpuc->n_txn += n - n0; - - ret = 0; -out: - perf_pmu_enable(event->pmu); - return ret; -} - -static void x86_pmu_start(struct perf_event *event, int flags) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - int idx = event->hw.idx; - - if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) - return; - - if (WARN_ON_ONCE(idx == -1)) - return; - - if (flags & PERF_EF_RELOAD) { - WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); - x86_perf_event_set_period(event); - } - - event->hw.state = 0; - - cpuc->events[idx] = event; - __set_bit(idx, cpuc->active_mask); - __set_bit(idx, cpuc->running); - x86_pmu.enable(event); - perf_event_update_userpage(event); -} - -void perf_event_print_debug(void) -{ - u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; - u64 pebs; - struct cpu_hw_events *cpuc; - unsigned long flags; - int cpu, idx; - - if (!x86_pmu.num_counters) - return; - - local_irq_save(flags); - - cpu = smp_processor_id(); - cpuc = &per_cpu(cpu_hw_events, cpu); - - if (x86_pmu.version >= 2) { - rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); - rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); - rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); - rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); - rdmsrl(MSR_IA32_PEBS_ENABLE, pebs); - - pr_info("\n"); - pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); - pr_info("CPU#%d: status: %016llx\n", cpu, status); - pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); - pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); - pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs); - } - pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl); - rdmsrl(x86_pmu_event_addr(idx), pmc_count); - - prev_left = per_cpu(pmc_prev_left[idx], cpu); - - pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n", - cpu, idx, pmc_ctrl); - pr_info("CPU#%d: gen-PMC%d count: %016llx\n", - cpu, idx, pmc_count); - pr_info("CPU#%d: gen-PMC%d left: %016llx\n", - cpu, idx, prev_left); - } - for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { - rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); - - pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", - cpu, idx, pmc_count); - } - local_irq_restore(flags); -} - -void x86_pmu_stop(struct perf_event *event, int flags) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; - - if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) { - x86_pmu.disable(event); - cpuc->events[hwc->idx] = 
NULL; - WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); - hwc->state |= PERF_HES_STOPPED; - } - - if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { - /* - * Drain the remaining delta count out of a event - * that we are disabling: - */ - x86_perf_event_update(event); - hwc->state |= PERF_HES_UPTODATE; - } -} - -static void x86_pmu_del(struct perf_event *event, int flags) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - int i; - - /* - * If we're called during a txn, we don't need to do anything. - * The events never got scheduled and ->cancel_txn will truncate - * the event_list. - */ - if (cpuc->group_flag & PERF_EVENT_TXN) - return; - - x86_pmu_stop(event, PERF_EF_UPDATE); - - for (i = 0; i < cpuc->n_events; i++) { - if (event == cpuc->event_list[i]) { - - if (x86_pmu.put_event_constraints) - x86_pmu.put_event_constraints(cpuc, event); - - while (++i < cpuc->n_events) - cpuc->event_list[i-1] = cpuc->event_list[i]; - - --cpuc->n_events; - break; - } - } - perf_event_update_userpage(event); -} - -int x86_pmu_handle_irq(struct pt_regs *regs) -{ - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct perf_event *event; - int idx, handled = 0; - u64 val; - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - - /* - * Some chipsets need to unmask the LVTPC in a particular spot - * inside the nmi handler. As a result, the unmasking was pushed - * into all the nmi handlers. - * - * This generic handler doesn't seem to have any issues where the - * unmasking occurs so it was left at the top. - */ - apic_write(APIC_LVTPC, APIC_DM_NMI); - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - if (!test_bit(idx, cpuc->active_mask)) { - /* - * Though we deactivated the counter some cpus - * might still deliver spurious interrupts still - * in flight. 
Catch them: - */ - if (__test_and_clear_bit(idx, cpuc->running)) - handled++; - continue; - } - - event = cpuc->events[idx]; - - val = x86_perf_event_update(event); - if (val & (1ULL << (x86_pmu.cntval_bits - 1))) - continue; - - /* - * event overflow - */ - handled++; - data.period = event->hw.last_period; - - if (!x86_perf_event_set_period(event)) - continue; - - if (perf_event_overflow(event, &data, regs)) - x86_pmu_stop(event, 0); - } - - if (handled) - inc_irq_stat(apic_perf_irqs); - - return handled; -} - -void perf_events_lapic_init(void) -{ - if (!x86_pmu.apic || !x86_pmu_initialized()) - return; - - /* - * Always use NMI for PMU - */ - apic_write(APIC_LVTPC, APIC_DM_NMI); -} - -static int __kprobes -perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) -{ - if (!atomic_read(&active_events)) - return NMI_DONE; - - return x86_pmu.handle_irq(regs); -} - -struct event_constraint emptyconstraint; -struct event_constraint unconstrained; - -static int __cpuinit -x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) -{ - unsigned int cpu = (long)hcpu; - struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); - int ret = NOTIFY_OK; - - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_UP_PREPARE: - cpuc->kfree_on_online = NULL; - if (x86_pmu.cpu_prepare) - ret = x86_pmu.cpu_prepare(cpu); - break; - - case CPU_STARTING: - if (x86_pmu.attr_rdpmc) - set_in_cr4(X86_CR4_PCE); - if (x86_pmu.cpu_starting) - x86_pmu.cpu_starting(cpu); - break; - - case CPU_ONLINE: - kfree(cpuc->kfree_on_online); - break; - - case CPU_DYING: - if (x86_pmu.cpu_dying) - x86_pmu.cpu_dying(cpu); - break; - - case CPU_UP_CANCELED: - case CPU_DEAD: - if (x86_pmu.cpu_dead) - x86_pmu.cpu_dead(cpu); - break; - - default: - break; - } - - return ret; -} - -static void __init pmu_check_apic(void) -{ - if (cpu_has_apic) - return; - - x86_pmu.apic = 0; - pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); - pr_info("no hardware sampling interrupt available.\n"); -} - -static struct attribute_group x86_pmu_format_group = { - .name = "format", - .attrs = NULL, -}; - -static int __init init_hw_perf_events(void) -{ - struct x86_pmu_quirk *quirk; - struct event_constraint *c; - int err; - - pr_info("Performance Events: "); - - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - err = intel_pmu_init(); - break; - case X86_VENDOR_AMD: - err = amd_pmu_init(); - break; - default: - return 0; - } - if (err != 0) { - pr_cont("no PMU driver, software events only.\n"); - return 0; - } - - pmu_check_apic(); - - /* sanity check that the hardware exists or is emulated */ - if (!check_hw_exists()) - return 0; - - pr_cont("%s PMU driver.\n", x86_pmu.name); - - for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next) - quirk->func(); - - if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { - WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", - x86_pmu.num_counters, X86_PMC_MAX_GENERIC); - x86_pmu.num_counters = X86_PMC_MAX_GENERIC; - } - x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; - - if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { - WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", - x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); - x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; - } - - x86_pmu.intel_ctrl |= - ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; - - perf_events_lapic_init(); - register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI"); - - unconstrained = (struct event_constraint) - 
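
The overflow test in x86_pmu_handle_irq() works because the counters are armed with the two's complement of the remaining sampling period: the top bit of the (cntval_bits)-wide value stays set while the counter is still counting up towards the wrap, so a clear top bit marks a real overflow and a set bit marks a counter that simply has not expired yet. The stand-alone sketch below only illustrates that convention (a 48-bit counter width is assumed, as in the AMD PMU definitions further down; every name here is invented for the example, none of it is part of the deleted file):

#include <stdint.h>
#include <stdio.h>

#define CNTVAL_BITS 48
#define CNTVAL_MASK ((1ULL << CNTVAL_BITS) - 1)

/* Arm a counter for 'period' events: program the two's complement,
 * truncated to the counter width. */
static uint64_t arm_counter(uint64_t period)
{
    return (uint64_t)(-(int64_t)period) & CNTVAL_MASK;
}

/* Mirror of the check in the NMI handler: the top bit is still set
 * while the counter counts towards the wrap, so "top bit clear"
 * means the programmed period has elapsed. */
static int has_overflowed(uint64_t raw_count)
{
    return !(raw_count & (1ULL << (CNTVAL_BITS - 1)));
}

int main(void)
{
    uint64_t raw = arm_counter(1000);

    printf("armed value: %#llx, overflowed: %d\n",
           (unsigned long long)raw, has_overflowed(raw));

    raw = (raw + 1000) & CNTVAL_MASK;   /* simulate 1000 events */
    printf("after 1000 events: %#llx, overflowed: %d\n",
           (unsigned long long)raw, has_overflowed(raw));
    return 0;
}
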
__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, - 0, x86_pmu.num_counters, 0); - - if (x86_pmu.event_constraints) { - /* - * event on fixed counter2 (REF_CYCLES) only works on this - * counter, so do not extend mask to generic counters - */ - for_each_event_constraint(c, x86_pmu.event_constraints) { - if (c->cmask != X86_RAW_EVENT_MASK - || c->idxmsk64 == X86_PMC_MSK_FIXED_REF_CYCLES) { - continue; - } - - c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; - c->weight += x86_pmu.num_counters; - } - } - - x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ - x86_pmu_format_group.attrs = x86_pmu.format_attrs; - - pr_info("... version: %d\n", x86_pmu.version); - pr_info("... bit width: %d\n", x86_pmu.cntval_bits); - pr_info("... generic registers: %d\n", x86_pmu.num_counters); - pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask); - pr_info("... max period: %016Lx\n", x86_pmu.max_period); - pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); - pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); - - perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); - perf_cpu_notifier(x86_pmu_notifier); - - return 0; -} -early_initcall(init_hw_perf_events); - -static inline void x86_pmu_read(struct perf_event *event) -{ - x86_perf_event_update(event); -} - -/* - * Start group events scheduling transaction - * Set the flag to make pmu::enable() not perform the - * schedulability test, it will be performed at commit time - */ -static void x86_pmu_start_txn(struct pmu *pmu) -{ - perf_pmu_disable(pmu); - __this_cpu_or(cpu_hw_events.group_flag, PERF_EVENT_TXN); - __this_cpu_write(cpu_hw_events.n_txn, 0); -} - -/* - * Stop group events scheduling transaction - * Clear the flag and pmu::enable() will perform the - * schedulability test. - */ -static void x86_pmu_cancel_txn(struct pmu *pmu) -{ - __this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN); - /* - * Truncate the collected events. - */ - __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn)); - __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn)); - perf_pmu_enable(pmu); -} - -/* - * Commit group events scheduling transaction - * Perform the group schedulability test as a whole - * Return 0 if success - */ -static int x86_pmu_commit_txn(struct pmu *pmu) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - int assign[X86_PMC_IDX_MAX]; - int n, ret; - - n = cpuc->n_events; - - if (!x86_pmu_initialized()) - return -EAGAIN; - - ret = x86_pmu.schedule_events(cpuc, n, assign); - if (ret) - return ret; - - /* - * copy new assignment, now we know it is possible - * will be used by hw_perf_enable() - */ - memcpy(cpuc->assign, assign, n*sizeof(int)); - - cpuc->group_flag &= ~PERF_EVENT_TXN; - perf_pmu_enable(pmu); - return 0; -} -/* - * a fake_cpuc is used to validate event groups. Due to - * the extra reg logic, we need to also allocate a fake - * per_core and per_cpu structure. Otherwise, group events - * using extra reg may conflict without the kernel being - * able to catch this when the last event gets added to - * the group. 
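
The three transaction callbacks just above implement the usual collect-first, test-once pattern for event groups: while a transaction is open, x86_pmu_add() merely appends events, the single schedulability test runs in x86_pmu_commit_txn(), and x86_pmu_cancel_txn() undoes the bookkeeping when the test fails. A compact user-space sketch of that control flow follows; the feasibility test is stubbed out and every name is invented for the illustration:

#include <stdbool.h>
#include <stdio.h>

struct fake_pmu {
    int  n_events;   /* events accepted so far           */
    int  n_txn;      /* events added inside the open txn */
    bool in_txn;     /* mirrors the PERF_EVENT_TXN flag  */
};

/* Stand-in for x86_pmu.schedule_events(): pretend scheduling
 * succeeds as long as we stay within four counters. */
static int schedule_events(struct fake_pmu *p)
{
    return p->n_events <= 4 ? 0 : -1;
}

static void start_txn(struct fake_pmu *p)
{
    p->in_txn = true;
    p->n_txn = 0;
}

static int add_event(struct fake_pmu *p)
{
    p->n_events++;
    p->n_txn++;

    if (p->in_txn)                 /* defer the test to commit time */
        return 0;

    if (schedule_events(p)) {      /* immediate test outside a txn */
        p->n_events--;
        p->n_txn--;
        return -1;
    }
    return 0;
}

static int commit_txn(struct fake_pmu *p)
{
    int ret = schedule_events(p);  /* test the whole group once */
    if (!ret)
        p->in_txn = false;
    return ret;
}

static void cancel_txn(struct fake_pmu *p)
{
    p->n_events -= p->n_txn;       /* truncate what the txn added */
    p->n_txn = 0;
    p->in_txn = false;
}

int main(void)
{
    struct fake_pmu p = { 0 };

    start_txn(&p);
    for (int i = 0; i < 6; i++)
        add_event(&p);
    if (commit_txn(&p)) {
        cancel_txn(&p);
        printf("group rejected, n_events back to %d\n", p.n_events);
    }
    return 0;
}
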
- */ -static void free_fake_cpuc(struct cpu_hw_events *cpuc) -{ - kfree(cpuc->shared_regs); - kfree(cpuc); -} - -static struct cpu_hw_events *allocate_fake_cpuc(void) -{ - struct cpu_hw_events *cpuc; - int cpu = raw_smp_processor_id(); - - cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL); - if (!cpuc) - return ERR_PTR(-ENOMEM); - - /* only needed, if we have extra_regs */ - if (x86_pmu.extra_regs) { - cpuc->shared_regs = allocate_shared_regs(cpu); - if (!cpuc->shared_regs) - goto error; - } - return cpuc; -error: - free_fake_cpuc(cpuc); - return ERR_PTR(-ENOMEM); -} - -/* - * validate that we can schedule this event - */ -static int validate_event(struct perf_event *event) -{ - struct cpu_hw_events *fake_cpuc; - struct event_constraint *c; - int ret = 0; - - fake_cpuc = allocate_fake_cpuc(); - if (IS_ERR(fake_cpuc)) - return PTR_ERR(fake_cpuc); - - c = x86_pmu.get_event_constraints(fake_cpuc, event); - - if (!c || !c->weight) - ret = -EINVAL; - - if (x86_pmu.put_event_constraints) - x86_pmu.put_event_constraints(fake_cpuc, event); - - free_fake_cpuc(fake_cpuc); - - return ret; -} - -/* - * validate a single event group - * - * validation include: - * - check events are compatible which each other - * - events do not compete for the same counter - * - number of events <= number of counters - * - * validation ensures the group can be loaded onto the - * PMU if it was the only group available. - */ -static int validate_group(struct perf_event *event) -{ - struct perf_event *leader = event->group_leader; - struct cpu_hw_events *fake_cpuc; - int ret = -EINVAL, n; - - fake_cpuc = allocate_fake_cpuc(); - if (IS_ERR(fake_cpuc)) - return PTR_ERR(fake_cpuc); - /* - * the event is not yet connected with its - * siblings therefore we must first collect - * existing siblings, then add the new event - * before we can simulate the scheduling - */ - n = collect_events(fake_cpuc, leader, true); - if (n < 0) - goto out; - - fake_cpuc->n_events = n; - n = collect_events(fake_cpuc, event, false); - if (n < 0) - goto out; - - fake_cpuc->n_events = n; - - ret = x86_pmu.schedule_events(fake_cpuc, n, NULL); - -out: - free_fake_cpuc(fake_cpuc); - return ret; -} - -static int x86_pmu_event_init(struct perf_event *event) -{ - struct pmu *tmp; - int err; - - switch (event->attr.type) { - case PERF_TYPE_RAW: - case PERF_TYPE_HARDWARE: - case PERF_TYPE_HW_CACHE: - break; - - default: - return -ENOENT; - } - - err = __x86_pmu_event_init(event); - if (!err) { - /* - * we temporarily connect event to its pmu - * such that validate_group() can classify - * it as an x86 event using is_x86_event() - */ - tmp = event->pmu; - event->pmu = &pmu; - - if (event->group_leader != event) - err = validate_group(event); - else - err = validate_event(event); - - event->pmu = tmp; - } - if (err) { - if (event->destroy) - event->destroy(event); - } - - return err; -} - -static int x86_pmu_event_idx(struct perf_event *event) -{ - int idx = event->hw.idx; - - if (!x86_pmu.attr_rdpmc) - return 0; - - if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) { - idx -= X86_PMC_IDX_FIXED; - idx |= 1 << 30; - } - - return idx + 1; -} - -static ssize_t get_attr_rdpmc(struct device *cdev, - struct device_attribute *attr, - char *buf) -{ - return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc); -} - -static void change_rdpmc(void *info) -{ - bool enable = !!(unsigned long)info; - - if (enable) - set_in_cr4(X86_CR4_PCE); - else - clear_in_cr4(X86_CR4_PCE); -} - -static ssize_t set_attr_rdpmc(struct device *cdev, - struct device_attribute *attr, - const 
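
validate_group() above simulates scheduling on a throw-away cpu_hw_events: it first collects the leader's existing siblings, then the new event, and finally asks the scheduler whether the whole set fits (the NULL assign array makes it a dry run). The real x86_schedule_events() is more elaborate and can backtrack over constraint masks (see the EVENT_CONSTRAINT_OVERLAP comment in perf_event.h further down); the much-simplified greedy sketch below, with invented names, only illustrates the kind of feasibility question being answered:

#include <stdint.h>
#include <stdio.h>

/* Toy feasibility check in the spirit of validate_group(): each event
 * carries a bitmask of counters it may use (its constraint's idxmsk),
 * and the group fits if every event can get a distinct counter. */
static int toy_schedule(const uint64_t *idxmsk, int n)
{
    uint64_t used = 0;

    for (int i = 0; i < n; i++) {
        uint64_t free_mask = idxmsk[i] & ~used;
        if (!free_mask)
            return -1;                  /* no counter left for event i */
        used |= free_mask & -free_mask; /* grab the lowest allowed counter */
    }
    return 0;
}

int main(void)
{
    /* Two events both restricted to counter 0 cannot coexist. */
    uint64_t group[] = { 0x1, 0x1 };

    printf("schedulable: %s\n", toy_schedule(group, 2) ? "no" : "yes");
    return 0;
}
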
char *buf, size_t count) -{ - unsigned long val = simple_strtoul(buf, NULL, 0); - - if (!!val != !!x86_pmu.attr_rdpmc) { - x86_pmu.attr_rdpmc = !!val; - smp_call_function(change_rdpmc, (void *)val, 1); - } - - return count; -} - -static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc); - -static struct attribute *x86_pmu_attrs[] = { - &dev_attr_rdpmc.attr, - NULL, -}; - -static struct attribute_group x86_pmu_attr_group = { - .attrs = x86_pmu_attrs, -}; - -static const struct attribute_group *x86_pmu_attr_groups[] = { - &x86_pmu_attr_group, - &x86_pmu_format_group, - NULL, -}; - -static void x86_pmu_flush_branch_stack(void) -{ - if (x86_pmu.flush_branch_stack) - x86_pmu.flush_branch_stack(); -} - -static struct pmu pmu = { - .pmu_enable = x86_pmu_enable, - .pmu_disable = x86_pmu_disable, - - .attr_groups = x86_pmu_attr_groups, - - .event_init = x86_pmu_event_init, - - .add = x86_pmu_add, - .del = x86_pmu_del, - .start = x86_pmu_start, - .stop = x86_pmu_stop, - .read = x86_pmu_read, - - .start_txn = x86_pmu_start_txn, - .cancel_txn = x86_pmu_cancel_txn, - .commit_txn = x86_pmu_commit_txn, - - .event_idx = x86_pmu_event_idx, - .flush_branch_stack = x86_pmu_flush_branch_stack, -}; - -void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) -{ - userpg->cap_usr_time = 0; - userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc; - userpg->pmc_width = x86_pmu.cntval_bits; - - if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) - return; - - if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) - return; - - userpg->cap_usr_time = 1; - userpg->time_mult = this_cpu_read(cyc2ns); - userpg->time_shift = CYC2NS_SCALE_FACTOR; - userpg->time_offset = this_cpu_read(cyc2ns_offset) - now; -} - -/* - * callchain support - */ - -static int backtrace_stack(void *data, char *name) -{ - return 0; -} - -static void backtrace_address(void *data, unsigned long addr, int reliable) -{ - struct perf_callchain_entry *entry = data; - - perf_callchain_store(entry, addr); -} - -static const struct stacktrace_ops backtrace_ops = { - .stack = backtrace_stack, - .address = backtrace_address, - .walk_stack = print_context_stack_bp, -}; - -void -perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) -{ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - /* TODO: We don't support guest os callchain now */ - return; - } - - perf_callchain_store(entry, regs->ip); - - dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); -} - -#ifdef CONFIG_COMPAT - -#include <asm/compat.h> - -static inline int -perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) -{ - /* 32-bit process in 64-bit kernel. 
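
Between x86_pmu_event_idx(), the rdpmc sysfs attribute and arch_perf_update_userpage(), the pieces above let user space read a counter without a syscall: the mmap page publishes the hardware index as idx+1 (0 meaning "not readable"), bit 30 of the hardware index selects the fixed-counter bank, and pmc_width gives the counter width. A hedged consumer-side sketch, assuming a mapped struct perf_event_mmap_page and deliberately omitting both the rdpmc-capability check and the pg->lock sequence-counter retry loop that a complete reader needs:

#include <linux/perf_event.h>
#include <stdint.h>

/* Read the raw counter behind an mmap'ed perf event with RDPMC.
 * pg->index is the hardware counter index plus one; bit 30 of the
 * hardware index selects the fixed-counter bank, exactly as encoded
 * by x86_pmu_event_idx() above. Returns the raw, wrapping counter
 * value truncated to the advertised width; a full reader combines it
 * with pg->offset under the pg->lock sequence counter. */
static uint64_t rdpmc_read(const volatile struct perf_event_mmap_page *pg)
{
    uint32_t idx = pg->index;
    uint32_t lo, hi;
    uint64_t val;

    if (!idx)
        return 0;           /* not readable; fall back to read(2) */

    asm volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (idx - 1));
    val = ((uint64_t)hi << 32) | lo;

    return val & ((1ULL << pg->pmc_width) - 1);
}
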
*/ - struct stack_frame_ia32 frame; - const void __user *fp; - - if (!test_thread_flag(TIF_IA32)) - return 0; - - fp = compat_ptr(regs->bp); - while (entry->nr < PERF_MAX_STACK_DEPTH) { - unsigned long bytes; - frame.next_frame = 0; - frame.return_address = 0; - - bytes = copy_from_user_nmi(&frame, fp, sizeof(frame)); - if (bytes != sizeof(frame)) - break; - - if (fp < compat_ptr(regs->sp)) - break; - - perf_callchain_store(entry, frame.return_address); - fp = compat_ptr(frame.next_frame); - } - return 1; -} -#else -static inline int -perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) -{ - return 0; -} -#endif - -void -perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) -{ - struct stack_frame frame; - const void __user *fp; - - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - /* TODO: We don't support guest os callchain now */ - return; - } - - fp = (void __user *)regs->bp; - - perf_callchain_store(entry, regs->ip); - - if (!current->mm) - return; - - if (perf_callchain_user32(regs, entry)) - return; - - while (entry->nr < PERF_MAX_STACK_DEPTH) { - unsigned long bytes; - frame.next_frame = NULL; - frame.return_address = 0; - - bytes = copy_from_user_nmi(&frame, fp, sizeof(frame)); - if (bytes != sizeof(frame)) - break; - - if ((unsigned long)fp < regs->sp) - break; - - perf_callchain_store(entry, frame.return_address); - fp = frame.next_frame; - } -} - -unsigned long perf_instruction_pointer(struct pt_regs *regs) -{ - unsigned long ip; - - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) - ip = perf_guest_cbs->get_guest_ip(); - else - ip = instruction_pointer(regs); - - return ip; -} - -unsigned long perf_misc_flags(struct pt_regs *regs) -{ - int misc = 0; - - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - if (perf_guest_cbs->is_user_mode()) - misc |= PERF_RECORD_MISC_GUEST_USER; - else - misc |= PERF_RECORD_MISC_GUEST_KERNEL; - } else { - if (user_mode(regs)) - misc |= PERF_RECORD_MISC_USER; - else - misc |= PERF_RECORD_MISC_KERNEL; - } - - if (regs->flags & PERF_EFLAGS_EXACT) - misc |= PERF_RECORD_MISC_EXACT_IP; - - return misc; -} - -void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) -{ - cap->version = x86_pmu.version; - cap->num_counters_gp = x86_pmu.num_counters; - cap->num_counters_fixed = x86_pmu.num_counters_fixed; - cap->bit_width_gp = x86_pmu.cntval_bits; - cap->bit_width_fixed = x86_pmu.cntval_bits; - cap->events_mask = (unsigned int)x86_pmu.events_maskl; - cap->events_mask_len = x86_pmu.events_mask_len; -} -EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability); diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/perf_event.h b/ANDROID_3.4.5/arch/x86/kernel/cpu/perf_event.h deleted file mode 100644 index 6638aaf5..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/perf_event.h +++ /dev/null @@ -1,605 +0,0 @@ -/* - * Performance events x86 architecture header - * - * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> - * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar - * Copyright (C) 2009 Jaswinder Singh Rajput - * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter - * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> - * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> - * Copyright (C) 2009 Google, Inc., Stephane Eranian - * - * For licencing details see kernel-base/COPYING - */ - -#include <linux/perf_event.h> - -/* - * | NHM/WSM | SNB | - * register ------------------------------- - * | HT | no HT | HT | no HT | - 
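
perf_callchain_user() above is a plain frame-pointer walk: starting from regs->bp it repeatedly copies a {next_frame, return_address} pair from user memory, records the return address, and follows next_frame until the copy fails, the pointer stops increasing, or the depth limit is reached. The same walk can be reproduced in user space when the binary keeps frame pointers (compile with -O0 or -fno-omit-frame-pointer on x86-64); the sketch below walks its own stack, stays within its own few frames, and is only an illustration:

#include <stdio.h>

/* With frame pointers enabled, each x86-64 frame starts with the saved
 * caller frame pointer followed by the return address, i.e. the same
 * struct stack_frame layout the kernel copies with copy_from_user_nmi(). */
struct stack_frame {
    struct stack_frame *next_frame;
    unsigned long       return_address;
};

static __attribute__((noinline)) void walk_own_stack(void)
{
    struct stack_frame *fp = __builtin_frame_address(0);
    int depth = 0;

    /* Limit the depth so we never chase a frame beyond main(). */
    while (fp && depth < 4) {
        printf("  frame %d: return address %#lx\n",
               depth++, fp->return_address);
        if (fp->next_frame <= fp)   /* frames must move towards the caller */
            break;
        fp = fp->next_frame;
    }
}

static __attribute__((noinline)) void leaf(void)  { walk_own_stack(); }
static __attribute__((noinline)) void outer(void) { leaf(); }

int main(void)
{
    outer();
    return 0;
}
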
*----------------------------------------- - * offcore | core | core | cpu | core | - * lbr_sel | core | core | cpu | core | - * ld_lat | cpu | core | cpu | core | - *----------------------------------------- - * - * Given that there is a small number of shared regs, - * we can pre-allocate their slot in the per-cpu - * per-core reg tables. - */ -enum extra_reg_type { - EXTRA_REG_NONE = -1, /* not used */ - - EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ - EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ - EXTRA_REG_LBR = 2, /* lbr_select */ - - EXTRA_REG_MAX /* number of entries needed */ -}; - -struct event_constraint { - union { - unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; - u64 idxmsk64; - }; - u64 code; - u64 cmask; - int weight; - int overlap; -}; - -struct amd_nb { - int nb_id; /* NorthBridge id */ - int refcnt; /* reference count */ - struct perf_event *owners[X86_PMC_IDX_MAX]; - struct event_constraint event_constraints[X86_PMC_IDX_MAX]; -}; - -/* The maximal number of PEBS events: */ -#define MAX_PEBS_EVENTS 4 - -/* - * A debug store configuration. - * - * We only support architectures that use 64bit fields. - */ -struct debug_store { - u64 bts_buffer_base; - u64 bts_index; - u64 bts_absolute_maximum; - u64 bts_interrupt_threshold; - u64 pebs_buffer_base; - u64 pebs_index; - u64 pebs_absolute_maximum; - u64 pebs_interrupt_threshold; - u64 pebs_event_reset[MAX_PEBS_EVENTS]; -}; - -/* - * Per register state. - */ -struct er_account { - raw_spinlock_t lock; /* per-core: protect structure */ - u64 config; /* extra MSR config */ - u64 reg; /* extra MSR number */ - atomic_t ref; /* reference count */ -}; - -/* - * Per core/cpu state - * - * Used to coordinate shared registers between HT threads or - * among events on a single PMU. - */ -struct intel_shared_regs { - struct er_account regs[EXTRA_REG_MAX]; - int refcnt; /* per-core: #HT threads */ - unsigned core_id; /* per-core: core id */ -}; - -#define MAX_LBR_ENTRIES 16 - -struct cpu_hw_events { - /* - * Generic x86 PMC bits - */ - struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ - unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; - unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; - int enabled; - - int n_events; - int n_added; - int n_txn; - int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ - u64 tags[X86_PMC_IDX_MAX]; - struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ - - unsigned int group_flag; - - /* - * Intel DebugStore bits - */ - struct debug_store *ds; - u64 pebs_enabled; - - /* - * Intel LBR bits - */ - int lbr_users; - void *lbr_context; - struct perf_branch_stack lbr_stack; - struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; - struct er_account *lbr_sel; - u64 br_sel; - - /* - * Intel host/guest exclude bits - */ - u64 intel_ctrl_guest_mask; - u64 intel_ctrl_host_mask; - struct perf_guest_switch_msr guest_switch_msrs[X86_PMC_IDX_MAX]; - - /* - * manage shared (per-core, per-cpu) registers - * used on Intel NHM/WSM/SNB - */ - struct intel_shared_regs *shared_regs; - - /* - * AMD specific bits - */ - struct amd_nb *amd_nb; - /* Inverted mask of bits to clear in the perf_ctr ctrl registers */ - u64 perf_ctr_virt_mask; - - void *kfree_on_online; -}; - -#define __EVENT_CONSTRAINT(c, n, m, w, o) {\ - { .idxmsk64 = (n) }, \ - .code = (c), \ - .cmask = (m), \ - .weight = (w), \ - .overlap = (o), \ -} - -#define EVENT_CONSTRAINT(c, n, m) \ - __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0) - -/* - * The overlap flag marks event constraints with overlapping 
counter - * masks. This is the case if the counter mask of such an event is not - * a subset of any other counter mask of a constraint with an equal or - * higher weight, e.g.: - * - * c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0); - * c_another1 = EVENT_CONSTRAINT(0, 0x07, 0); - * c_another2 = EVENT_CONSTRAINT(0, 0x38, 0); - * - * The event scheduler may not select the correct counter in the first - * cycle because it needs to know which subsequent events will be - * scheduled. It may fail to schedule the events then. So we set the - * overlap flag for such constraints to give the scheduler a hint which - * events to select for counter rescheduling. - * - * Care must be taken as the rescheduling algorithm is O(n!) which - * will increase scheduling cycles for an over-commited system - * dramatically. The number of such EVENT_CONSTRAINT_OVERLAP() macros - * and its counter masks must be kept at a minimum. - */ -#define EVENT_CONSTRAINT_OVERLAP(c, n, m) \ - __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1) - -/* - * Constraint on the Event code. - */ -#define INTEL_EVENT_CONSTRAINT(c, n) \ - EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT) - -/* - * Constraint on the Event code + UMask + fixed-mask - * - * filter mask to validate fixed counter events. - * the following filters disqualify for fixed counters: - * - inv - * - edge - * - cnt-mask - * The other filters are supported by fixed counters. - * The any-thread option is supported starting with v3. - */ -#define FIXED_EVENT_CONSTRAINT(c, n) \ - EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK) - -/* - * Constraint on the Event code + UMask - */ -#define INTEL_UEVENT_CONSTRAINT(c, n) \ - EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) - -#define EVENT_CONSTRAINT_END \ - EVENT_CONSTRAINT(0, 0, 0) - -#define for_each_event_constraint(e, c) \ - for ((e) = (c); (e)->weight; (e)++) - -/* - * Extra registers for specific events. - * - * Some events need large masks and require external MSRs. - * Those extra MSRs end up being shared for all events on - * a PMU and sometimes between PMU of sibling HT threads. - * In either case, the kernel needs to handle conflicting - * accesses to those extra, shared, regs. The data structure - * to manage those registers is stored in cpu_hw_event. - */ -struct extra_reg { - unsigned int event; - unsigned int msr; - u64 config_mask; - u64 valid_mask; - int idx; /* per_xxx->regs[] reg index */ -}; - -#define EVENT_EXTRA_REG(e, ms, m, vm, i) { \ - .event = (e), \ - .msr = (ms), \ - .config_mask = (m), \ - .valid_mask = (vm), \ - .idx = EXTRA_REG_##i \ - } - -#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \ - EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx) - -#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0) - -union perf_capabilities { - struct { - u64 lbr_format:6; - u64 pebs_trap:1; - u64 pebs_arch_reg:1; - u64 pebs_format:4; - u64 smm_freeze:1; - }; - u64 capabilities; -}; - -struct x86_pmu_quirk { - struct x86_pmu_quirk *next; - void (*func)(void); -}; - -union x86_pmu_config { - struct { - u64 event:8, - umask:8, - usr:1, - os:1, - edge:1, - pc:1, - interrupt:1, - __reserved1:1, - en:1, - inv:1, - cmask:8, - event2:4, - __reserved2:4, - go:1, - ho:1; - } bits; - u64 value; -}; - -#define X86_CONFIG(args...) 
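
union x86_pmu_config above spells out the layout of a raw event-select MSR value: event code in bits 0-7 (plus 35:32 on AMD), unit mask in bits 8-15, then the USR/OS/EDGE/PC/INT flags, ENABLE at bit 22, INVERT at bit 23 and the counter mask in bits 24-31. The stand-alone snippet below rebuilds one such value both through an equivalent bit-field and through explicit shifts; the bit-field form relies on the compiler placing the first member in the least significant bits, which GCC does on x86 and which is one reason the kernel wraps it in the X86_CONFIG() helper:

#include <stdint.h>
#include <stdio.h>

/* Stand-alone copy of the bit layout described by union x86_pmu_config,
 * used here only for illustration (event 0xC0 = retired instructions,
 * counted in user and kernel mode, counter enabled). */
union eventsel {
    struct {
        uint64_t event:8, umask:8,
                 usr:1, os:1, edge:1, pc:1, interrupt:1,
                 reserved1:1, en:1, inv:1, cmask:8,
                 event2:4, reserved2:4, go:1, ho:1;
    } bits;
    uint64_t value;
};

int main(void)
{
    union eventsel cfg = {
        .bits = { .event = 0xc0, .usr = 1, .os = 1, .en = 1 }
    };

    /* The same value written out with explicit shifts. */
    uint64_t manual = 0xc0 | (1ULL << 16) | (1ULL << 17) | (1ULL << 22);

    printf("config = %#llx (manual %#llx)\n",
           (unsigned long long)cfg.value, (unsigned long long)manual);
    return 0;
}
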
((union x86_pmu_config){.bits = {args}}).value - -/* - * struct x86_pmu - generic x86 pmu - */ -struct x86_pmu { - /* - * Generic x86 PMC bits - */ - const char *name; - int version; - int (*handle_irq)(struct pt_regs *); - void (*disable_all)(void); - void (*enable_all)(int added); - void (*enable)(struct perf_event *); - void (*disable)(struct perf_event *); - int (*hw_config)(struct perf_event *event); - int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); - unsigned eventsel; - unsigned perfctr; - u64 (*event_map)(int); - int max_events; - int num_counters; - int num_counters_fixed; - int cntval_bits; - u64 cntval_mask; - union { - unsigned long events_maskl; - unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)]; - }; - int events_mask_len; - int apic; - u64 max_period; - struct event_constraint * - (*get_event_constraints)(struct cpu_hw_events *cpuc, - struct perf_event *event); - - void (*put_event_constraints)(struct cpu_hw_events *cpuc, - struct perf_event *event); - struct event_constraint *event_constraints; - struct x86_pmu_quirk *quirks; - int perfctr_second_write; - - /* - * sysfs attrs - */ - int attr_rdpmc; - struct attribute **format_attrs; - - /* - * CPU Hotplug hooks - */ - int (*cpu_prepare)(int cpu); - void (*cpu_starting)(int cpu); - void (*cpu_dying)(int cpu); - void (*cpu_dead)(int cpu); - void (*flush_branch_stack)(void); - - /* - * Intel Arch Perfmon v2+ - */ - u64 intel_ctrl; - union perf_capabilities intel_cap; - - /* - * Intel DebugStore bits - */ - int bts, pebs; - int bts_active, pebs_active; - int pebs_record_size; - void (*drain_pebs)(struct pt_regs *regs); - struct event_constraint *pebs_constraints; - - /* - * Intel LBR - */ - unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ - int lbr_nr; /* hardware stack size */ - u64 lbr_sel_mask; /* LBR_SELECT valid bits */ - const int *lbr_sel_map; /* lbr_select mappings */ - - /* - * Extra registers for events - */ - struct extra_reg *extra_regs; - unsigned int er_flags; - - /* - * Intel host/guest support (KVM) - */ - struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); -}; - -#define x86_add_quirk(func_) \ -do { \ - static struct x86_pmu_quirk __quirk __initdata = { \ - .func = func_, \ - }; \ - __quirk.next = x86_pmu.quirks; \ - x86_pmu.quirks = &__quirk; \ -} while (0) - -#define ERF_NO_HT_SHARING 1 -#define ERF_HAS_RSP_1 2 - -extern struct x86_pmu x86_pmu __read_mostly; - -DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events); - -int x86_perf_event_set_period(struct perf_event *event); - -/* - * Generalized hw caching related hw_event table, filled - * in on a per model basis. A value of 0 means - * 'not supported', -1 means 'hw_event makes no sense on - * this CPU', any other value means the raw hw_event - * ID. - */ - -#define C(x) PERF_COUNT_HW_CACHE_##x - -extern u64 __read_mostly hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX]; -extern u64 __read_mostly hw_cache_extra_regs - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX]; - -u64 x86_perf_event_update(struct perf_event *event); - -static inline int x86_pmu_addr_offset(int index) -{ - int offset; - - /* offset = X86_FEATURE_PERFCTR_CORE ? 
index << 1 : index */ - alternative_io(ASM_NOP2, - "shll $1, %%eax", - X86_FEATURE_PERFCTR_CORE, - "=a" (offset), - "a" (index)); - - return offset; -} - -static inline unsigned int x86_pmu_config_addr(int index) -{ - return x86_pmu.eventsel + x86_pmu_addr_offset(index); -} - -static inline unsigned int x86_pmu_event_addr(int index) -{ - return x86_pmu.perfctr + x86_pmu_addr_offset(index); -} - -int x86_setup_perfctr(struct perf_event *event); - -int x86_pmu_hw_config(struct perf_event *event); - -void x86_pmu_disable_all(void); - -static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, - u64 enable_mask) -{ - u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask); - - if (hwc->extra_reg.reg) - wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config); - wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask); -} - -void x86_pmu_enable_all(int added); - -int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign); - -void x86_pmu_stop(struct perf_event *event, int flags); - -static inline void x86_pmu_disable_event(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - - wrmsrl(hwc->config_base, hwc->config); -} - -void x86_pmu_enable_event(struct perf_event *event); - -int x86_pmu_handle_irq(struct pt_regs *regs); - -extern struct event_constraint emptyconstraint; - -extern struct event_constraint unconstrained; - -static inline bool kernel_ip(unsigned long ip) -{ -#ifdef CONFIG_X86_32 - return ip > PAGE_OFFSET; -#else - return (long)ip < 0; -#endif -} - -#ifdef CONFIG_CPU_SUP_AMD - -int amd_pmu_init(void); - -#else /* CONFIG_CPU_SUP_AMD */ - -static inline int amd_pmu_init(void) -{ - return 0; -} - -#endif /* CONFIG_CPU_SUP_AMD */ - -#ifdef CONFIG_CPU_SUP_INTEL - -int intel_pmu_save_and_restart(struct perf_event *event); - -struct event_constraint * -x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event); - -struct intel_shared_regs *allocate_shared_regs(int cpu); - -int intel_pmu_init(void); - -void init_debug_store_on_cpu(int cpu); - -void fini_debug_store_on_cpu(int cpu); - -void release_ds_buffers(void); - -void reserve_ds_buffers(void); - -extern struct event_constraint bts_constraint; - -void intel_pmu_enable_bts(u64 config); - -void intel_pmu_disable_bts(void); - -int intel_pmu_drain_bts_buffer(void); - -extern struct event_constraint intel_core2_pebs_event_constraints[]; - -extern struct event_constraint intel_atom_pebs_event_constraints[]; - -extern struct event_constraint intel_nehalem_pebs_event_constraints[]; - -extern struct event_constraint intel_westmere_pebs_event_constraints[]; - -extern struct event_constraint intel_snb_pebs_event_constraints[]; - -struct event_constraint *intel_pebs_constraints(struct perf_event *event); - -void intel_pmu_pebs_enable(struct perf_event *event); - -void intel_pmu_pebs_disable(struct perf_event *event); - -void intel_pmu_pebs_enable_all(void); - -void intel_pmu_pebs_disable_all(void); - -void intel_ds_init(void); - -void intel_pmu_lbr_reset(void); - -void intel_pmu_lbr_enable(struct perf_event *event); - -void intel_pmu_lbr_disable(struct perf_event *event); - -void intel_pmu_lbr_enable_all(void); - -void intel_pmu_lbr_disable_all(void); - -void intel_pmu_lbr_read(void); - -void intel_pmu_lbr_init_core(void); - -void intel_pmu_lbr_init_nhm(void); - -void intel_pmu_lbr_init_atom(void); - -void intel_pmu_lbr_init_snb(void); - -int intel_pmu_setup_lbr_filter(struct perf_event *event); - -int p4_pmu_init(void); - -int p6_pmu_init(void); - -#else /* 
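
x86_pmu_addr_offset() above computes the per-counter MSR stride with an asm alternative, but the logic is just the commented expression: on CPUs with the core performance counter extensions the control and counter MSRs are interleaved, so the offset doubles. A plain-C illustration follows; the MSR base numbers are the customary AMD values and should be treated as an assumption of this sketch, not as part of the deleted file:

#include <stdbool.h>
#include <stdio.h>

#define MSR_K7_EVNTSEL0     0xc0010000u   /* legacy, four consecutive MSRs */
#define MSR_K7_PERFCTR0     0xc0010004u
#define MSR_F15H_PERF_CTL   0xc0010200u   /* Fam15h, ctl/ctr interleaved   */
#define MSR_F15H_PERF_CTR   0xc0010201u

/* Plain-C version of the alternative above. */
static unsigned int addr_offset(int index, bool has_perfctr_core)
{
    return has_perfctr_core ? index << 1 : index;
}

int main(void)
{
    for (int idx = 0; idx < 4; idx++)
        printf("counter %d: legacy ctl %#x/ctr %#x, fam15h ctl %#x/ctr %#x\n",
               idx,
               MSR_K7_EVNTSEL0   + addr_offset(idx, false),
               MSR_K7_PERFCTR0   + addr_offset(idx, false),
               MSR_F15H_PERF_CTL + addr_offset(idx, true),
               MSR_F15H_PERF_CTR + addr_offset(idx, true));
    return 0;
}
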
CONFIG_CPU_SUP_INTEL */ - -static inline void reserve_ds_buffers(void) -{ -} - -static inline void release_ds_buffers(void) -{ -} - -static inline int intel_pmu_init(void) -{ - return 0; -} - -static inline struct intel_shared_regs *allocate_shared_regs(int cpu) -{ - return NULL; -} - -#endif /* CONFIG_CPU_SUP_INTEL */ diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/perf_event_amd.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/perf_event_amd.c deleted file mode 100644 index 9edc786a..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/perf_event_amd.c +++ /dev/null @@ -1,686 +0,0 @@ -#include <linux/perf_event.h> -#include <linux/export.h> -#include <linux/types.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <asm/apicdef.h> - -#include "perf_event.h" - -static __initconst const u64 amd_hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = -{ - [ C(L1D) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ - [ C(RESULT_MISS) ] = 0x0141, /* Data Cache Misses */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */ - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */ - [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */ - }, - }, - [ C(L1I ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */ - [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */ - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(LL ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */ - [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */ - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(DTLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ - [ C(RESULT_MISS) ] = 0x0746, /* L1_DTLB_AND_L2_DLTB_MISS.ALL */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(ITLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */ - [ C(RESULT_MISS) ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(BPU ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */ - [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(NODE) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */ - [ C(RESULT_MISS) ] = 0x98e9, /* CPU Request to Memory, r */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, -}; - -/* - * AMD Performance Monitor K7 and later. 
- */ -static const u64 amd_perfmon_event_map[] = -{ - [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, - [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, - [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */ - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */ -}; - -static u64 amd_pmu_event_map(int hw_event) -{ - return amd_perfmon_event_map[hw_event]; -} - -static int amd_pmu_hw_config(struct perf_event *event) -{ - int ret = x86_pmu_hw_config(event); - - if (ret) - return ret; - - if (has_branch_stack(event)) - return -EOPNOTSUPP; - - if (event->attr.exclude_host && event->attr.exclude_guest) - /* - * When HO == GO == 1 the hardware treats that as GO == HO == 0 - * and will count in both modes. We don't want to count in that - * case so we emulate no-counting by setting US = OS = 0. - */ - event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR | - ARCH_PERFMON_EVENTSEL_OS); - else if (event->attr.exclude_host) - event->hw.config |= AMD_PERFMON_EVENTSEL_GUESTONLY; - else if (event->attr.exclude_guest) - event->hw.config |= AMD_PERFMON_EVENTSEL_HOSTONLY; - - if (event->attr.type != PERF_TYPE_RAW) - return 0; - - event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK; - - return 0; -} - -/* - * AMD64 events are detected based on their event codes. - */ -static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc) -{ - return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff); -} - -static inline int amd_is_nb_event(struct hw_perf_event *hwc) -{ - return (hwc->config & 0xe0) == 0xe0; -} - -static inline int amd_has_nb(struct cpu_hw_events *cpuc) -{ - struct amd_nb *nb = cpuc->amd_nb; - - return nb && nb->nb_id != -1; -} - -static void amd_put_event_constraints(struct cpu_hw_events *cpuc, - struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct amd_nb *nb = cpuc->amd_nb; - int i; - - /* - * only care about NB events - */ - if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc))) - return; - - /* - * need to scan whole list because event may not have - * been assigned during scheduling - * - * no race condition possible because event can only - * be removed on one CPU at a time AND PMU is disabled - * when we come here - */ - for (i = 0; i < x86_pmu.num_counters; i++) { - if (nb->owners[i] == event) { - cmpxchg(nb->owners+i, event, NULL); - break; - } - } -} - - /* - * AMD64 NorthBridge events need special treatment because - * counter access needs to be synchronized across all cores - * of a package. Refer to BKDG section 3.12 - * - * NB events are events measuring L3 cache, Hypertransport - * traffic. They are identified by an event code >= 0xe00. - * They measure events on the NorthBride which is shared - * by all cores on a package. NB events are counted on a - * shared set of counters. When a NB event is programmed - * in a counter, the data actually comes from a shared - * counter. Thus, access to those counters needs to be - * synchronized. - * - * We implement the synchronization such that no two cores - * can be measuring NB events using the same counters. Thus, - * we maintain a per-NB allocation table. The available slot - * is propagated using the event_constraint structure. - * - * We provide only one choice for each NB event based on - * the fact that only NB events have restrictions. 
Consequently, - * if a counter is available, there is a guarantee the NB event - * will be assigned to it. If no slot is available, an empty - * constraint is returned and scheduling will eventually fail - * for this event. - * - * Note that all cores attached the same NB compete for the same - * counters to host NB events, this is why we use atomic ops. Some - * multi-chip CPUs may have more than one NB. - * - * Given that resources are allocated (cmpxchg), they must be - * eventually freed for others to use. This is accomplished by - * calling amd_put_event_constraints(). - * - * Non NB events are not impacted by this restriction. - */ -static struct event_constraint * -amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct amd_nb *nb = cpuc->amd_nb; - struct perf_event *old = NULL; - int max = x86_pmu.num_counters; - int i, j, k = -1; - - /* - * if not NB event or no NB, then no constraints - */ - if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc))) - return &unconstrained; - - /* - * detect if already present, if so reuse - * - * cannot merge with actual allocation - * because of possible holes - * - * event can already be present yet not assigned (in hwc->idx) - * because of successive calls to x86_schedule_events() from - * hw_perf_group_sched_in() without hw_perf_enable() - */ - for (i = 0; i < max; i++) { - /* - * keep track of first free slot - */ - if (k == -1 && !nb->owners[i]) - k = i; - - /* already present, reuse */ - if (nb->owners[i] == event) - goto done; - } - /* - * not present, so grab a new slot - * starting either at: - */ - if (hwc->idx != -1) { - /* previous assignment */ - i = hwc->idx; - } else if (k != -1) { - /* start from free slot found */ - i = k; - } else { - /* - * event not found, no slot found in - * first pass, try again from the - * beginning - */ - i = 0; - } - j = i; - do { - old = cmpxchg(nb->owners+i, NULL, event); - if (!old) - break; - if (++i == max) - i = 0; - } while (i != j); -done: - if (!old) - return &nb->event_constraints[i]; - - return &emptyconstraint; -} - -static struct amd_nb *amd_alloc_nb(int cpu) -{ - struct amd_nb *nb; - int i; - - nb = kmalloc_node(sizeof(struct amd_nb), GFP_KERNEL | __GFP_ZERO, - cpu_to_node(cpu)); - if (!nb) - return NULL; - - nb->nb_id = -1; - - /* - * initialize all possible NB constraints - */ - for (i = 0; i < x86_pmu.num_counters; i++) { - __set_bit(i, nb->event_constraints[i].idxmsk); - nb->event_constraints[i].weight = 1; - } - return nb; -} - -static int amd_pmu_cpu_prepare(int cpu) -{ - struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); - - WARN_ON_ONCE(cpuc->amd_nb); - - if (boot_cpu_data.x86_max_cores < 2) - return NOTIFY_OK; - - cpuc->amd_nb = amd_alloc_nb(cpu); - if (!cpuc->amd_nb) - return NOTIFY_BAD; - - return NOTIFY_OK; -} - -static void amd_pmu_cpu_starting(int cpu) -{ - struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); - struct amd_nb *nb; - int i, nb_id; - - cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY; - - if (boot_cpu_data.x86_max_cores < 2 || boot_cpu_data.x86 == 0x15) - return; - - nb_id = amd_get_nb_id(cpu); - WARN_ON_ONCE(nb_id == BAD_APICID); - - for_each_online_cpu(i) { - nb = per_cpu(cpu_hw_events, i).amd_nb; - if (WARN_ON_ONCE(!nb)) - continue; - - if (nb->nb_id == nb_id) { - cpuc->kfree_on_online = cpuc->amd_nb; - cpuc->amd_nb = nb; - break; - } - } - - cpuc->amd_nb->nb_id = nb_id; - cpuc->amd_nb->refcnt++; -} - -static void amd_pmu_cpu_dead(int cpu) -{ - struct cpu_hw_events 
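
The claim loop in amd_get_event_constraints() above serializes northbridge counters across cores with nothing more than cmpxchg on the shared owners[] array: start from the previous assignment or the first free slot, try to swing a NULL entry to the event pointer, and wrap around until a slot is won or every slot is taken. A user-space sketch of the same slot-claiming idea, using GCC's __atomic built-ins in place of the kernel's cmpxchg() (all names invented):

#include <stddef.h>
#include <stdio.h>

#define NUM_COUNTERS 4

/* Shared per-northbridge ownership table: one slot per shared counter. */
static void *owners[NUM_COUNTERS];

static int claim_nb_slot(void *event, int start)
{
    int i = start, j = start;

    do {
        void *expected = NULL;

        if (__atomic_compare_exchange_n(&owners[i], &expected, event,
                                        false, __ATOMIC_SEQ_CST,
                                        __ATOMIC_SEQ_CST))
            return i;               /* claimed counter i */
        if (expected == event)
            return i;               /* already ours from an earlier pass */
        if (++i == NUM_COUNTERS)
            i = 0;
    } while (i != j);

    return -1;                      /* all shared counters taken */
}

static void release_nb_slot(void *event, int i)
{
    void *expected = event;

    /* Mirror of cmpxchg(nb->owners + i, event, NULL). */
    __atomic_compare_exchange_n(&owners[i], &expected, (void *)NULL,
                                false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}

int main(void)
{
    int e1, e2;
    int a = claim_nb_slot(&e1, 0);
    int b = claim_nb_slot(&e2, 0);

    printf("event1 -> counter %d, event2 -> counter %d\n", a, b);
    release_nb_slot(&e1, a);
    release_nb_slot(&e2, b);
    return 0;
}
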
*cpuhw; - - if (boot_cpu_data.x86_max_cores < 2) - return; - - cpuhw = &per_cpu(cpu_hw_events, cpu); - - if (cpuhw->amd_nb) { - struct amd_nb *nb = cpuhw->amd_nb; - - if (nb->nb_id == -1 || --nb->refcnt == 0) - kfree(nb); - - cpuhw->amd_nb = NULL; - } -} - -PMU_FORMAT_ATTR(event, "config:0-7,32-35"); -PMU_FORMAT_ATTR(umask, "config:8-15" ); -PMU_FORMAT_ATTR(edge, "config:18" ); -PMU_FORMAT_ATTR(inv, "config:23" ); -PMU_FORMAT_ATTR(cmask, "config:24-31" ); - -static struct attribute *amd_format_attr[] = { - &format_attr_event.attr, - &format_attr_umask.attr, - &format_attr_edge.attr, - &format_attr_inv.attr, - &format_attr_cmask.attr, - NULL, -}; - -static __initconst const struct x86_pmu amd_pmu = { - .name = "AMD", - .handle_irq = x86_pmu_handle_irq, - .disable_all = x86_pmu_disable_all, - .enable_all = x86_pmu_enable_all, - .enable = x86_pmu_enable_event, - .disable = x86_pmu_disable_event, - .hw_config = amd_pmu_hw_config, - .schedule_events = x86_schedule_events, - .eventsel = MSR_K7_EVNTSEL0, - .perfctr = MSR_K7_PERFCTR0, - .event_map = amd_pmu_event_map, - .max_events = ARRAY_SIZE(amd_perfmon_event_map), - .num_counters = AMD64_NUM_COUNTERS, - .cntval_bits = 48, - .cntval_mask = (1ULL << 48) - 1, - .apic = 1, - /* use highest bit to detect overflow */ - .max_period = (1ULL << 47) - 1, - .get_event_constraints = amd_get_event_constraints, - .put_event_constraints = amd_put_event_constraints, - - .format_attrs = amd_format_attr, - - .cpu_prepare = amd_pmu_cpu_prepare, - .cpu_starting = amd_pmu_cpu_starting, - .cpu_dead = amd_pmu_cpu_dead, -}; - -/* AMD Family 15h */ - -#define AMD_EVENT_TYPE_MASK 0x000000F0ULL - -#define AMD_EVENT_FP 0x00000000ULL ... 0x00000010ULL -#define AMD_EVENT_LS 0x00000020ULL ... 0x00000030ULL -#define AMD_EVENT_DC 0x00000040ULL ... 0x00000050ULL -#define AMD_EVENT_CU 0x00000060ULL ... 0x00000070ULL -#define AMD_EVENT_IC_DE 0x00000080ULL ... 0x00000090ULL -#define AMD_EVENT_EX_LS 0x000000C0ULL -#define AMD_EVENT_DE 0x000000D0ULL -#define AMD_EVENT_NB 0x000000E0ULL ... 
0x000000F0ULL - -/* - * AMD family 15h event code/PMC mappings: - * - * type = event_code & 0x0F0: - * - * 0x000 FP PERF_CTL[5:3] - * 0x010 FP PERF_CTL[5:3] - * 0x020 LS PERF_CTL[5:0] - * 0x030 LS PERF_CTL[5:0] - * 0x040 DC PERF_CTL[5:0] - * 0x050 DC PERF_CTL[5:0] - * 0x060 CU PERF_CTL[2:0] - * 0x070 CU PERF_CTL[2:0] - * 0x080 IC/DE PERF_CTL[2:0] - * 0x090 IC/DE PERF_CTL[2:0] - * 0x0A0 --- - * 0x0B0 --- - * 0x0C0 EX/LS PERF_CTL[5:0] - * 0x0D0 DE PERF_CTL[2:0] - * 0x0E0 NB NB_PERF_CTL[3:0] - * 0x0F0 NB NB_PERF_CTL[3:0] - * - * Exceptions: - * - * 0x000 FP PERF_CTL[3], PERF_CTL[5:3] (*) - * 0x003 FP PERF_CTL[3] - * 0x004 FP PERF_CTL[3], PERF_CTL[5:3] (*) - * 0x00B FP PERF_CTL[3] - * 0x00D FP PERF_CTL[3] - * 0x023 DE PERF_CTL[2:0] - * 0x02D LS PERF_CTL[3] - * 0x02E LS PERF_CTL[3,0] - * 0x031 LS PERF_CTL[2:0] (**) - * 0x043 CU PERF_CTL[2:0] - * 0x045 CU PERF_CTL[2:0] - * 0x046 CU PERF_CTL[2:0] - * 0x054 CU PERF_CTL[2:0] - * 0x055 CU PERF_CTL[2:0] - * 0x08F IC PERF_CTL[0] - * 0x187 DE PERF_CTL[0] - * 0x188 DE PERF_CTL[0] - * 0x0DB EX PERF_CTL[5:0] - * 0x0DC LS PERF_CTL[5:0] - * 0x0DD LS PERF_CTL[5:0] - * 0x0DE LS PERF_CTL[5:0] - * 0x0DF LS PERF_CTL[5:0] - * 0x1C0 EX PERF_CTL[5:3] - * 0x1D6 EX PERF_CTL[5:0] - * 0x1D8 EX PERF_CTL[5:0] - * - * (*) depending on the umask all FPU counters may be used - * (**) only one unitmask enabled at a time - */ - -static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); -static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0); -static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0); -static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0); -static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); -static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); - -static struct event_constraint * -amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - unsigned int event_code = amd_get_event_code(hwc); - - switch (event_code & AMD_EVENT_TYPE_MASK) { - case AMD_EVENT_FP: - switch (event_code) { - case 0x000: - if (!(hwc->config & 0x0000F000ULL)) - break; - if (!(hwc->config & 0x00000F00ULL)) - break; - return &amd_f15_PMC3; - case 0x004: - if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1) - break; - return &amd_f15_PMC3; - case 0x003: - case 0x00B: - case 0x00D: - return &amd_f15_PMC3; - } - return &amd_f15_PMC53; - case AMD_EVENT_LS: - case AMD_EVENT_DC: - case AMD_EVENT_EX_LS: - switch (event_code) { - case 0x023: - case 0x043: - case 0x045: - case 0x046: - case 0x054: - case 0x055: - return &amd_f15_PMC20; - case 0x02D: - return &amd_f15_PMC3; - case 0x02E: - return &amd_f15_PMC30; - case 0x031: - if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1) - return &amd_f15_PMC20; - return &emptyconstraint; - case 0x1C0: - return &amd_f15_PMC53; - default: - return &amd_f15_PMC50; - } - case AMD_EVENT_CU: - case AMD_EVENT_IC_DE: - case AMD_EVENT_DE: - switch (event_code) { - case 0x08F: - case 0x187: - case 0x188: - return &amd_f15_PMC0; - case 0x0DB ... 
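
The table above boils down to masking the event code with 0x0F0 and mapping the resulting type nibble to a set of allowed PERF_CTL counters; the per-event exceptions listed afterwards refine that default. The sketch below reproduces only the coarse mapping, together with the event-code extraction done by amd_get_event_code() earlier in this file, and deliberately ignores the exception list:

#include <stdint.h>
#include <stdio.h>

/* Recover the 12-bit event code from a raw config value: low byte plus
 * the extended bits 35:32, as in amd_get_event_code(). */
static unsigned int amd_event_code(uint64_t config)
{
    return ((config >> 24) & 0x0f00) | (config & 0x00ff);
}

/* Coarse family 15h mapping: type nibble -> allowed PERF_CTL mask. */
static unsigned int f15h_counter_mask(unsigned int event_code)
{
    switch (event_code & 0x0f0) {
    case 0x000: case 0x010:                 /* FP            */
        return 0x38;                        /* PERF_CTL[5:3] */
    case 0x020: case 0x030:                 /* LS            */
    case 0x040: case 0x050:                 /* DC            */
    case 0x0c0:                             /* EX/LS         */
        return 0x3f;                        /* PERF_CTL[5:0] */
    case 0x060: case 0x070:                 /* CU            */
    case 0x080: case 0x090:                 /* IC/DE         */
    case 0x0d0:                             /* DE            */
        return 0x07;                        /* PERF_CTL[2:0] */
    default:                                /* NB or reserved */
        return 0x00;
    }
}

int main(void)
{
    uint64_t cfg = 0x00c0;  /* retired instructions, an EX/LS-class event */

    printf("event %#x -> counter mask %#x\n",
           amd_event_code(cfg), f15h_counter_mask(amd_event_code(cfg)));
    return 0;
}
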
0x0DF: - case 0x1D6: - case 0x1D8: - return &amd_f15_PMC50; - default: - return &amd_f15_PMC20; - } - case AMD_EVENT_NB: - /* not yet implemented */ - return &emptyconstraint; - default: - return &emptyconstraint; - } -} - -static __initconst const struct x86_pmu amd_pmu_f15h = { - .name = "AMD Family 15h", - .handle_irq = x86_pmu_handle_irq, - .disable_all = x86_pmu_disable_all, - .enable_all = x86_pmu_enable_all, - .enable = x86_pmu_enable_event, - .disable = x86_pmu_disable_event, - .hw_config = amd_pmu_hw_config, - .schedule_events = x86_schedule_events, - .eventsel = MSR_F15H_PERF_CTL, - .perfctr = MSR_F15H_PERF_CTR, - .event_map = amd_pmu_event_map, - .max_events = ARRAY_SIZE(amd_perfmon_event_map), - .num_counters = AMD64_NUM_COUNTERS_F15H, - .cntval_bits = 48, - .cntval_mask = (1ULL << 48) - 1, - .apic = 1, - /* use highest bit to detect overflow */ - .max_period = (1ULL << 47) - 1, - .get_event_constraints = amd_get_event_constraints_f15h, - /* nortbridge counters not yet implemented: */ -#if 0 - .put_event_constraints = amd_put_event_constraints, - - .cpu_prepare = amd_pmu_cpu_prepare, - .cpu_dead = amd_pmu_cpu_dead, -#endif - .cpu_starting = amd_pmu_cpu_starting, - .format_attrs = amd_format_attr, -}; - -__init int amd_pmu_init(void) -{ - /* Performance-monitoring supported from K7 and later: */ - if (boot_cpu_data.x86 < 6) - return -ENODEV; - - /* - * If core performance counter extensions exists, it must be - * family 15h, otherwise fail. See x86_pmu_addr_offset(). - */ - switch (boot_cpu_data.x86) { - case 0x15: - if (!cpu_has_perfctr_core) - return -ENODEV; - x86_pmu = amd_pmu_f15h; - break; - default: - if (cpu_has_perfctr_core) - return -ENODEV; - x86_pmu = amd_pmu; - break; - } - - /* Events are common for all AMDs */ - memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - - return 0; -} - -void amd_pmu_enable_virt(void) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - - cpuc->perf_ctr_virt_mask = 0; - - /* Reload all events */ - x86_pmu_disable_all(); - x86_pmu_enable_all(0); -} -EXPORT_SYMBOL_GPL(amd_pmu_enable_virt); - -void amd_pmu_disable_virt(void) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - - /* - * We only mask out the Host-only bit so that host-only counting works - * when SVM is disabled. If someone sets up a guest-only counter when - * SVM is disabled the Guest-only bits still gets set and the counter - * will not count anything. 
- */ - cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY; - - /* Reload all events */ - x86_pmu_disable_all(); - x86_pmu_enable_all(0); -} -EXPORT_SYMBOL_GPL(amd_pmu_disable_virt); diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/perf_event_amd_ibs.c deleted file mode 100644 index 3b8a2d30..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ /dev/null @@ -1,301 +0,0 @@ -/* - * Performance events - AMD IBS - * - * Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter - * - * For licencing details see kernel-base/COPYING - */ - -#include <linux/perf_event.h> -#include <linux/module.h> -#include <linux/pci.h> - -#include <asm/apic.h> - -static u32 ibs_caps; - -#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) - -static struct pmu perf_ibs; - -static int perf_ibs_init(struct perf_event *event) -{ - if (perf_ibs.type != event->attr.type) - return -ENOENT; - return 0; -} - -static int perf_ibs_add(struct perf_event *event, int flags) -{ - return 0; -} - -static void perf_ibs_del(struct perf_event *event, int flags) -{ -} - -static struct pmu perf_ibs = { - .event_init= perf_ibs_init, - .add= perf_ibs_add, - .del= perf_ibs_del, -}; - -static __init int perf_event_ibs_init(void) -{ - if (!ibs_caps) - return -ENODEV; /* ibs not supported by the cpu */ - - perf_pmu_register(&perf_ibs, "ibs", -1); - printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps); - - return 0; -} - -#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */ - -static __init int perf_event_ibs_init(void) { return 0; } - -#endif - -/* IBS - apic initialization, for perf and oprofile */ - -static __init u32 __get_ibs_caps(void) -{ - u32 caps; - unsigned int max_level; - - if (!boot_cpu_has(X86_FEATURE_IBS)) - return 0; - - /* check IBS cpuid feature flags */ - max_level = cpuid_eax(0x80000000); - if (max_level < IBS_CPUID_FEATURES) - return IBS_CAPS_DEFAULT; - - caps = cpuid_eax(IBS_CPUID_FEATURES); - if (!(caps & IBS_CAPS_AVAIL)) - /* cpuid flags not valid */ - return IBS_CAPS_DEFAULT; - - return caps; -} - -u32 get_ibs_caps(void) -{ - return ibs_caps; -} - -EXPORT_SYMBOL(get_ibs_caps); - -static inline int get_eilvt(int offset) -{ - return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1); -} - -static inline int put_eilvt(int offset) -{ - return !setup_APIC_eilvt(offset, 0, 0, 1); -} - -/* - * Check and reserve APIC extended interrupt LVT offset for IBS if available. 
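
__get_ibs_caps() above probes CPUID in two steps: first the IBS feature flag, then the dedicated capability leaf, whose EAX is only trusted when its AVAIL bit is set; on CPUs that predate the leaf the kernel falls back to a default capability mask. A user-space approximation follows; it returns 0 instead of that default, and the leaf and bit numbers used are the customary values, stated here as assumptions of the sketch rather than facts taken from the deleted file:

#include <stdint.h>
#include <stdio.h>
#include <cpuid.h>

#define IBS_CPUID_FEATURES  0x8000001bu   /* IBS capability leaf (assumed) */
#define IBS_CAPS_AVAIL      (1u << 0)     /* EAX bit 0: caps are valid     */

static uint32_t get_ibs_caps(void)
{
    uint32_t eax, ebx, ecx, edx;

    /* ECX bit 10 of leaf 0x80000001 is the AMD IBS feature flag. */
    if (!__get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx) ||
        !(ecx & (1u << 10)))
        return 0;

    /* __get_cpuid() already checks the maximum extended leaf for us. */
    if (!__get_cpuid(IBS_CPUID_FEATURES, &eax, &ebx, &ecx, &edx))
        return 0;       /* leaf absent; the kernel would assume defaults */

    return (eax & IBS_CAPS_AVAIL) ? eax : 0;
}

int main(void)
{
    printf("IBS caps: %#x\n", get_ibs_caps());
    return 0;
}
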
- */ -static inline int ibs_eilvt_valid(void) -{ - int offset; - u64 val; - int valid = 0; - - preempt_disable(); - - rdmsrl(MSR_AMD64_IBSCTL, val); - offset = val & IBSCTL_LVT_OFFSET_MASK; - - if (!(val & IBSCTL_LVT_OFFSET_VALID)) { - pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n", - smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); - goto out; - } - - if (!get_eilvt(offset)) { - pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n", - smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); - goto out; - } - - valid = 1; -out: - preempt_enable(); - - return valid; -} - -static int setup_ibs_ctl(int ibs_eilvt_off) -{ - struct pci_dev *cpu_cfg; - int nodes; - u32 value = 0; - - nodes = 0; - cpu_cfg = NULL; - do { - cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD, - PCI_DEVICE_ID_AMD_10H_NB_MISC, - cpu_cfg); - if (!cpu_cfg) - break; - ++nodes; - pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off - | IBSCTL_LVT_OFFSET_VALID); - pci_read_config_dword(cpu_cfg, IBSCTL, &value); - if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) { - pci_dev_put(cpu_cfg); - printk(KERN_DEBUG "Failed to setup IBS LVT offset, " - "IBSCTL = 0x%08x\n", value); - return -EINVAL; - } - } while (1); - - if (!nodes) { - printk(KERN_DEBUG "No CPU node configured for IBS\n"); - return -ENODEV; - } - - return 0; -} - -/* - * This runs only on the current cpu. We try to find an LVT offset and - * setup the local APIC. For this we must disable preemption. On - * success we initialize all nodes with this offset. This updates then - * the offset in the IBS_CTL per-node msr. The per-core APIC setup of - * the IBS interrupt vector is handled by perf_ibs_cpu_notifier that - * is using the new offset. - */ -static int force_ibs_eilvt_setup(void) -{ - int offset; - int ret; - - preempt_disable(); - /* find the next free available EILVT entry, skip offset 0 */ - for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) { - if (get_eilvt(offset)) - break; - } - preempt_enable(); - - if (offset == APIC_EILVT_NR_MAX) { - printk(KERN_DEBUG "No EILVT entry available\n"); - return -EBUSY; - } - - ret = setup_ibs_ctl(offset); - if (ret) - goto out; - - if (!ibs_eilvt_valid()) { - ret = -EFAULT; - goto out; - } - - pr_info("IBS: LVT offset %d assigned\n", offset); - - return 0; -out: - preempt_disable(); - put_eilvt(offset); - preempt_enable(); - return ret; -} - -static inline int get_ibs_lvt_offset(void) -{ - u64 val; - - rdmsrl(MSR_AMD64_IBSCTL, val); - if (!(val & IBSCTL_LVT_OFFSET_VALID)) - return -EINVAL; - - return val & IBSCTL_LVT_OFFSET_MASK; -} - -static void setup_APIC_ibs(void *dummy) -{ - int offset; - - offset = get_ibs_lvt_offset(); - if (offset < 0) - goto failed; - - if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0)) - return; -failed: - pr_warn("perf: IBS APIC setup failed on cpu #%d\n", - smp_processor_id()); -} - -static void clear_APIC_ibs(void *dummy) -{ - int offset; - - offset = get_ibs_lvt_offset(); - if (offset >= 0) - setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1); -} - -static int __cpuinit -perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) -{ - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_STARTING: - setup_APIC_ibs(NULL); - break; - case CPU_DYING: - clear_APIC_ibs(NULL); - break; - default: - break; - } - - return NOTIFY_OK; -} - -static __init int amd_ibs_init(void) -{ - u32 caps; - int ret = -EINVAL; - - caps = __get_ibs_caps(); - if (!caps) - return -ENODEV; /* ibs not supported by the cpu */ - - /* - 
* Force LVT offset assignment for family 10h: The offsets are - * not assigned by the BIOS for this family, so the OS is - * responsible for doing it. If the OS assignment fails, fall - * back to BIOS settings and try to setup this. - */ - if (boot_cpu_data.x86 == 0x10) - force_ibs_eilvt_setup(); - - if (!ibs_eilvt_valid()) - goto out; - - get_online_cpus(); - ibs_caps = caps; - /* make ibs_caps visible to other cpus: */ - smp_mb(); - perf_cpu_notifier(perf_ibs_cpu_notifier); - smp_call_function(setup_APIC_ibs, NULL, 1); - put_online_cpus(); - - ret = perf_event_ibs_init(); -out: - if (ret) - pr_err("Failed to setup IBS, %d\n", ret); - return ret; -} - -/* Since we need the pci subsystem to init ibs we can't do this earlier: */ -device_initcall(amd_ibs_init); diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/perf_event_intel.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/perf_event_intel.c deleted file mode 100644 index 26b3e2fe..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/perf_event_intel.c +++ /dev/null @@ -1,1886 +0,0 @@ -/* - * Per core/cpu state - * - * Used to coordinate shared registers between HT threads or - * among events on a single PMU. - */ - -#include <linux/stddef.h> -#include <linux/types.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/export.h> - -#include <asm/hardirq.h> -#include <asm/apic.h> - -#include "perf_event.h" - -/* - * Intel PerfMon, used on Core and later. - */ -static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = -{ - [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, - [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, - [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, - [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, - [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */ -}; - -static struct event_constraint intel_core_event_constraints[] __read_mostly = -{ - INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ - INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ - INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ - INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ - INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ - INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */ - EVENT_CONSTRAINT_END -}; - -static struct event_constraint intel_core2_event_constraints[] __read_mostly = -{ - FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ - FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ - FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ - INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ - INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ - INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ - INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ - INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ - INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ - INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ - INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ - INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */ - INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ - EVENT_CONSTRAINT_END -}; - -static struct event_constraint intel_nehalem_event_constraints[] __read_mostly = -{ - FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ - FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ - FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ - INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ - INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* 
L1D_CACHE_ST */ - INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ - INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ - INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */ - INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ - INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ - INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ - EVENT_CONSTRAINT_END -}; - -static struct extra_reg intel_nehalem_extra_regs[] __read_mostly = -{ - INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), - EVENT_EXTRA_END -}; - -static struct event_constraint intel_westmere_event_constraints[] __read_mostly = -{ - FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ - FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ - FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ - INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ - INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ - INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ - INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */ - EVENT_CONSTRAINT_END -}; - -static struct event_constraint intel_snb_event_constraints[] __read_mostly = -{ - FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ - FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ - FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ - INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ - INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ - INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ - EVENT_CONSTRAINT_END -}; - -static struct extra_reg intel_westmere_extra_regs[] __read_mostly = -{ - INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), - INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1), - EVENT_EXTRA_END -}; - -static struct event_constraint intel_v1_event_constraints[] __read_mostly = -{ - EVENT_CONSTRAINT_END -}; - -static struct event_constraint intel_gen_event_constraints[] __read_mostly = -{ - FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ - FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ - FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ - EVENT_CONSTRAINT_END -}; - -static struct extra_reg intel_snb_extra_regs[] __read_mostly = { - INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), - INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), - EVENT_EXTRA_END -}; - -static u64 intel_pmu_event_map(int hw_event) -{ - return intel_perfmon_event_map[hw_event]; -} - -static __initconst const u64 snb_hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = -{ - [ C(L1D) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */ - [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPLACEMENT */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */ - [ C(RESULT_MISS) ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */ - }, - }, - [ C(L1I ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = 0x0, - }, - }, - [ C(LL ) ] = { - [ C(OP_READ) ] = { - /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ - [ C(RESULT_ACCESS) 
] = 0x01b7, - /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ - [ C(RESULT_MISS) ] = 0x01b7, - }, - [ C(OP_WRITE) ] = { - /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ - [ C(RESULT_ACCESS) ] = 0x01b7, - /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ - [ C(RESULT_MISS) ] = 0x01b7, - }, - [ C(OP_PREFETCH) ] = { - /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ - [ C(RESULT_ACCESS) ] = 0x01b7, - /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ - [ C(RESULT_MISS) ] = 0x01b7, - }, - }, - [ C(DTLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */ - [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */ - [ C(RESULT_MISS) ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = 0x0, - }, - }, - [ C(ITLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */ - [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(BPU ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ - [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(NODE) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - -}; - -static __initconst const u64 westmere_hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = -{ - [ C(L1D) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ - [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */ - [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ - [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ - }, - }, - [ C(L1I ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ - [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = 0x0, - }, - }, - [ C(LL ) ] = { - [ C(OP_READ) ] = { - /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ - [ C(RESULT_ACCESS) ] = 0x01b7, - /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ - [ C(RESULT_MISS) ] = 0x01b7, - }, - /* - * Use RFO, not WRITEBACK, because a write miss would typically occur - * on RFO. 
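For reference, the 16-bit codes in these hw_cache_event_ids tables follow the architectural PERFEVTSEL layout: bits 0-7 carry the event select and bits 8-15 the unit mask, so 0x412e is event 0x2e with umask 0x41 (the classic "LLC Misses" architectural event) and 0x01b7 is the OFFCORE_RESPONSE_0 event, which additionally needs an extra MSR programmed via the extra_regs tables. A minimal stand-alone sketch (not part of the deleted file) that decodes a few of the codes used above:

#include <stdio.h>

/* Decode a "umask:event" code as used in the cache event tables:
 * bits 0-7 are the event select, bits 8-15 the unit mask. */
static void decode(unsigned int code)
{
	unsigned int event = code & 0xff;
	unsigned int umask = (code >> 8) & 0xff;

	printf("raw 0x%04x -> event select 0x%02x, umask 0x%02x\n",
	       code, event, umask);
}

int main(void)
{
	decode(0x412e); /* LLC misses (architectural)              */
	decode(0xf1d0); /* MEM_UOP_RETIRED.LOADS on SandyBridge    */
	decode(0x01b7); /* OFFCORE_RESPONSE_0, needs MSR_OFFCORE_RSP_0 */
	return 0;
}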
- */ - [ C(OP_WRITE) ] = { - /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ - [ C(RESULT_ACCESS) ] = 0x01b7, - /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ - [ C(RESULT_MISS) ] = 0x01b7, - }, - [ C(OP_PREFETCH) ] = { - /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ - [ C(RESULT_ACCESS) ] = 0x01b7, - /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ - [ C(RESULT_MISS) ] = 0x01b7, - }, - }, - [ C(DTLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ - [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */ - [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = 0x0, - }, - }, - [ C(ITLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ - [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(BPU ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ - [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(NODE) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x01b7, - [ C(RESULT_MISS) ] = 0x01b7, - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x01b7, - [ C(RESULT_MISS) ] = 0x01b7, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x01b7, - [ C(RESULT_MISS) ] = 0x01b7, - }, - }, -}; - -/* - * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits; - * See IA32 SDM Vol 3B 30.6.1.3 - */ - -#define NHM_DMND_DATA_RD (1 << 0) -#define NHM_DMND_RFO (1 << 1) -#define NHM_DMND_IFETCH (1 << 2) -#define NHM_DMND_WB (1 << 3) -#define NHM_PF_DATA_RD (1 << 4) -#define NHM_PF_DATA_RFO (1 << 5) -#define NHM_PF_IFETCH (1 << 6) -#define NHM_OFFCORE_OTHER (1 << 7) -#define NHM_UNCORE_HIT (1 << 8) -#define NHM_OTHER_CORE_HIT_SNP (1 << 9) -#define NHM_OTHER_CORE_HITM (1 << 10) - /* reserved */ -#define NHM_REMOTE_CACHE_FWD (1 << 12) -#define NHM_REMOTE_DRAM (1 << 13) -#define NHM_LOCAL_DRAM (1 << 14) -#define NHM_NON_DRAM (1 << 15) - -#define NHM_LOCAL (NHM_LOCAL_DRAM|NHM_REMOTE_CACHE_FWD) -#define NHM_REMOTE (NHM_REMOTE_DRAM) - -#define NHM_DMND_READ (NHM_DMND_DATA_RD) -#define NHM_DMND_WRITE (NHM_DMND_RFO|NHM_DMND_WB) -#define NHM_DMND_PREFETCH (NHM_PF_DATA_RD|NHM_PF_DATA_RFO) - -#define NHM_L3_HIT (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM) -#define NHM_L3_MISS (NHM_NON_DRAM|NHM_LOCAL_DRAM|NHM_REMOTE_DRAM|NHM_REMOTE_CACHE_FWD) -#define NHM_L3_ACCESS (NHM_L3_HIT|NHM_L3_MISS) - -static __initconst const u64 nehalem_hw_cache_extra_regs - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = -{ - [ C(LL ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS, - [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_L3_MISS, - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS, - [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_L3_MISS, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS, - [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS, - }, - }, - [ C(NODE) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_LOCAL|NHM_REMOTE, - [ 
C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE, - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_LOCAL|NHM_REMOTE, - [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_LOCAL|NHM_REMOTE, - [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE, - }, - }, -}; - -static __initconst const u64 nehalem_hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = -{ - [ C(L1D) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ - [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */ - [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ - [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ - }, - }, - [ C(L1I ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ - [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = 0x0, - }, - }, - [ C(LL ) ] = { - [ C(OP_READ) ] = { - /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ - [ C(RESULT_ACCESS) ] = 0x01b7, - /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ - [ C(RESULT_MISS) ] = 0x01b7, - }, - /* - * Use RFO, not WRITEBACK, because a write miss would typically occur - * on RFO. - */ - [ C(OP_WRITE) ] = { - /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ - [ C(RESULT_ACCESS) ] = 0x01b7, - /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ - [ C(RESULT_MISS) ] = 0x01b7, - }, - [ C(OP_PREFETCH) ] = { - /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ - [ C(RESULT_ACCESS) ] = 0x01b7, - /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ - [ C(RESULT_MISS) ] = 0x01b7, - }, - }, - [ C(DTLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = 0x0, - }, - }, - [ C(ITLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ - [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(BPU ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ - [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(NODE) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x01b7, - [ C(RESULT_MISS) ] = 0x01b7, - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x01b7, - [ C(RESULT_MISS) ] = 0x01b7, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x01b7, - [ C(RESULT_MISS) ] = 0x01b7, - }, - }, -}; - -static __initconst const u64 core2_hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = -{ - [ C(L1D) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0f40, /* 
L1D_CACHE_LD.MESI */ - [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ - [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */ - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(L1I ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */ - [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(LL ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ - [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ - [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(DTLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(ITLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ - [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(BPU ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ - [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, -}; - -static __initconst const u64 atom_hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = -{ - [ C(L1D) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */ - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */ - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(L1I ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ - [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(LL ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ - [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ - [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(DTLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */ - }, - [ 
C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(ITLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ - [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(BPU ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ - [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, -}; - -static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event) -{ - /* user explicitly requested branch sampling */ - if (has_branch_stack(event)) - return true; - - /* implicit branch sampling to correct PEBS skid */ - if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) - return true; - - return false; -} - -static void intel_pmu_disable_all(void) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); - - if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) - intel_pmu_disable_bts(); - - intel_pmu_pebs_disable_all(); - intel_pmu_lbr_disable_all(); -} - -static void intel_pmu_enable_all(int added) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - - intel_pmu_pebs_enable_all(); - intel_pmu_lbr_enable_all(); - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, - x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask); - - if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { - struct perf_event *event = - cpuc->events[X86_PMC_IDX_FIXED_BTS]; - - if (WARN_ON_ONCE(!event)) - return; - - intel_pmu_enable_bts(event->hw.config); - } -} - -/* - * Workaround for: - * Intel Errata AAK100 (model 26) - * Intel Errata AAP53 (model 30) - * Intel Errata BD53 (model 44) - * - * The official story: - * These chips need to be 'reset' when adding counters by programming the - * magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either - * in sequence on the same PMC or on different PMCs. - * - * In practise it appears some of these events do in fact count, and - * we need to programm all 4 events. - */ -static void intel_pmu_nhm_workaround(void) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - static const unsigned long nhm_magic[4] = { - 0x4300B5, - 0x4300D2, - 0x4300B1, - 0x4300B1 - }; - struct perf_event *event; - int i; - - /* - * The Errata requires below steps: - * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL; - * 2) Configure 4 PERFEVTSELx with the magic events and clear - * the corresponding PMCx; - * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL; - * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL; - * 5) Clear 4 pairs of ERFEVTSELx and PMCx; - */ - - /* - * The real steps we choose are a little different from above. - * A) To reduce MSR operations, we don't run step 1) as they - * are already cleared before this function is called; - * B) Call x86_perf_event_update to save PMCx before configuring - * PERFEVTSELx with magic number; - * C) With step 5), we do clear only when the PERFEVTSELx is - * not used currently. 
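Both intel_pmu_enable_all() above and step 3 of the errata sequence ("set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL") rely on the architectural layout of the global control MSR: bit i enables general-purpose counter i and bit 32+i enables fixed counter i. The kernel builds its enable mask elsewhere in perf_event.c; the sketch below (global_ctrl_mask is an illustrative helper, not from this file) only shows how such a mask is laid out:

#include <stdio.h>
#include <stdint.h>

/* Illustrative: bit i enables PMCi, bit 32+i enables fixed counter i. */
static uint64_t global_ctrl_mask(int num_gp, int num_fixed)
{
	uint64_t gp    = (num_gp    >= 64) ? ~0ULL : ((1ULL << num_gp) - 1);
	uint64_t fixed = (num_fixed >= 32) ? ~0ULL : ((1ULL << num_fixed) - 1);

	return gp | (fixed << 32);
}

int main(void)
{
	/* e.g. a Nehalem-class PMU with 4 general and 3 fixed counters */
	printf("GLOBAL_CTRL enable mask: 0x%016llx\n",
	       (unsigned long long)global_ctrl_mask(4, 3));
	return 0;
}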
- * D) Call x86_perf_event_set_period to restore PMCx; - */ - - /* We always operate 4 pairs of PERF Counters */ - for (i = 0; i < 4; i++) { - event = cpuc->events[i]; - if (event) - x86_perf_event_update(event); - } - - for (i = 0; i < 4; i++) { - wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]); - wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0); - } - - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf); - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); - - for (i = 0; i < 4; i++) { - event = cpuc->events[i]; - - if (event) { - x86_perf_event_set_period(event); - __x86_pmu_enable_event(&event->hw, - ARCH_PERFMON_EVENTSEL_ENABLE); - } else - wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0); - } -} - -static void intel_pmu_nhm_enable_all(int added) -{ - if (added) - intel_pmu_nhm_workaround(); - intel_pmu_enable_all(added); -} - -static inline u64 intel_pmu_get_status(void) -{ - u64 status; - - rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); - - return status; -} - -static inline void intel_pmu_ack_status(u64 ack) -{ - wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); -} - -static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) -{ - int idx = hwc->idx - X86_PMC_IDX_FIXED; - u64 ctrl_val, mask; - - mask = 0xfULL << (idx * 4); - - rdmsrl(hwc->config_base, ctrl_val); - ctrl_val &= ~mask; - wrmsrl(hwc->config_base, ctrl_val); -} - -static void intel_pmu_disable_event(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - - if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { - intel_pmu_disable_bts(); - intel_pmu_drain_bts_buffer(); - return; - } - - cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); - cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); - - /* - * must disable before any actual event - * because any event may be combined with LBR - */ - if (intel_pmu_needs_lbr_smpl(event)) - intel_pmu_lbr_disable(event); - - if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { - intel_pmu_disable_fixed(hwc); - return; - } - - x86_pmu_disable_event(event); - - if (unlikely(event->attr.precise_ip)) - intel_pmu_pebs_disable(event); -} - -static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) -{ - int idx = hwc->idx - X86_PMC_IDX_FIXED; - u64 ctrl_val, bits, mask; - - /* - * Enable IRQ generation (0x8), - * and enable ring-3 counting (0x2) and ring-0 counting (0x1) - * if requested: - */ - bits = 0x8ULL; - if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) - bits |= 0x2; - if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) - bits |= 0x1; - - /* - * ANY bit is supported in v3 and up - */ - if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY) - bits |= 0x4; - - bits <<= (idx * 4); - mask = 0xfULL << (idx * 4); - - rdmsrl(hwc->config_base, ctrl_val); - ctrl_val &= ~mask; - ctrl_val |= bits; - wrmsrl(hwc->config_base, ctrl_val); -} - -static void intel_pmu_enable_event(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - - if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { - if (!__this_cpu_read(cpu_hw_events.enabled)) - return; - - intel_pmu_enable_bts(hwc->config); - return; - } - /* - * must enabled before any actual event - * because any event may be combined with LBR - */ - if (intel_pmu_needs_lbr_smpl(event)) - intel_pmu_lbr_enable(event); - - if (event->attr.exclude_host) - cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx); - if (event->attr.exclude_guest) - cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx); - - if 
(unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { - intel_pmu_enable_fixed(hwc); - return; - } - - if (unlikely(event->attr.precise_ip)) - intel_pmu_pebs_enable(event); - - __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); -} - -/* - * Save and restart an expired event. Called by NMI contexts, - * so it has to be careful about preempting normal event ops: - */ -int intel_pmu_save_and_restart(struct perf_event *event) -{ - x86_perf_event_update(event); - return x86_perf_event_set_period(event); -} - -static void intel_pmu_reset(void) -{ - struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds); - unsigned long flags; - int idx; - - if (!x86_pmu.num_counters) - return; - - local_irq_save(flags); - - printk("clearing PMU state on CPU#%d\n", smp_processor_id()); - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - checking_wrmsrl(x86_pmu_config_addr(idx), 0ull); - checking_wrmsrl(x86_pmu_event_addr(idx), 0ull); - } - for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) - checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); - - if (ds) - ds->bts_index = ds->bts_buffer_base; - - local_irq_restore(flags); -} - -/* - * This handler is triggered by the local APIC, so the APIC IRQ handling - * rules apply: - */ -static int intel_pmu_handle_irq(struct pt_regs *regs) -{ - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - int bit, loops; - u64 status; - int handled; - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - - /* - * Some chipsets need to unmask the LVTPC in a particular spot - * inside the nmi handler. As a result, the unmasking was pushed - * into all the nmi handlers. - * - * This handler doesn't seem to have any issues with the unmasking - * so it was left at the top. - */ - apic_write(APIC_LVTPC, APIC_DM_NMI); - - intel_pmu_disable_all(); - handled = intel_pmu_drain_bts_buffer(); - status = intel_pmu_get_status(); - if (!status) { - intel_pmu_enable_all(0); - return handled; - } - - loops = 0; -again: - intel_pmu_ack_status(status); - if (++loops > 100) { - WARN_ONCE(1, "perfevents: irq loop stuck!\n"); - perf_event_print_debug(); - intel_pmu_reset(); - goto done; - } - - inc_irq_stat(apic_perf_irqs); - - intel_pmu_lbr_read(); - - /* - * PEBS overflow sets bit 62 in the global status register - */ - if (__test_and_clear_bit(62, (unsigned long *)&status)) { - handled++; - x86_pmu.drain_pebs(regs); - } - - for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { - struct perf_event *event = cpuc->events[bit]; - - handled++; - - if (!test_bit(bit, cpuc->active_mask)) - continue; - - if (!intel_pmu_save_and_restart(event)) - continue; - - data.period = event->hw.last_period; - - if (has_branch_stack(event)) - data.br_stack = &cpuc->lbr_stack; - - if (perf_event_overflow(event, &data, regs)) - x86_pmu_stop(event, 0); - } - - /* - * Repeat if there is more work to be done: - */ - status = intel_pmu_get_status(); - if (status) - goto again; - -done: - intel_pmu_enable_all(0); - return handled; -} - -static struct event_constraint * -intel_bts_constraints(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - unsigned int hw_event, bts_event; - - if (event->attr.freq) - return NULL; - - hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; - bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); - - if (unlikely(hw_event == bts_event && hwc->sample_period == 1)) - return &bts_constraint; - - return NULL; -} - -static bool intel_try_alt_er(struct perf_event *event, int orig_idx) -{ - if 
(!(x86_pmu.er_flags & ERF_HAS_RSP_1)) - return false; - - if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) { - event->hw.config &= ~INTEL_ARCH_EVENT_MASK; - event->hw.config |= 0x01bb; - event->hw.extra_reg.idx = EXTRA_REG_RSP_1; - event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1; - } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) { - event->hw.config &= ~INTEL_ARCH_EVENT_MASK; - event->hw.config |= 0x01b7; - event->hw.extra_reg.idx = EXTRA_REG_RSP_0; - event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0; - } - - if (event->hw.extra_reg.idx == orig_idx) - return false; - - return true; -} - -/* - * manage allocation of shared extra msr for certain events - * - * sharing can be: - * per-cpu: to be shared between the various events on a single PMU - * per-core: per-cpu + shared by HT threads - */ -static struct event_constraint * -__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, - struct perf_event *event, - struct hw_perf_event_extra *reg) -{ - struct event_constraint *c = &emptyconstraint; - struct er_account *era; - unsigned long flags; - int orig_idx = reg->idx; - - /* already allocated shared msr */ - if (reg->alloc) - return NULL; /* call x86_get_event_constraint() */ - -again: - era = &cpuc->shared_regs->regs[reg->idx]; - /* - * we use spin_lock_irqsave() to avoid lockdep issues when - * passing a fake cpuc - */ - raw_spin_lock_irqsave(&era->lock, flags); - - if (!atomic_read(&era->ref) || era->config == reg->config) { - - /* lock in msr value */ - era->config = reg->config; - era->reg = reg->reg; - - /* one more user */ - atomic_inc(&era->ref); - - /* no need to reallocate during incremental event scheduling */ - reg->alloc = 1; - - /* - * need to call x86_get_event_constraint() - * to check if associated event has constraints - */ - c = NULL; - } else if (intel_try_alt_er(event, orig_idx)) { - raw_spin_unlock_irqrestore(&era->lock, flags); - goto again; - } - raw_spin_unlock_irqrestore(&era->lock, flags); - - return c; -} - -static void -__intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc, - struct hw_perf_event_extra *reg) -{ - struct er_account *era; - - /* - * only put constraint if extra reg was actually - * allocated. 
Also takes care of event which do - * not use an extra shared reg - */ - if (!reg->alloc) - return; - - era = &cpuc->shared_regs->regs[reg->idx]; - - /* one fewer user */ - atomic_dec(&era->ref); - - /* allocate again next time */ - reg->alloc = 0; -} - -static struct event_constraint * -intel_shared_regs_constraints(struct cpu_hw_events *cpuc, - struct perf_event *event) -{ - struct event_constraint *c = NULL, *d; - struct hw_perf_event_extra *xreg, *breg; - - xreg = &event->hw.extra_reg; - if (xreg->idx != EXTRA_REG_NONE) { - c = __intel_shared_reg_get_constraints(cpuc, event, xreg); - if (c == &emptyconstraint) - return c; - } - breg = &event->hw.branch_reg; - if (breg->idx != EXTRA_REG_NONE) { - d = __intel_shared_reg_get_constraints(cpuc, event, breg); - if (d == &emptyconstraint) { - __intel_shared_reg_put_constraints(cpuc, xreg); - c = d; - } - } - return c; -} - -struct event_constraint * -x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) -{ - struct event_constraint *c; - - if (x86_pmu.event_constraints) { - for_each_event_constraint(c, x86_pmu.event_constraints) { - if ((event->hw.config & c->cmask) == c->code) - return c; - } - } - - return &unconstrained; -} - -static struct event_constraint * -intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) -{ - struct event_constraint *c; - - c = intel_bts_constraints(event); - if (c) - return c; - - c = intel_pebs_constraints(event); - if (c) - return c; - - c = intel_shared_regs_constraints(cpuc, event); - if (c) - return c; - - return x86_get_event_constraints(cpuc, event); -} - -static void -intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc, - struct perf_event *event) -{ - struct hw_perf_event_extra *reg; - - reg = &event->hw.extra_reg; - if (reg->idx != EXTRA_REG_NONE) - __intel_shared_reg_put_constraints(cpuc, reg); - - reg = &event->hw.branch_reg; - if (reg->idx != EXTRA_REG_NONE) - __intel_shared_reg_put_constraints(cpuc, reg); -} - -static void intel_put_event_constraints(struct cpu_hw_events *cpuc, - struct perf_event *event) -{ - intel_put_shared_regs_event_constraints(cpuc, event); -} - -static int intel_pmu_hw_config(struct perf_event *event) -{ - int ret = x86_pmu_hw_config(event); - - if (ret) - return ret; - - if (event->attr.precise_ip && - (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { - /* - * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P - * (0x003c) so that we can use it with PEBS. - * - * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't - * PEBS capable. However we can use INST_RETIRED.ANY_P - * (0x00c0), which is a PEBS capable event, to get the same - * count. - * - * INST_RETIRED.ANY_P counts the number of cycles that retires - * CNTMASK instructions. By setting CNTMASK to a value (16) - * larger than the maximum number of instructions that can be - * retired per cycle (4) and then inverting the condition, we - * count all cycles that retire 16 or less instructions, which - * is every cycle. - * - * Thereby we gain a PEBS capable cycle counter. 
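The alternative encoding built just below packs INST_RETIRED.ANY_P (event 0xc0) together with the invert flag (config bit 23) and a counter mask of 16 (config bits 24-31), matching the format attributes defined later in this file. A stand-alone sketch (not part of the deleted file) of the resulting raw value, roughly what a cpu/event=0xc0,inv,cmask=0x10/ raw perf event would expand to:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* PERFEVTSEL layout: event 0-7, umask 8-15, inv bit 23, cmask 24-31 */
	uint64_t config = 0xc0            /* INST_RETIRED.ANY_P          */
			| (1ULL  << 23)   /* invert the counter mask     */
			| (16ULL << 24);  /* cmask = 16 > max 4 uops/cycle */

	/* counts cycles retiring "16 or fewer" instructions, i.e. every cycle */
	printf("PEBS-capable cycle proxy: config = 0x%llx\n",
	       (unsigned long long)config); /* prints 0x108000c0 */
	return 0;
}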
- */ - u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16); - - - alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); - event->hw.config = alt_config; - } - - if (intel_pmu_needs_lbr_smpl(event)) { - ret = intel_pmu_setup_lbr_filter(event); - if (ret) - return ret; - } - - if (event->attr.type != PERF_TYPE_RAW) - return 0; - - if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY)) - return 0; - - if (x86_pmu.version < 3) - return -EINVAL; - - if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) - return -EACCES; - - event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY; - - return 0; -} - -struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr) -{ - if (x86_pmu.guest_get_msrs) - return x86_pmu.guest_get_msrs(nr); - *nr = 0; - return NULL; -} -EXPORT_SYMBOL_GPL(perf_guest_get_msrs); - -static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; - - arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL; - arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask; - arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask; - - *nr = 1; - return arr; -} - -static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; - int idx; - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - struct perf_event *event = cpuc->events[idx]; - - arr[idx].msr = x86_pmu_config_addr(idx); - arr[idx].host = arr[idx].guest = 0; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - arr[idx].host = arr[idx].guest = - event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE; - - if (event->attr.exclude_host) - arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE; - else if (event->attr.exclude_guest) - arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE; - } - - *nr = x86_pmu.num_counters; - return arr; -} - -static void core_pmu_enable_event(struct perf_event *event) -{ - if (!event->attr.exclude_host) - x86_pmu_enable_event(event); -} - -static void core_pmu_enable_all(int added) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - int idx; - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - struct hw_perf_event *hwc = &cpuc->events[idx]->hw; - - if (!test_bit(idx, cpuc->active_mask) || - cpuc->events[idx]->attr.exclude_host) - continue; - - __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); - } -} - -PMU_FORMAT_ATTR(event, "config:0-7" ); -PMU_FORMAT_ATTR(umask, "config:8-15" ); -PMU_FORMAT_ATTR(edge, "config:18" ); -PMU_FORMAT_ATTR(pc, "config:19" ); -PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */ -PMU_FORMAT_ATTR(inv, "config:23" ); -PMU_FORMAT_ATTR(cmask, "config:24-31" ); - -static struct attribute *intel_arch_formats_attr[] = { - &format_attr_event.attr, - &format_attr_umask.attr, - &format_attr_edge.attr, - &format_attr_pc.attr, - &format_attr_inv.attr, - &format_attr_cmask.attr, - NULL, -}; - -static __initconst const struct x86_pmu core_pmu = { - .name = "core", - .handle_irq = x86_pmu_handle_irq, - .disable_all = x86_pmu_disable_all, - .enable_all = core_pmu_enable_all, - .enable = core_pmu_enable_event, - .disable = x86_pmu_disable_event, - .hw_config = x86_pmu_hw_config, - .schedule_events = x86_schedule_events, - .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, - .perfctr = MSR_ARCH_PERFMON_PERFCTR0, - .event_map = intel_pmu_event_map, - .max_events = ARRAY_SIZE(intel_perfmon_event_map), - .apic = 1, - /* - * Intel PMCs cannot be 
accessed sanely above 32 bit width, - * so we install an artificial 1<<31 period regardless of - * the generic event period: - */ - .max_period = (1ULL << 31) - 1, - .get_event_constraints = intel_get_event_constraints, - .put_event_constraints = intel_put_event_constraints, - .event_constraints = intel_core_event_constraints, - .guest_get_msrs = core_guest_get_msrs, - .format_attrs = intel_arch_formats_attr, -}; - -struct intel_shared_regs *allocate_shared_regs(int cpu) -{ - struct intel_shared_regs *regs; - int i; - - regs = kzalloc_node(sizeof(struct intel_shared_regs), - GFP_KERNEL, cpu_to_node(cpu)); - if (regs) { - /* - * initialize the locks to keep lockdep happy - */ - for (i = 0; i < EXTRA_REG_MAX; i++) - raw_spin_lock_init(®s->regs[i].lock); - - regs->core_id = -1; - } - return regs; -} - -static int intel_pmu_cpu_prepare(int cpu) -{ - struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); - - if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map)) - return NOTIFY_OK; - - cpuc->shared_regs = allocate_shared_regs(cpu); - if (!cpuc->shared_regs) - return NOTIFY_BAD; - - return NOTIFY_OK; -} - -static void intel_pmu_cpu_starting(int cpu) -{ - struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); - int core_id = topology_core_id(cpu); - int i; - - init_debug_store_on_cpu(cpu); - /* - * Deal with CPUs that don't clear their LBRs on power-up. - */ - intel_pmu_lbr_reset(); - - cpuc->lbr_sel = NULL; - - if (!cpuc->shared_regs) - return; - - if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) { - for_each_cpu(i, topology_thread_cpumask(cpu)) { - struct intel_shared_regs *pc; - - pc = per_cpu(cpu_hw_events, i).shared_regs; - if (pc && pc->core_id == core_id) { - cpuc->kfree_on_online = cpuc->shared_regs; - cpuc->shared_regs = pc; - break; - } - } - cpuc->shared_regs->core_id = core_id; - cpuc->shared_regs->refcnt++; - } - - if (x86_pmu.lbr_sel_map) - cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR]; -} - -static void intel_pmu_cpu_dying(int cpu) -{ - struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); - struct intel_shared_regs *pc; - - pc = cpuc->shared_regs; - if (pc) { - if (pc->core_id == -1 || --pc->refcnt == 0) - kfree(pc); - cpuc->shared_regs = NULL; - } - - fini_debug_store_on_cpu(cpu); -} - -static void intel_pmu_flush_branch_stack(void) -{ - /* - * Intel LBR does not tag entries with the - * PID of the current task, then we need to - * flush it on ctxsw - * For now, we simply reset it - */ - if (x86_pmu.lbr_nr) - intel_pmu_lbr_reset(); -} - -PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); - -static struct attribute *intel_arch3_formats_attr[] = { - &format_attr_event.attr, - &format_attr_umask.attr, - &format_attr_edge.attr, - &format_attr_pc.attr, - &format_attr_any.attr, - &format_attr_inv.attr, - &format_attr_cmask.attr, - - &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */ - NULL, -}; - -static __initconst const struct x86_pmu intel_pmu = { - .name = "Intel", - .handle_irq = intel_pmu_handle_irq, - .disable_all = intel_pmu_disable_all, - .enable_all = intel_pmu_enable_all, - .enable = intel_pmu_enable_event, - .disable = intel_pmu_disable_event, - .hw_config = intel_pmu_hw_config, - .schedule_events = x86_schedule_events, - .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, - .perfctr = MSR_ARCH_PERFMON_PERFCTR0, - .event_map = intel_pmu_event_map, - .max_events = ARRAY_SIZE(intel_perfmon_event_map), - .apic = 1, - /* - * Intel PMCs cannot be accessed sanely above 32 bit width, - * so we install an artificial 1<<31 period regardless of - * the generic event 
period: - */ - .max_period = (1ULL << 31) - 1, - .get_event_constraints = intel_get_event_constraints, - .put_event_constraints = intel_put_event_constraints, - - .format_attrs = intel_arch3_formats_attr, - - .cpu_prepare = intel_pmu_cpu_prepare, - .cpu_starting = intel_pmu_cpu_starting, - .cpu_dying = intel_pmu_cpu_dying, - .guest_get_msrs = intel_guest_get_msrs, - .flush_branch_stack = intel_pmu_flush_branch_stack, -}; - -static __init void intel_clovertown_quirk(void) -{ - /* - * PEBS is unreliable due to: - * - * AJ67 - PEBS may experience CPL leaks - * AJ68 - PEBS PMI may be delayed by one event - * AJ69 - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12] - * AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS - * - * AJ67 could be worked around by restricting the OS/USR flags. - * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI. - * - * AJ106 could possibly be worked around by not allowing LBR - * usage from PEBS, including the fixup. - * AJ68 could possibly be worked around by always programming - * a pebs_event_reset[0] value and coping with the lost events. - * - * But taken together it might just make sense to not enable PEBS on - * these chips. - */ - printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); - x86_pmu.pebs = 0; - x86_pmu.pebs_constraints = NULL; -} - -static __init void intel_sandybridge_quirk(void) -{ - printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); - x86_pmu.pebs = 0; - x86_pmu.pebs_constraints = NULL; -} - -static const struct { int id; char *name; } intel_arch_events_map[] __initconst = { - { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" }, - { PERF_COUNT_HW_INSTRUCTIONS, "instructions" }, - { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" }, - { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" }, - { PERF_COUNT_HW_CACHE_MISSES, "cache misses" }, - { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" }, - { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" }, -}; - -static __init void intel_arch_events_quirk(void) -{ - int bit; - - /* disable event that reported as not presend by cpuid */ - for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) { - intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0; - printk(KERN_WARNING "CPUID marked event: \'%s\' unavailable\n", - intel_arch_events_map[bit].name); - } -} - -static __init void intel_nehalem_quirk(void) -{ - union cpuid10_ebx ebx; - - ebx.full = x86_pmu.events_maskl; - if (ebx.split.no_branch_misses_retired) { - /* - * Erratum AAJ80 detected, we work it around by using - * the BR_MISP_EXEC.ANY event. This will over-count - * branch-misses, but it's still much better than the - * architectural event which is often completely bogus: - */ - intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89; - ebx.split.no_branch_misses_retired = 0; - x86_pmu.events_maskl = ebx.full; - printk(KERN_INFO "CPU erratum AAJ80 worked around\n"); - } -} - -__init int intel_pmu_init(void) -{ - union cpuid10_edx edx; - union cpuid10_eax eax; - union cpuid10_ebx ebx; - unsigned int unused; - int version; - - if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { - switch (boot_cpu_data.x86) { - case 0x6: - return p6_pmu_init(); - case 0xf: - return p4_pmu_init(); - } - return -ENODEV; - } - - /* - * Check whether the Architectural PerfMon supports - * Branch Misses Retired hw_event or not. 
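The cpuid(10, ...) probe that follows reads CPUID leaf 0xA, whose EAX word packs the PMU version, counter count, counter width and event-mask length, and whose EBX word flags architectural events that are not available (the fields the code picks apart as eax.split.* and ebx.full). A user-space sketch of the same probe, assuming a compiler that ships <cpuid.h>:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(0x0a, &eax, &ebx, &ecx, &edx))
		return 1;	/* leaf 0xA not supported */

	printf("version id:      %u\n", eax & 0xff);          /* version_id   */
	printf("gp counters:     %u\n", (eax >> 8)  & 0xff);  /* num_counters */
	printf("counter width:   %u\n", (eax >> 16) & 0xff);  /* bit_width    */
	printf("event mask len:  %u\n", (eax >> 24) & 0xff);  /* mask_length  */
	printf("unavail events:  0x%x\n", ebx);   /* set bit = event missing */
	return 0;
}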
- */ - cpuid(10, &eax.full, &ebx.full, &unused, &edx.full); - if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT) - return -ENODEV; - - version = eax.split.version_id; - if (version < 2) - x86_pmu = core_pmu; - else - x86_pmu = intel_pmu; - - x86_pmu.version = version; - x86_pmu.num_counters = eax.split.num_counters; - x86_pmu.cntval_bits = eax.split.bit_width; - x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; - - x86_pmu.events_maskl = ebx.full; - x86_pmu.events_mask_len = eax.split.mask_length; - - /* - * Quirk: v2 perfmon does not report fixed-purpose events, so - * assume at least 3 events: - */ - if (version > 1) - x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); - - /* - * v2 and above have a perf capabilities MSR - */ - if (version > 1) { - u64 capabilities; - - rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities); - x86_pmu.intel_cap.capabilities = capabilities; - } - - intel_ds_init(); - - x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */ - - /* - * Install the hw-cache-events table: - */ - switch (boot_cpu_data.x86_model) { - case 14: /* 65 nm core solo/duo, "Yonah" */ - pr_cont("Core events, "); - break; - - case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ - x86_add_quirk(intel_clovertown_quirk); - case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ - case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ - case 29: /* six-core 45 nm xeon "Dunnington" */ - memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - - intel_pmu_lbr_init_core(); - - x86_pmu.event_constraints = intel_core2_event_constraints; - x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints; - pr_cont("Core2 events, "); - break; - - case 26: /* 45 nm nehalem, "Bloomfield" */ - case 30: /* 45 nm nehalem, "Lynnfield" */ - case 46: /* 45 nm nehalem-ex, "Beckton" */ - memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, - sizeof(hw_cache_extra_regs)); - - intel_pmu_lbr_init_nhm(); - - x86_pmu.event_constraints = intel_nehalem_event_constraints; - x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints; - x86_pmu.enable_all = intel_pmu_nhm_enable_all; - x86_pmu.extra_regs = intel_nehalem_extra_regs; - - /* UOPS_ISSUED.STALLED_CYCLES */ - intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = - X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); - /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ - intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = - X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); - - x86_add_quirk(intel_nehalem_quirk); - - pr_cont("Nehalem events, "); - break; - - case 28: /* Atom */ - memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - - intel_pmu_lbr_init_atom(); - - x86_pmu.event_constraints = intel_gen_event_constraints; - x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints; - pr_cont("Atom events, "); - break; - - case 37: /* 32 nm nehalem, "Clarkdale" */ - case 44: /* 32 nm nehalem, "Gulftown" */ - case 47: /* 32 nm Xeon E7 */ - memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, - sizeof(hw_cache_extra_regs)); - - intel_pmu_lbr_init_nhm(); - - x86_pmu.event_constraints = intel_westmere_event_constraints; - x86_pmu.enable_all = intel_pmu_nhm_enable_all; - x86_pmu.pebs_constraints 
= intel_westmere_pebs_event_constraints; - x86_pmu.extra_regs = intel_westmere_extra_regs; - x86_pmu.er_flags |= ERF_HAS_RSP_1; - - /* UOPS_ISSUED.STALLED_CYCLES */ - intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = - X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); - /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ - intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = - X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); - - pr_cont("Westmere events, "); - break; - - case 42: /* SandyBridge */ - x86_add_quirk(intel_sandybridge_quirk); - case 45: /* SandyBridge, "Romely-EP" */ - memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - - intel_pmu_lbr_init_snb(); - - x86_pmu.event_constraints = intel_snb_event_constraints; - x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; - x86_pmu.extra_regs = intel_snb_extra_regs; - /* all extra regs are per-cpu when HT is on */ - x86_pmu.er_flags |= ERF_HAS_RSP_1; - x86_pmu.er_flags |= ERF_NO_HT_SHARING; - - /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ - intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = - X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); - /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/ - intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = - X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1); - - pr_cont("SandyBridge events, "); - break; - - default: - switch (x86_pmu.version) { - case 1: - x86_pmu.event_constraints = intel_v1_event_constraints; - pr_cont("generic architected perfmon v1, "); - break; - default: - /* - * default constraints for v2 and up - */ - x86_pmu.event_constraints = intel_gen_event_constraints; - pr_cont("generic architected perfmon, "); - break; - } - } - - return 0; -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/perf_event_intel_ds.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/perf_event_intel_ds.c deleted file mode 100644 index 7f64df19..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ /dev/null @@ -1,725 +0,0 @@ -#include <linux/bitops.h> -#include <linux/types.h> -#include <linux/slab.h> - -#include <asm/perf_event.h> -#include <asm/insn.h> - -#include "perf_event.h" - -/* The size of a BTS record in bytes: */ -#define BTS_RECORD_SIZE 24 - -#define BTS_BUFFER_SIZE (PAGE_SIZE << 4) -#define PEBS_BUFFER_SIZE PAGE_SIZE - -/* - * pebs_record_32 for p4 and core not supported - -struct pebs_record_32 { - u32 flags, ip; - u32 ax, bc, cx, dx; - u32 si, di, bp, sp; -}; - - */ - -struct pebs_record_core { - u64 flags, ip; - u64 ax, bx, cx, dx; - u64 si, di, bp, sp; - u64 r8, r9, r10, r11; - u64 r12, r13, r14, r15; -}; - -struct pebs_record_nhm { - u64 flags, ip; - u64 ax, bx, cx, dx; - u64 si, di, bp, sp; - u64 r8, r9, r10, r11; - u64 r12, r13, r14, r15; - u64 status, dla, dse, lat; -}; - -void init_debug_store_on_cpu(int cpu) -{ - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; - - if (!ds) - return; - - wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, - (u32)((u64)(unsigned long)ds), - (u32)((u64)(unsigned long)ds >> 32)); -} - -void fini_debug_store_on_cpu(int cpu) -{ - if (!per_cpu(cpu_hw_events, cpu).ds) - return; - - wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); -} - -static int alloc_pebs_buffer(int cpu) -{ - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; - int node = cpu_to_node(cpu); - int max, thresh = 1; /* always use a single PEBS record */ - void *buffer; - - if (!x86_pmu.pebs) - return 0; - - buffer = kmalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL | 
__GFP_ZERO, node); - if (unlikely(!buffer)) - return -ENOMEM; - - max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size; - - ds->pebs_buffer_base = (u64)(unsigned long)buffer; - ds->pebs_index = ds->pebs_buffer_base; - ds->pebs_absolute_maximum = ds->pebs_buffer_base + - max * x86_pmu.pebs_record_size; - - ds->pebs_interrupt_threshold = ds->pebs_buffer_base + - thresh * x86_pmu.pebs_record_size; - - return 0; -} - -static void release_pebs_buffer(int cpu) -{ - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; - - if (!ds || !x86_pmu.pebs) - return; - - kfree((void *)(unsigned long)ds->pebs_buffer_base); - ds->pebs_buffer_base = 0; -} - -static int alloc_bts_buffer(int cpu) -{ - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; - int node = cpu_to_node(cpu); - int max, thresh; - void *buffer; - - if (!x86_pmu.bts) - return 0; - - buffer = kmalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node); - if (unlikely(!buffer)) - return -ENOMEM; - - max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; - thresh = max / 16; - - ds->bts_buffer_base = (u64)(unsigned long)buffer; - ds->bts_index = ds->bts_buffer_base; - ds->bts_absolute_maximum = ds->bts_buffer_base + - max * BTS_RECORD_SIZE; - ds->bts_interrupt_threshold = ds->bts_absolute_maximum - - thresh * BTS_RECORD_SIZE; - - return 0; -} - -static void release_bts_buffer(int cpu) -{ - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; - - if (!ds || !x86_pmu.bts) - return; - - kfree((void *)(unsigned long)ds->bts_buffer_base); - ds->bts_buffer_base = 0; -} - -static int alloc_ds_buffer(int cpu) -{ - int node = cpu_to_node(cpu); - struct debug_store *ds; - - ds = kmalloc_node(sizeof(*ds), GFP_KERNEL | __GFP_ZERO, node); - if (unlikely(!ds)) - return -ENOMEM; - - per_cpu(cpu_hw_events, cpu).ds = ds; - - return 0; -} - -static void release_ds_buffer(int cpu) -{ - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; - - if (!ds) - return; - - per_cpu(cpu_hw_events, cpu).ds = NULL; - kfree(ds); -} - -void release_ds_buffers(void) -{ - int cpu; - - if (!x86_pmu.bts && !x86_pmu.pebs) - return; - - get_online_cpus(); - for_each_online_cpu(cpu) - fini_debug_store_on_cpu(cpu); - - for_each_possible_cpu(cpu) { - release_pebs_buffer(cpu); - release_bts_buffer(cpu); - release_ds_buffer(cpu); - } - put_online_cpus(); -} - -void reserve_ds_buffers(void) -{ - int bts_err = 0, pebs_err = 0; - int cpu; - - x86_pmu.bts_active = 0; - x86_pmu.pebs_active = 0; - - if (!x86_pmu.bts && !x86_pmu.pebs) - return; - - if (!x86_pmu.bts) - bts_err = 1; - - if (!x86_pmu.pebs) - pebs_err = 1; - - get_online_cpus(); - - for_each_possible_cpu(cpu) { - if (alloc_ds_buffer(cpu)) { - bts_err = 1; - pebs_err = 1; - } - - if (!bts_err && alloc_bts_buffer(cpu)) - bts_err = 1; - - if (!pebs_err && alloc_pebs_buffer(cpu)) - pebs_err = 1; - - if (bts_err && pebs_err) - break; - } - - if (bts_err) { - for_each_possible_cpu(cpu) - release_bts_buffer(cpu); - } - - if (pebs_err) { - for_each_possible_cpu(cpu) - release_pebs_buffer(cpu); - } - - if (bts_err && pebs_err) { - for_each_possible_cpu(cpu) - release_ds_buffer(cpu); - } else { - if (x86_pmu.bts && !bts_err) - x86_pmu.bts_active = 1; - - if (x86_pmu.pebs && !pebs_err) - x86_pmu.pebs_active = 1; - - for_each_online_cpu(cpu) - init_debug_store_on_cpu(cpu); - } - - put_online_cpus(); -} - -/* - * BTS - */ - -struct event_constraint bts_constraint = - EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); - -void intel_pmu_enable_bts(u64 config) -{ - unsigned long debugctlmsr; - - debugctlmsr = get_debugctlmsr(); - - 
debugctlmsr |= DEBUGCTLMSR_TR; - debugctlmsr |= DEBUGCTLMSR_BTS; - debugctlmsr |= DEBUGCTLMSR_BTINT; - - if (!(config & ARCH_PERFMON_EVENTSEL_OS)) - debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS; - - if (!(config & ARCH_PERFMON_EVENTSEL_USR)) - debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR; - - update_debugctlmsr(debugctlmsr); -} - -void intel_pmu_disable_bts(void) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - unsigned long debugctlmsr; - - if (!cpuc->ds) - return; - - debugctlmsr = get_debugctlmsr(); - - debugctlmsr &= - ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT | - DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR); - - update_debugctlmsr(debugctlmsr); -} - -int intel_pmu_drain_bts_buffer(void) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct debug_store *ds = cpuc->ds; - struct bts_record { - u64 from; - u64 to; - u64 flags; - }; - struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; - struct bts_record *at, *top; - struct perf_output_handle handle; - struct perf_event_header header; - struct perf_sample_data data; - struct pt_regs regs; - - if (!event) - return 0; - - if (!x86_pmu.bts_active) - return 0; - - at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; - top = (struct bts_record *)(unsigned long)ds->bts_index; - - if (top <= at) - return 0; - - ds->bts_index = ds->bts_buffer_base; - - perf_sample_data_init(&data, 0); - data.period = event->hw.last_period; - regs.ip = 0; - - /* - * Prepare a generic sample, i.e. fill in the invariant fields. - * We will overwrite the from and to address before we output - * the sample. - */ - perf_prepare_sample(&header, &data, event, ®s); - - if (perf_output_begin(&handle, event, header.size * (top - at))) - return 1; - - for (; at < top; at++) { - data.ip = at->from; - data.addr = at->to; - - perf_output_sample(&handle, &header, &data, event); - } - - perf_output_end(&handle); - - /* There's new data available. 
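The drain loop above walks the debug-store area from bts_buffer_base to bts_index in 24-byte steps, each record describing one taken branch as (from, to, flags), and emits one sample per record. A minimal user-space model of that walk (the buffer contents here are made-up addresses, purely for illustration):

#include <stdio.h>
#include <stdint.h>

/* Same shape as the on-stack struct bts_record in the drain routine. */
struct bts_record {
	uint64_t from, to, flags;
};

int main(void)
{
	/* stands in for ds->bts_buffer_base .. ds->bts_index */
	struct bts_record buf[3] = {
		{ 0xffffffff81000010ULL, 0xffffffff81000200ULL, 0 },
		{ 0xffffffff81000230ULL, 0xffffffff81000400ULL, 0 },
		{ 0xffffffff81000410ULL, 0xffffffff81000100ULL, 0 },
	};
	struct bts_record *at = buf, *top = buf + 3;

	for (; at < top; at++)	/* one sample per branch record */
		printf("branch %#llx -> %#llx\n",
		       (unsigned long long)at->from,
		       (unsigned long long)at->to);
	return 0;
}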
*/ - event->hw.interrupts++; - event->pending_kill = POLL_IN; - return 1; -} - -/* - * PEBS - */ -struct event_constraint intel_core2_pebs_event_constraints[] = { - INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ - INTEL_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */ - INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ - INTEL_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */ - INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ - EVENT_CONSTRAINT_END -}; - -struct event_constraint intel_atom_pebs_event_constraints[] = { - INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ - INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */ - INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ - EVENT_CONSTRAINT_END -}; - -struct event_constraint intel_nehalem_pebs_event_constraints[] = { - INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ - INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ - INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */ - INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ - INTEL_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */ - INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */ - INTEL_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ - INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ - EVENT_CONSTRAINT_END -}; - -struct event_constraint intel_westmere_pebs_event_constraints[] = { - INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ - INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ - INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */ - INTEL_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ - INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ - EVENT_CONSTRAINT_END -}; - -struct event_constraint intel_snb_pebs_event_constraints[] = { - INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ - INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ - INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ - INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */ - INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */ - INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */ - INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */ - INTEL_UEVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */ - INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */ - INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */ - INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */ - INTEL_UEVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */ - INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* 
MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ - INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */ - EVENT_CONSTRAINT_END -}; - -struct event_constraint *intel_pebs_constraints(struct perf_event *event) -{ - struct event_constraint *c; - - if (!event->attr.precise_ip) - return NULL; - - if (x86_pmu.pebs_constraints) { - for_each_event_constraint(c, x86_pmu.pebs_constraints) { - if ((event->hw.config & c->cmask) == c->code) - return c; - } - } - - return &emptyconstraint; -} - -void intel_pmu_pebs_enable(struct perf_event *event) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; - - hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; - - cpuc->pebs_enabled |= 1ULL << hwc->idx; -} - -void intel_pmu_pebs_disable(struct perf_event *event) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; - - cpuc->pebs_enabled &= ~(1ULL << hwc->idx); - if (cpuc->enabled) - wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); - - hwc->config |= ARCH_PERFMON_EVENTSEL_INT; -} - -void intel_pmu_pebs_enable_all(void) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - - if (cpuc->pebs_enabled) - wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); -} - -void intel_pmu_pebs_disable_all(void) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - - if (cpuc->pebs_enabled) - wrmsrl(MSR_IA32_PEBS_ENABLE, 0); -} - -static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - unsigned long from = cpuc->lbr_entries[0].from; - unsigned long old_to, to = cpuc->lbr_entries[0].to; - unsigned long ip = regs->ip; - int is_64bit = 0; - - /* - * We don't need to fixup if the PEBS assist is fault like - */ - if (!x86_pmu.intel_cap.pebs_trap) - return 1; - - /* - * No LBR entry, no basic block, no rewinding - */ - if (!cpuc->lbr_stack.nr || !from || !to) - return 0; - - /* - * Basic blocks should never cross user/kernel boundaries - */ - if (kernel_ip(ip) != kernel_ip(to)) - return 0; - - /* - * unsigned math, either ip is before the start (impossible) or - * the basic block is larger than 1 page (sanity) - */ - if ((ip - to) > PAGE_SIZE) - return 0; - - /* - * We sampled a branch insn, rewind using the LBR stack - */ - if (ip == to) { - regs->ip = from; - return 1; - } - - do { - struct insn insn; - u8 buf[MAX_INSN_SIZE]; - void *kaddr; - - old_to = to; - if (!kernel_ip(ip)) { - int bytes, size = MAX_INSN_SIZE; - - bytes = copy_from_user_nmi(buf, (void __user *)to, size); - if (bytes != size) - return 0; - - kaddr = buf; - } else - kaddr = (void *)to; - -#ifdef CONFIG_X86_64 - is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32); -#endif - insn_init(&insn, kaddr, is_64bit); - insn_get_length(&insn); - to += insn.length; - } while (to < ip); - - if (to == ip) { - regs->ip = old_to; - return 1; - } - - /* - * Even though we decoded the basic block, the instruction stream - * never matched the given IP, either the TO or the IP got corrupted. - */ - return 0; -} - -static void __intel_pmu_pebs_event(struct perf_event *event, - struct pt_regs *iregs, void *__pebs) -{ - /* - * We cast to pebs_record_core since that is a subset of - * both formats and we don't use the other fields in this - * routine. 
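 *
 * (Editor's illustration, not part of the original source: only the
 * ip/bp/sp fields shared by both PEBS record layouts are read below,
 * so the cast to the smaller Core-style record stays valid whether the
 * hardware actually wrote a Core-style or a Nehalem-style entry.)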
- */ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct pebs_record_core *pebs = __pebs; - struct perf_sample_data data; - struct pt_regs regs; - - if (!intel_pmu_save_and_restart(event)) - return; - - perf_sample_data_init(&data, 0); - data.period = event->hw.last_period; - - /* - * We use the interrupt regs as a base because the PEBS record - * does not contain a full regs set, specifically it seems to - * lack segment descriptors, which get used by things like - * user_mode(). - * - * In the simple case fix up only the IP and BP,SP regs, for - * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly. - * A possible PERF_SAMPLE_REGS will have to transfer all regs. - */ - regs = *iregs; - regs.ip = pebs->ip; - regs.bp = pebs->bp; - regs.sp = pebs->sp; - - if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(®s)) - regs.flags |= PERF_EFLAGS_EXACT; - else - regs.flags &= ~PERF_EFLAGS_EXACT; - - if (has_branch_stack(event)) - data.br_stack = &cpuc->lbr_stack; - - if (perf_event_overflow(event, &data, ®s)) - x86_pmu_stop(event, 0); -} - -static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct debug_store *ds = cpuc->ds; - struct perf_event *event = cpuc->events[0]; /* PMC0 only */ - struct pebs_record_core *at, *top; - int n; - - if (!x86_pmu.pebs_active) - return; - - at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base; - top = (struct pebs_record_core *)(unsigned long)ds->pebs_index; - - /* - * Whatever else happens, drain the thing - */ - ds->pebs_index = ds->pebs_buffer_base; - - if (!test_bit(0, cpuc->active_mask)) - return; - - WARN_ON_ONCE(!event); - - if (!event->attr.precise_ip) - return; - - n = top - at; - if (n <= 0) - return; - - /* - * Should not happen, we program the threshold at 1 and do not - * set a reset value. - */ - WARN_ON_ONCE(n > 1); - at += n - 1; - - __intel_pmu_pebs_event(event, iregs, at); -} - -static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct debug_store *ds = cpuc->ds; - struct pebs_record_nhm *at, *top; - struct perf_event *event = NULL; - u64 status = 0; - int bit, n; - - if (!x86_pmu.pebs_active) - return; - - at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; - top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index; - - ds->pebs_index = ds->pebs_buffer_base; - - n = top - at; - if (n <= 0) - return; - - /* - * Should not happen, we program the threshold at 1 and do not - * set a reset value. - */ - WARN_ON_ONCE(n > MAX_PEBS_EVENTS); - - for ( ; at < top; at++) { - for_each_set_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) { - event = cpuc->events[bit]; - if (!test_bit(bit, cpuc->active_mask)) - continue; - - WARN_ON_ONCE(!event); - - if (!event->attr.precise_ip) - continue; - - if (__test_and_set_bit(bit, (unsigned long *)&status)) - continue; - - break; - } - - if (!event || bit >= MAX_PEBS_EVENTS) - continue; - - __intel_pmu_pebs_event(event, iregs, at); - } -} - -/* - * BTS, PEBS probe and setup - */ - -void intel_ds_init(void) -{ - /* - * No support for 32bit formats - */ - if (!boot_cpu_has(X86_FEATURE_DTES64)) - return; - - x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); - x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); - if (x86_pmu.pebs) { - char pebs_type = x86_pmu.intel_cap.pebs_trap ? 
'+' : '-'; - int format = x86_pmu.intel_cap.pebs_format; - - switch (format) { - case 0: - printk(KERN_CONT "PEBS fmt0%c, ", pebs_type); - x86_pmu.pebs_record_size = sizeof(struct pebs_record_core); - x86_pmu.drain_pebs = intel_pmu_drain_pebs_core; - break; - - case 1: - printk(KERN_CONT "PEBS fmt1%c, ", pebs_type); - x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm); - x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; - break; - - default: - printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type); - x86_pmu.pebs = 0; - } - } -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/perf_event_intel_lbr.c deleted file mode 100644 index 520b4265..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ /dev/null @@ -1,704 +0,0 @@ -#include <linux/perf_event.h> -#include <linux/types.h> - -#include <asm/perf_event.h> -#include <asm/msr.h> -#include <asm/insn.h> - -#include "perf_event.h" - -enum { - LBR_FORMAT_32 = 0x00, - LBR_FORMAT_LIP = 0x01, - LBR_FORMAT_EIP = 0x02, - LBR_FORMAT_EIP_FLAGS = 0x03, -}; - -/* - * Intel LBR_SELECT bits - * Intel Vol3a, April 2011, Section 16.7 Table 16-10 - * - * Hardware branch filter (not available on all CPUs) - */ -#define LBR_KERNEL_BIT 0 /* do not capture at ring0 */ -#define LBR_USER_BIT 1 /* do not capture at ring > 0 */ -#define LBR_JCC_BIT 2 /* do not capture conditional branches */ -#define LBR_REL_CALL_BIT 3 /* do not capture relative calls */ -#define LBR_IND_CALL_BIT 4 /* do not capture indirect calls */ -#define LBR_RETURN_BIT 5 /* do not capture near returns */ -#define LBR_IND_JMP_BIT 6 /* do not capture indirect jumps */ -#define LBR_REL_JMP_BIT 7 /* do not capture relative jumps */ -#define LBR_FAR_BIT 8 /* do not capture far branches */ - -#define LBR_KERNEL (1 << LBR_KERNEL_BIT) -#define LBR_USER (1 << LBR_USER_BIT) -#define LBR_JCC (1 << LBR_JCC_BIT) -#define LBR_REL_CALL (1 << LBR_REL_CALL_BIT) -#define LBR_IND_CALL (1 << LBR_IND_CALL_BIT) -#define LBR_RETURN (1 << LBR_RETURN_BIT) -#define LBR_REL_JMP (1 << LBR_REL_JMP_BIT) -#define LBR_IND_JMP (1 << LBR_IND_JMP_BIT) -#define LBR_FAR (1 << LBR_FAR_BIT) - -#define LBR_PLM (LBR_KERNEL | LBR_USER) - -#define LBR_SEL_MASK 0x1ff /* valid bits in LBR_SELECT */ -#define LBR_NOT_SUPP -1 /* LBR filter not supported */ -#define LBR_IGN 0 /* ignored */ - -#define LBR_ANY \ - (LBR_JCC |\ - LBR_REL_CALL |\ - LBR_IND_CALL |\ - LBR_RETURN |\ - LBR_REL_JMP |\ - LBR_IND_JMP |\ - LBR_FAR) - -#define LBR_FROM_FLAG_MISPRED (1ULL << 63) - -#define for_each_branch_sample_type(x) \ - for ((x) = PERF_SAMPLE_BRANCH_USER; \ - (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1) - -/* - * x86control flow change classification - * x86control flow changes include branches, interrupts, traps, faults - */ -enum { - X86_BR_NONE = 0, /* unknown */ - - X86_BR_USER = 1 << 0, /* branch target is user */ - X86_BR_KERNEL = 1 << 1, /* branch target is kernel */ - - X86_BR_CALL = 1 << 2, /* call */ - X86_BR_RET = 1 << 3, /* return */ - X86_BR_SYSCALL = 1 << 4, /* syscall */ - X86_BR_SYSRET = 1 << 5, /* syscall return */ - X86_BR_INT = 1 << 6, /* sw interrupt */ - X86_BR_IRET = 1 << 7, /* return from interrupt */ - X86_BR_JCC = 1 << 8, /* conditional */ - X86_BR_JMP = 1 << 9, /* jump */ - X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */ - X86_BR_IND_CALL = 1 << 11,/* indirect calls */ -}; - -#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL) - -#define X86_BR_ANY \ - (X86_BR_CALL |\ - X86_BR_RET |\ - X86_BR_SYSCALL |\ - X86_BR_SYSRET |\ - X86_BR_INT |\ - 
X86_BR_IRET |\ - X86_BR_JCC |\ - X86_BR_JMP |\ - X86_BR_IRQ |\ - X86_BR_IND_CALL) - -#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY) - -#define X86_BR_ANY_CALL \ - (X86_BR_CALL |\ - X86_BR_IND_CALL |\ - X86_BR_SYSCALL |\ - X86_BR_IRQ |\ - X86_BR_INT) - -static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc); - -/* - * We only support LBR implementations that have FREEZE_LBRS_ON_PMI - * otherwise it becomes near impossible to get a reliable stack. - */ - -static void __intel_pmu_lbr_enable(void) -{ - u64 debugctl; - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - - if (cpuc->lbr_sel) - wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config); - - rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); - debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); - wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); -} - -static void __intel_pmu_lbr_disable(void) -{ - u64 debugctl; - - rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); - debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); - wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); -} - -static void intel_pmu_lbr_reset_32(void) -{ - int i; - - for (i = 0; i < x86_pmu.lbr_nr; i++) - wrmsrl(x86_pmu.lbr_from + i, 0); -} - -static void intel_pmu_lbr_reset_64(void) -{ - int i; - - for (i = 0; i < x86_pmu.lbr_nr; i++) { - wrmsrl(x86_pmu.lbr_from + i, 0); - wrmsrl(x86_pmu.lbr_to + i, 0); - } -} - -void intel_pmu_lbr_reset(void) -{ - if (!x86_pmu.lbr_nr) - return; - - if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) - intel_pmu_lbr_reset_32(); - else - intel_pmu_lbr_reset_64(); -} - -void intel_pmu_lbr_enable(struct perf_event *event) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - - if (!x86_pmu.lbr_nr) - return; - - /* - * Reset the LBR stack if we changed task context to - * avoid data leaks. - */ - if (event->ctx->task && cpuc->lbr_context != event->ctx) { - intel_pmu_lbr_reset(); - cpuc->lbr_context = event->ctx; - } - cpuc->br_sel = event->hw.branch_reg.reg; - - cpuc->lbr_users++; -} - -void intel_pmu_lbr_disable(struct perf_event *event) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - - if (!x86_pmu.lbr_nr) - return; - - cpuc->lbr_users--; - WARN_ON_ONCE(cpuc->lbr_users < 0); - - if (cpuc->enabled && !cpuc->lbr_users) { - __intel_pmu_lbr_disable(); - /* avoid stale pointer */ - cpuc->lbr_context = NULL; - } -} - -void intel_pmu_lbr_enable_all(void) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - - if (cpuc->lbr_users) - __intel_pmu_lbr_enable(); -} - -void intel_pmu_lbr_disable_all(void) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - - if (cpuc->lbr_users) - __intel_pmu_lbr_disable(); -} - -/* - * TOS = most recently recorded branch - */ -static inline u64 intel_pmu_lbr_tos(void) -{ - u64 tos; - - rdmsrl(x86_pmu.lbr_tos, tos); - - return tos; -} - -static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) -{ - unsigned long mask = x86_pmu.lbr_nr - 1; - u64 tos = intel_pmu_lbr_tos(); - int i; - - for (i = 0; i < x86_pmu.lbr_nr; i++) { - unsigned long lbr_idx = (tos - i) & mask; - union { - struct { - u32 from; - u32 to; - }; - u64 lbr; - } msr_lastbranch; - - rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); - - cpuc->lbr_entries[i].from = msr_lastbranch.from; - cpuc->lbr_entries[i].to = msr_lastbranch.to; - cpuc->lbr_entries[i].mispred = 0; - cpuc->lbr_entries[i].predicted = 0; - cpuc->lbr_entries[i].reserved = 0; - } - cpuc->lbr_stack.nr = i; -} - -/* - * Due to lack of segmentation in Linux the effective address (offset) - * is the same as the linear address, allowing us 
to merge the LIP and EIP - * LBR formats. - */ -static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) -{ - unsigned long mask = x86_pmu.lbr_nr - 1; - int lbr_format = x86_pmu.intel_cap.lbr_format; - u64 tos = intel_pmu_lbr_tos(); - int i; - - for (i = 0; i < x86_pmu.lbr_nr; i++) { - unsigned long lbr_idx = (tos - i) & mask; - u64 from, to, mis = 0, pred = 0; - - rdmsrl(x86_pmu.lbr_from + lbr_idx, from); - rdmsrl(x86_pmu.lbr_to + lbr_idx, to); - - if (lbr_format == LBR_FORMAT_EIP_FLAGS) { - mis = !!(from & LBR_FROM_FLAG_MISPRED); - pred = !mis; - from = (u64)((((s64)from) << 1) >> 1); - } - - cpuc->lbr_entries[i].from = from; - cpuc->lbr_entries[i].to = to; - cpuc->lbr_entries[i].mispred = mis; - cpuc->lbr_entries[i].predicted = pred; - cpuc->lbr_entries[i].reserved = 0; - } - cpuc->lbr_stack.nr = i; -} - -void intel_pmu_lbr_read(void) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - - if (!cpuc->lbr_users) - return; - - if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) - intel_pmu_lbr_read_32(cpuc); - else - intel_pmu_lbr_read_64(cpuc); - - intel_pmu_lbr_filter(cpuc); -} - -/* - * SW filter is used: - * - in case there is no HW filter - * - in case the HW filter has errata or limitations - */ -static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event) -{ - u64 br_type = event->attr.branch_sample_type; - int mask = 0; - - if (br_type & PERF_SAMPLE_BRANCH_USER) - mask |= X86_BR_USER; - - if (br_type & PERF_SAMPLE_BRANCH_KERNEL) - mask |= X86_BR_KERNEL; - - /* we ignore BRANCH_HV here */ - - if (br_type & PERF_SAMPLE_BRANCH_ANY) - mask |= X86_BR_ANY; - - if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL) - mask |= X86_BR_ANY_CALL; - - if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN) - mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET; - - if (br_type & PERF_SAMPLE_BRANCH_IND_CALL) - mask |= X86_BR_IND_CALL; - /* - * stash actual user request into reg, it may - * be used by fixup code for some CPU - */ - event->hw.branch_reg.reg = mask; -} - -/* - * setup the HW LBR filter - * Used only when available, may not be enough to disambiguate - * all branches, may need the help of the SW filter - */ -static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) -{ - struct hw_perf_event_extra *reg; - u64 br_type = event->attr.branch_sample_type; - u64 mask = 0, m; - u64 v; - - for_each_branch_sample_type(m) { - if (!(br_type & m)) - continue; - - v = x86_pmu.lbr_sel_map[m]; - if (v == LBR_NOT_SUPP) - return -EOPNOTSUPP; - - if (v != LBR_IGN) - mask |= v; - } - reg = &event->hw.branch_reg; - reg->idx = EXTRA_REG_LBR; - - /* LBR_SELECT operates in suppress mode so invert mask */ - reg->config = ~mask & x86_pmu.lbr_sel_mask; - - return 0; -} - -int intel_pmu_setup_lbr_filter(struct perf_event *event) -{ - int ret = 0; - - /* - * no LBR on this PMU - */ - if (!x86_pmu.lbr_nr) - return -EOPNOTSUPP; - - /* - * setup SW LBR filter - */ - intel_pmu_setup_sw_lbr_filter(event); - - /* - * setup HW LBR filter, if any - */ - if (x86_pmu.lbr_sel_map) - ret = intel_pmu_setup_hw_lbr_filter(event); - - return ret; -} - -/* - * return the type of control flow change at address "from" - * intruction is not necessarily a branch (in case of interrupt). - * - * The branch type returned also includes the priv level of the - * target of the control flow change (X86_BR_USER, X86_BR_KERNEL). - * - * If a branch type is unknown OR the instruction cannot be - * decoded (e.g., text page not present), then X86_BR_NONE is - * returned. 
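 *
 * (Editor's illustration, not part of the original source: a near
 * relative call whose target lies in user space would be reported as
 * X86_BR_CALL | X86_BR_USER, since the privilege level of the target
 * is OR-ed into the classification at the end of this function.)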
- */ -static int branch_type(unsigned long from, unsigned long to) -{ - struct insn insn; - void *addr; - int bytes, size = MAX_INSN_SIZE; - int ret = X86_BR_NONE; - int ext, to_plm, from_plm; - u8 buf[MAX_INSN_SIZE]; - int is64 = 0; - - to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER; - from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER; - - /* - * maybe zero if lbr did not fill up after a reset by the time - * we get a PMU interrupt - */ - if (from == 0 || to == 0) - return X86_BR_NONE; - - if (from_plm == X86_BR_USER) { - /* - * can happen if measuring at the user level only - * and we interrupt in a kernel thread, e.g., idle. - */ - if (!current->mm) - return X86_BR_NONE; - - /* may fail if text not present */ - bytes = copy_from_user_nmi(buf, (void __user *)from, size); - if (bytes != size) - return X86_BR_NONE; - - addr = buf; - } else - addr = (void *)from; - - /* - * decoder needs to know the ABI especially - * on 64-bit systems running 32-bit apps - */ -#ifdef CONFIG_X86_64 - is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32); -#endif - insn_init(&insn, addr, is64); - insn_get_opcode(&insn); - - switch (insn.opcode.bytes[0]) { - case 0xf: - switch (insn.opcode.bytes[1]) { - case 0x05: /* syscall */ - case 0x34: /* sysenter */ - ret = X86_BR_SYSCALL; - break; - case 0x07: /* sysret */ - case 0x35: /* sysexit */ - ret = X86_BR_SYSRET; - break; - case 0x80 ... 0x8f: /* conditional */ - ret = X86_BR_JCC; - break; - default: - ret = X86_BR_NONE; - } - break; - case 0x70 ... 0x7f: /* conditional */ - ret = X86_BR_JCC; - break; - case 0xc2: /* near ret */ - case 0xc3: /* near ret */ - case 0xca: /* far ret */ - case 0xcb: /* far ret */ - ret = X86_BR_RET; - break; - case 0xcf: /* iret */ - ret = X86_BR_IRET; - break; - case 0xcc ... 0xce: /* int */ - ret = X86_BR_INT; - break; - case 0xe8: /* call near rel */ - case 0x9a: /* call far absolute */ - ret = X86_BR_CALL; - break; - case 0xe0 ... 0xe3: /* loop jmp */ - ret = X86_BR_JCC; - break; - case 0xe9 ... 0xeb: /* jmp */ - ret = X86_BR_JMP; - break; - case 0xff: /* call near absolute, call far absolute ind */ - insn_get_modrm(&insn); - ext = (insn.modrm.bytes[0] >> 3) & 0x7; - switch (ext) { - case 2: /* near ind call */ - case 3: /* far ind call */ - ret = X86_BR_IND_CALL; - break; - case 4: - case 5: - ret = X86_BR_JMP; - break; - } - break; - default: - ret = X86_BR_NONE; - } - /* - * interrupts, traps, faults (and thus ring transition) may - * occur on any instructions. Thus, to classify them correctly, - * we need to first look at the from and to priv levels. If they - * are different and to is in the kernel, then it indicates - * a ring transition. If the from instruction is not a ring - * transition instr (syscall, systenter, int), then it means - * it was a irq, trap or fault. - * - * we have no way of detecting kernel to kernel faults. - */ - if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL - && ret != X86_BR_SYSCALL && ret != X86_BR_INT) - ret = X86_BR_IRQ; - - /* - * branch priv level determined by target as - * is done by HW when LBR_SELECT is implemented - */ - if (ret != X86_BR_NONE) - ret |= to_plm; - - return ret; -} - -/* - * implement actual branch filter based on user demand. - * Hardware may not exactly satisfy that request, thus - * we need to inspect opcodes. Mismatched branches are - * discarded. Therefore, the number of branches returned - * in PERF_SAMPLE_BRANCH_STACK sample may vary. 
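 *
 * (Editor's illustration, not part of the original source: a caller
 * requesting, say, PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY_CALL
 * in attr.branch_sample_type may therefore see fewer than lbr_nr
 * entries in the resulting PERF_SAMPLE_BRANCH_STACK sample once
 * non-matching records have been dropped here.)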
- */ -static void -intel_pmu_lbr_filter(struct cpu_hw_events *cpuc) -{ - u64 from, to; - int br_sel = cpuc->br_sel; - int i, j, type; - bool compress = false; - - /* if sampling all branches, then nothing to filter */ - if ((br_sel & X86_BR_ALL) == X86_BR_ALL) - return; - - for (i = 0; i < cpuc->lbr_stack.nr; i++) { - - from = cpuc->lbr_entries[i].from; - to = cpuc->lbr_entries[i].to; - - type = branch_type(from, to); - - /* if type does not correspond, then discard */ - if (type == X86_BR_NONE || (br_sel & type) != type) { - cpuc->lbr_entries[i].from = 0; - compress = true; - } - } - - if (!compress) - return; - - /* remove all entries with from=0 */ - for (i = 0; i < cpuc->lbr_stack.nr; ) { - if (!cpuc->lbr_entries[i].from) { - j = i; - while (++j < cpuc->lbr_stack.nr) - cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j]; - cpuc->lbr_stack.nr--; - if (!cpuc->lbr_entries[i].from) - continue; - } - i++; - } -} - -/* - * Map interface branch filters onto LBR filters - */ -static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { - [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY, - [PERF_SAMPLE_BRANCH_USER] = LBR_USER, - [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL, - [PERF_SAMPLE_BRANCH_HV] = LBR_IGN, - [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_REL_JMP - | LBR_IND_JMP | LBR_FAR, - /* - * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches - */ - [PERF_SAMPLE_BRANCH_ANY_CALL] = - LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR, - /* - * NHM/WSM erratum: must include IND_JMP to capture IND_CALL - */ - [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP, -}; - -static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { - [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY, - [PERF_SAMPLE_BRANCH_USER] = LBR_USER, - [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL, - [PERF_SAMPLE_BRANCH_HV] = LBR_IGN, - [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_FAR, - [PERF_SAMPLE_BRANCH_ANY_CALL] = LBR_REL_CALL | LBR_IND_CALL - | LBR_FAR, - [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL, -}; - -/* core */ -void intel_pmu_lbr_init_core(void) -{ - x86_pmu.lbr_nr = 4; - x86_pmu.lbr_tos = MSR_LBR_TOS; - x86_pmu.lbr_from = MSR_LBR_CORE_FROM; - x86_pmu.lbr_to = MSR_LBR_CORE_TO; - - /* - * SW branch filter usage: - * - compensate for lack of HW filter - */ - pr_cont("4-deep LBR, "); -} - -/* nehalem/westmere */ -void intel_pmu_lbr_init_nhm(void) -{ - x86_pmu.lbr_nr = 16; - x86_pmu.lbr_tos = MSR_LBR_TOS; - x86_pmu.lbr_from = MSR_LBR_NHM_FROM; - x86_pmu.lbr_to = MSR_LBR_NHM_TO; - - x86_pmu.lbr_sel_mask = LBR_SEL_MASK; - x86_pmu.lbr_sel_map = nhm_lbr_sel_map; - - /* - * SW branch filter usage: - * - workaround LBR_SEL errata (see above) - * - support syscall, sysret capture. - * That requires LBR_FAR but that means far - * jmp need to be filtered out - */ - pr_cont("16-deep LBR, "); -} - -/* sandy bridge */ -void intel_pmu_lbr_init_snb(void) -{ - x86_pmu.lbr_nr = 16; - x86_pmu.lbr_tos = MSR_LBR_TOS; - x86_pmu.lbr_from = MSR_LBR_NHM_FROM; - x86_pmu.lbr_to = MSR_LBR_NHM_TO; - - x86_pmu.lbr_sel_mask = LBR_SEL_MASK; - x86_pmu.lbr_sel_map = snb_lbr_sel_map; - - /* - * SW branch filter usage: - * - support syscall, sysret capture. 
- * That requires LBR_FAR but that means far - * jmp need to be filtered out - */ - pr_cont("16-deep LBR, "); -} - -/* atom */ -void intel_pmu_lbr_init_atom(void) -{ - /* - * only models starting at stepping 10 seems - * to have an operational LBR which can freeze - * on PMU interrupt - */ - if (boot_cpu_data.x86_mask < 10) { - pr_cont("LBR disabled due to erratum"); - return; - } - - x86_pmu.lbr_nr = 8; - x86_pmu.lbr_tos = MSR_LBR_TOS; - x86_pmu.lbr_from = MSR_LBR_CORE_FROM; - x86_pmu.lbr_to = MSR_LBR_CORE_TO; - - /* - * SW branch filter usage: - * - compensate for lack of HW filter - */ - pr_cont("8-deep LBR, "); -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/perf_event_p4.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/perf_event_p4.c deleted file mode 100644 index a2dfacfd..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/perf_event_p4.c +++ /dev/null @@ -1,1345 +0,0 @@ -/* - * Netburst Performance Events (P4, old Xeon) - * - * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org> - * Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com> - * - * For licencing details see kernel-base/COPYING - */ - -#include <linux/perf_event.h> - -#include <asm/perf_event_p4.h> -#include <asm/hardirq.h> -#include <asm/apic.h> - -#include "perf_event.h" - -#define P4_CNTR_LIMIT 3 -/* - * array indices: 0,1 - HT threads, used with HT enabled cpu - */ -struct p4_event_bind { - unsigned int opcode; /* Event code and ESCR selector */ - unsigned int escr_msr[2]; /* ESCR MSR for this event */ - unsigned int escr_emask; /* valid ESCR EventMask bits */ - unsigned int shared; /* event is shared across threads */ - char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */ -}; - -struct p4_pebs_bind { - unsigned int metric_pebs; - unsigned int metric_vert; -}; - -/* it sets P4_PEBS_ENABLE_UOP_TAG as well */ -#define P4_GEN_PEBS_BIND(name, pebs, vert) \ - [P4_PEBS_METRIC__##name] = { \ - .metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG, \ - .metric_vert = vert, \ - } - -/* - * note we have P4_PEBS_ENABLE_UOP_TAG always set here - * - * it's needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of - * event configuration to find out which values are to be - * written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT - * resgisters - */ -static struct p4_pebs_bind p4_pebs_bind_map[] = { - P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired, 0x0000001, 0x0000001), - P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired, 0x0000002, 0x0000001), - P4_GEN_PEBS_BIND(dtlb_load_miss_retired, 0x0000004, 0x0000001), - P4_GEN_PEBS_BIND(dtlb_store_miss_retired, 0x0000004, 0x0000002), - P4_GEN_PEBS_BIND(dtlb_all_miss_retired, 0x0000004, 0x0000003), - P4_GEN_PEBS_BIND(tagged_mispred_branch, 0x0018000, 0x0000010), - P4_GEN_PEBS_BIND(mob_load_replay_retired, 0x0000200, 0x0000001), - P4_GEN_PEBS_BIND(split_load_retired, 0x0000400, 0x0000001), - P4_GEN_PEBS_BIND(split_store_retired, 0x0000400, 0x0000002), -}; - -/* - * Note that we don't use CCCR1 here, there is an - * exception for P4_BSQ_ALLOCATION but we just have - * no workaround - * - * consider this binding as resources which particular - * event may borrow, it doesn't contain EventMask, - * Tags and friends -- they are left to a caller - */ -static struct p4_event_bind p4_event_bind_map[] = { - [P4_EVENT_TC_DELIVER_MODE] = { - .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE), - .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD) | - P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DB) | - 
P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DI) | - P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BD) | - P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BB) | - P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BI) | - P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, ID), - .shared = 1, - .cntr = { {4, 5, -1}, {6, 7, -1} }, - }, - [P4_EVENT_BPU_FETCH_REQUEST] = { - .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST), - .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST, TCMISS), - .cntr = { {0, -1, -1}, {2, -1, -1} }, - }, - [P4_EVENT_ITLB_REFERENCE] = { - .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE), - .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT) | - P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, MISS) | - P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT_UK), - .cntr = { {0, -1, -1}, {2, -1, -1} }, - }, - [P4_EVENT_MEMORY_CANCEL] = { - .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL), - .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL) | - P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, 64K_CONF), - .cntr = { {8, 9, -1}, {10, 11, -1} }, - }, - [P4_EVENT_MEMORY_COMPLETE] = { - .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE), - .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, LSC) | - P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, SSC), - .cntr = { {8, 9, -1}, {10, 11, -1} }, - }, - [P4_EVENT_LOAD_PORT_REPLAY] = { - .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY), - .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD), - .cntr = { {8, 9, -1}, {10, 11, -1} }, - }, - [P4_EVENT_STORE_PORT_REPLAY] = { - .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY), - .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST), - .cntr = { {8, 9, -1}, {10, 11, -1} }, - }, - [P4_EVENT_MOB_LOAD_REPLAY] = { - .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY), - .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STA) | - P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STD) | - P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA) | - P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR), - .cntr = { {0, -1, -1}, {2, -1, -1} }, - }, - [P4_EVENT_PAGE_WALK_TYPE] = { - .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE), - .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, DTMISS) | - P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, ITMISS), - .shared = 1, - .cntr = { {0, -1, -1}, {2, -1, -1} }, - }, - [P4_EVENT_BSQ_CACHE_REFERENCE] = { - .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE), - .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, 
WR_2ndL_MISS), - .cntr = { {0, -1, -1}, {2, -1, -1} }, - }, - [P4_EVENT_IOQ_ALLOCATION] = { - .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION), - .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, DEFAULT) | - P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_READ) | - P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE) | - P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_UC) | - P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WC) | - P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WT) | - P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WP) | - P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WB) | - P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OWN) | - P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OTHER) | - P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, PREFETCH), - .cntr = { {0, -1, -1}, {2, -1, -1} }, - }, - [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */ - .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES), - .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT) | - P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ) | - P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE) | - P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC) | - P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC) | - P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT) | - P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP) | - P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB) | - P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN) | - P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER) | - P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH), - .cntr = { {2, -1, -1}, {3, -1, -1} }, - }, - [P4_EVENT_FSB_DATA_ACTIVITY] = { - .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY), - .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) | - P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN) | - P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER) | - P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV) | - P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN) | - P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER), - .shared = 1, - .cntr = { {0, -1, -1}, {2, -1, -1} }, - }, - [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */ - .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION), - .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2), - .cntr = { {0, -1, -1}, {1, -1, -1} }, - }, - [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */ - .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES), - .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0) | 
- P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2), - .cntr = { {2, -1, -1}, {3, -1, -1} }, - }, - [P4_EVENT_SSE_INPUT_ASSIST] = { - .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST), - .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_SSE_INPUT_ASSIST, ALL), - .shared = 1, - .cntr = { {8, 9, -1}, {10, 11, -1} }, - }, - [P4_EVENT_PACKED_SP_UOP] = { - .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP), - .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_SP_UOP, ALL), - .shared = 1, - .cntr = { {8, 9, -1}, {10, 11, -1} }, - }, - [P4_EVENT_PACKED_DP_UOP] = { - .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP), - .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_DP_UOP, ALL), - .shared = 1, - .cntr = { {8, 9, -1}, {10, 11, -1} }, - }, - [P4_EVENT_SCALAR_SP_UOP] = { - .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP), - .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_SP_UOP, ALL), - .shared = 1, - .cntr = { {8, 9, -1}, {10, 11, -1} }, - }, - [P4_EVENT_SCALAR_DP_UOP] = { - .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP), - .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_DP_UOP, ALL), - .shared = 1, - .cntr = { {8, 9, -1}, {10, 11, -1} }, - }, - [P4_EVENT_64BIT_MMX_UOP] = { - .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP), - .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_64BIT_MMX_UOP, ALL), - .shared = 1, - .cntr = { {8, 9, -1}, {10, 11, -1} }, - }, - [P4_EVENT_128BIT_MMX_UOP] = { - .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP), - .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_128BIT_MMX_UOP, ALL), - .shared = 1, - .cntr = { {8, 9, -1}, {10, 11, -1} }, - }, - [P4_EVENT_X87_FP_UOP] = { - .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP), - .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_X87_FP_UOP, ALL), - .shared = 1, - .cntr = { {8, 9, -1}, {10, 11, -1} }, - }, - [P4_EVENT_TC_MISC] = { - .opcode = P4_OPCODE(P4_EVENT_TC_MISC), - .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_TC_MISC, FLUSH), - .cntr = { {4, 5, -1}, {6, 7, -1} }, - }, - [P4_EVENT_GLOBAL_POWER_EVENTS] = { - .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS), - .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING), - .cntr = { {0, -1, -1}, {2, -1, -1} }, - }, - [P4_EVENT_TC_MS_XFER] = { - .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER), - .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, - .escr_emask = - 
P4_ESCR_EMASK_BIT(P4_EVENT_TC_MS_XFER, CISC), - .cntr = { {4, 5, -1}, {6, 7, -1} }, - }, - [P4_EVENT_UOP_QUEUE_WRITES] = { - .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES), - .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD) | - P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER) | - P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM), - .cntr = { {4, 5, -1}, {6, 7, -1} }, - }, - [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = { - .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE), - .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL) | - P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL) | - P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN) | - P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT), - .cntr = { {4, 5, -1}, {6, 7, -1} }, - }, - [P4_EVENT_RETIRED_BRANCH_TYPE] = { - .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE), - .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) | - P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) | - P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) | - P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT), - .cntr = { {4, 5, -1}, {6, 7, -1} }, - }, - [P4_EVENT_RESOURCE_STALL] = { - .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL), - .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_RESOURCE_STALL, SBFULL), - .cntr = { {12, 13, 16}, {14, 15, 17} }, - }, - [P4_EVENT_WC_BUFFER] = { - .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER), - .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_EVICTS) | - P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS), - .shared = 1, - .cntr = { {8, 9, -1}, {10, 11, -1} }, - }, - [P4_EVENT_B2B_CYCLES] = { - .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES), - .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, - .escr_emask = 0, - .cntr = { {0, -1, -1}, {2, -1, -1} }, - }, - [P4_EVENT_BNR] = { - .opcode = P4_OPCODE(P4_EVENT_BNR), - .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, - .escr_emask = 0, - .cntr = { {0, -1, -1}, {2, -1, -1} }, - }, - [P4_EVENT_SNOOP] = { - .opcode = P4_OPCODE(P4_EVENT_SNOOP), - .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, - .escr_emask = 0, - .cntr = { {0, -1, -1}, {2, -1, -1} }, - }, - [P4_EVENT_RESPONSE] = { - .opcode = P4_OPCODE(P4_EVENT_RESPONSE), - .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, - .escr_emask = 0, - .cntr = { {0, -1, -1}, {2, -1, -1} }, - }, - [P4_EVENT_FRONT_END_EVENT] = { - .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT), - .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, NBOGUS) | - P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, BOGUS), - .cntr = { {12, 13, 16}, {14, 15, 17} }, - }, - [P4_EVENT_EXECUTION_EVENT] = { - .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT), - .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0) | - P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1) | - P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2) | - P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3) | - P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) | - P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) | - 
P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) | - P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3), - .cntr = { {12, 13, 16}, {14, 15, 17} }, - }, - [P4_EVENT_REPLAY_EVENT] = { - .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT), - .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS) | - P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, BOGUS), - .cntr = { {12, 13, 16}, {14, 15, 17} }, - }, - [P4_EVENT_INSTR_RETIRED] = { - .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED), - .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) | - P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSTAG) | - P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG) | - P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSTAG), - .cntr = { {12, 13, 16}, {14, 15, 17} }, - }, - [P4_EVENT_UOPS_RETIRED] = { - .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED), - .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, NBOGUS) | - P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, BOGUS), - .cntr = { {12, 13, 16}, {14, 15, 17} }, - }, - [P4_EVENT_UOP_TYPE] = { - .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE), - .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGLOADS) | - P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGSTORES), - .cntr = { {12, 13, 16}, {14, 15, 17} }, - }, - [P4_EVENT_BRANCH_RETIRED] = { - .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED), - .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNP) | - P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNM) | - P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTP) | - P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTM), - .cntr = { {12, 13, 16}, {14, 15, 17} }, - }, - [P4_EVENT_MISPRED_BRANCH_RETIRED] = { - .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), - .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS), - .cntr = { {12, 13, 16}, {14, 15, 17} }, - }, - [P4_EVENT_X87_ASSIST] = { - .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST), - .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSU) | - P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSO) | - P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAO) | - P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAU) | - P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, PREA), - .cntr = { {12, 13, 16}, {14, 15, 17} }, - }, - [P4_EVENT_MACHINE_CLEAR] = { - .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR), - .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, CLEAR) | - P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, MOCLEAR) | - P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, SMCLEAR), - .cntr = { {12, 13, 16}, {14, 15, 17} }, - }, - [P4_EVENT_INSTR_COMPLETED] = { - .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED), - .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, - .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, NBOGUS) | - P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, BOGUS), - .cntr = { {12, 13, 16}, {14, 15, 17} }, - }, -}; - -#define P4_GEN_CACHE_EVENT(event, bit, metric) \ - p4_config_pack_escr(P4_ESCR_EVENT(event) | \ - P4_ESCR_EMASK_BIT(event, bit)) | \ - p4_config_pack_cccr(metric | \ - P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event)))) - -static __initconst const u64 p4_hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - 
[PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = -{ - [ C(L1D ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, - P4_PEBS_METRIC__1stl_cache_load_miss_retired), - }, - }, - [ C(LL ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, - P4_PEBS_METRIC__2ndl_cache_load_miss_retired), - }, -}, - [ C(DTLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, - P4_PEBS_METRIC__dtlb_load_miss_retired), - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, - P4_PEBS_METRIC__dtlb_store_miss_retired), - }, - }, - [ C(ITLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT, - P4_PEBS_METRIC__none), - [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS, - P4_PEBS_METRIC__none), - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(NODE) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, -}; - -/* - * Because of Netburst being quite restricted in how many - * identical events may run simultaneously, we introduce event aliases, - * ie the different events which have the same functionality but - * utilize non-intersected resources (ESCR/CCCR/counter registers). - * - * This allow us to relax restrictions a bit and run two or more - * identical events together. - * - * Never set any custom internal bits such as P4_CONFIG_HT, - * P4_CONFIG_ALIASABLE or bits for P4_PEBS_METRIC, they are - * either up to date automatically or not applicable at all. - */ -struct p4_event_alias { - u64 original; - u64 alternative; -} p4_event_aliases[] = { - { - /* - * Non-halted cycles can be substituted with non-sleeping cycles (see - * Intel SDM Vol3b for details). We need this alias to be able - * to run nmi-watchdog and 'perf top' (or any other user space tool - * which is interested in running PERF_COUNT_HW_CPU_CYCLES) - * simultaneously. - */ - .original = - p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) | - P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)), - .alternative = - p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_EXECUTION_EVENT) | - P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0)| - P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1)| - P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2)| - P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3)| - P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) | - P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) | - P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) | - P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3))| - p4_config_pack_cccr(P4_CCCR_THRESHOLD(15) | P4_CCCR_COMPLEMENT | - P4_CCCR_COMPARE), - }, -}; - -static u64 p4_get_alias_event(u64 config) -{ - u64 config_match; - int i; - - /* - * Only event with special mark is allowed, - * we're to be sure it didn't come as malformed - * RAW event. 
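 *
 * (Editor's illustration, not part of the original source: with the
 * single alias pair defined above, a GLOBAL_POWER_EVENTS/RUNNING
 * config carrying P4_CONFIG_ALIASABLE can be swapped for the
 * equivalent EXECUTION_EVENT encoding, and vice versa, which is what
 * allows the NMI watchdog and a cycle-counting perf session to run
 * at the same time.)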
- */ - if (!(config & P4_CONFIG_ALIASABLE)) - return 0; - - config_match = config & P4_CONFIG_EVENT_ALIAS_MASK; - - for (i = 0; i < ARRAY_SIZE(p4_event_aliases); i++) { - if (config_match == p4_event_aliases[i].original) { - config_match = p4_event_aliases[i].alternative; - break; - } else if (config_match == p4_event_aliases[i].alternative) { - config_match = p4_event_aliases[i].original; - break; - } - } - - if (i >= ARRAY_SIZE(p4_event_aliases)) - return 0; - - return config_match | (config & P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS); -} - -static u64 p4_general_events[PERF_COUNT_HW_MAX] = { - /* non-halted CPU clocks */ - [PERF_COUNT_HW_CPU_CYCLES] = - p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) | - P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)) | - P4_CONFIG_ALIASABLE, - - /* - * retired instructions - * in a sake of simplicity we don't use the FSB tagging - */ - [PERF_COUNT_HW_INSTRUCTIONS] = - p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_INSTR_RETIRED) | - P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) | - P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)), - - /* cache hits */ - [PERF_COUNT_HW_CACHE_REFERENCES] = - p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)), - - /* cache misses */ - [PERF_COUNT_HW_CACHE_MISSES] = - p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | - P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS)), - - /* branch instructions retired */ - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = - p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_RETIRED_BRANCH_TYPE) | - P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) | - P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) | - P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) | - P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT)), - - /* mispredicted branches retired */ - [PERF_COUNT_HW_BRANCH_MISSES] = - p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_MISPRED_BRANCH_RETIRED) | - P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS)), - - /* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN): */ - [PERF_COUNT_HW_BUS_CYCLES] = - p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_FSB_DATA_ACTIVITY) | - P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) | - P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN)) | - p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE), -}; - -static struct p4_event_bind *p4_config_get_bind(u64 config) -{ - unsigned int evnt = p4_config_unpack_event(config); - struct p4_event_bind *bind = NULL; - - if (evnt < ARRAY_SIZE(p4_event_bind_map)) - bind = &p4_event_bind_map[evnt]; - - return bind; -} - -static u64 p4_pmu_event_map(int hw_event) -{ - struct p4_event_bind *bind; - unsigned int esel; - u64 config; - - config = p4_general_events[hw_event]; - bind = p4_config_get_bind(config); - esel = P4_OPCODE_ESEL(bind->opcode); - config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel)); - - return config; -} - -/* check cpu model specifics */ -static bool p4_event_match_cpu_model(unsigned int 
event_idx) -{ - /* INSTR_COMPLETED event only exist for model 3, 4, 6 (Prescott) */ - if (event_idx == P4_EVENT_INSTR_COMPLETED) { - if (boot_cpu_data.x86_model != 3 && - boot_cpu_data.x86_model != 4 && - boot_cpu_data.x86_model != 6) - return false; - } - - /* - * For info - * - IQ_ESCR0, IQ_ESCR1 only for models 1 and 2 - */ - - return true; -} - -static int p4_validate_raw_event(struct perf_event *event) -{ - unsigned int v, emask; - - /* User data may have out-of-bound event index */ - v = p4_config_unpack_event(event->attr.config); - if (v >= ARRAY_SIZE(p4_event_bind_map)) - return -EINVAL; - - /* It may be unsupported: */ - if (!p4_event_match_cpu_model(v)) - return -EINVAL; - - /* - * NOTE: P4_CCCR_THREAD_ANY has not the same meaning as - * in Architectural Performance Monitoring, it means not - * on _which_ logical cpu to count but rather _when_, ie it - * depends on logical cpu state -- count event if one cpu active, - * none, both or any, so we just allow user to pass any value - * desired. - * - * In turn we always set Tx_OS/Tx_USR bits bound to logical - * cpu without their propagation to another cpu - */ - - /* - * if an event is shared across the logical threads - * the user needs special permissions to be able to use it - */ - if (p4_ht_active() && p4_event_bind_map[v].shared) { - if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) - return -EACCES; - } - - /* ESCR EventMask bits may be invalid */ - emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK; - if (emask & ~p4_event_bind_map[v].escr_emask) - return -EINVAL; - - /* - * it may have some invalid PEBS bits - */ - if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) - return -EINVAL; - - v = p4_config_unpack_metric(event->attr.config); - if (v >= ARRAY_SIZE(p4_pebs_bind_map)) - return -EINVAL; - - return 0; -} - -static int p4_hw_config(struct perf_event *event) -{ - int cpu = get_cpu(); - int rc = 0; - u32 escr, cccr; - - /* - * the reason we use cpu that early is that: if we get scheduled - * first time on the same cpu -- we will not need swap thread - * specific flags in config (and will save some cpu cycles) - */ - - cccr = p4_default_cccr_conf(cpu); - escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel, - event->attr.exclude_user); - event->hw.config = p4_config_pack_escr(escr) | - p4_config_pack_cccr(cccr); - - if (p4_ht_active() && p4_ht_thread(cpu)) - event->hw.config = p4_set_ht_bit(event->hw.config); - - if (event->attr.type == PERF_TYPE_RAW) { - struct p4_event_bind *bind; - unsigned int esel; - /* - * Clear bits we reserve to be managed by kernel itself - * and never allowed from a user space - */ - event->attr.config &= P4_CONFIG_MASK; - - rc = p4_validate_raw_event(event); - if (rc) - goto out; - - /* - * Note that for RAW events we allow user to use P4_CCCR_RESERVED - * bits since we keep additional info here (for cache events and etc) - */ - event->hw.config |= event->attr.config; - bind = p4_config_get_bind(event->attr.config); - if (!bind) { - rc = -EINVAL; - goto out; - } - esel = P4_OPCODE_ESEL(bind->opcode); - event->hw.config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel)); - } - - rc = x86_setup_perfctr(event); -out: - put_cpu(); - return rc; -} - -static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) -{ - u64 v; - - /* an official way for overflow indication */ - rdmsrl(hwc->config_base, v); - if (v & P4_CCCR_OVF) { - wrmsrl(hwc->config_base, v & ~P4_CCCR_OVF); - return 1; - } - - /* - * In some circumstances the overflow might issue an NMI but 
did - * not set P4_CCCR_OVF bit. Because a counter holds a negative value - * we simply check for high bit being set, if it's cleared it means - * the counter has reached zero value and continued counting before - * real NMI signal was received: - */ - rdmsrl(hwc->event_base, v); - if (!(v & ARCH_P4_UNFLAGGED_BIT)) - return 1; - - return 0; -} - -static void p4_pmu_disable_pebs(void) -{ - /* - * FIXME - * - * It's still allowed that two threads setup same cache - * events so we can't simply clear metrics until we knew - * no one is depending on us, so we need kind of counter - * for "ReplayEvent" users. - * - * What is more complex -- RAW events, if user (for some - * reason) will pass some cache event metric with improper - * event opcode -- it's fine from hardware point of view - * but completely nonsense from "meaning" of such action. - * - * So at moment let leave metrics turned on forever -- it's - * ok for now but need to be revisited! - * - * (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)0); - * (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)0); - */ -} - -static inline void p4_pmu_disable_event(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - - /* - * If event gets disabled while counter is in overflowed - * state we need to clear P4_CCCR_OVF, otherwise interrupt get - * asserted again and again - */ - (void)checking_wrmsrl(hwc->config_base, - (u64)(p4_config_unpack_cccr(hwc->config)) & - ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED); -} - -static void p4_pmu_disable_all(void) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - int idx; - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - struct perf_event *event = cpuc->events[idx]; - if (!test_bit(idx, cpuc->active_mask)) - continue; - p4_pmu_disable_event(event); - } - - p4_pmu_disable_pebs(); -} - -/* configuration must be valid */ -static void p4_pmu_enable_pebs(u64 config) -{ - struct p4_pebs_bind *bind; - unsigned int idx; - - BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK); - - idx = p4_config_unpack_metric(config); - if (idx == P4_PEBS_METRIC__none) - return; - - bind = &p4_pebs_bind_map[idx]; - - (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs); - (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert); -} - -static void p4_pmu_enable_event(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - int thread = p4_ht_config_thread(hwc->config); - u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config)); - unsigned int idx = p4_config_unpack_event(hwc->config); - struct p4_event_bind *bind; - u64 escr_addr, cccr; - - bind = &p4_event_bind_map[idx]; - escr_addr = (u64)bind->escr_msr[thread]; - - /* - * - we dont support cascaded counters yet - * - and counter 1 is broken (erratum) - */ - WARN_ON_ONCE(p4_is_event_cascaded(hwc->config)); - WARN_ON_ONCE(hwc->idx == 1); - - /* we need a real Event value */ - escr_conf &= ~P4_ESCR_EVENT_MASK; - escr_conf |= P4_ESCR_EVENT(P4_OPCODE_EVNT(bind->opcode)); - - cccr = p4_config_unpack_cccr(hwc->config); - - /* - * it could be Cache event so we need to write metrics - * into additional MSRs - */ - p4_pmu_enable_pebs(hwc->config); - - (void)checking_wrmsrl(escr_addr, escr_conf); - (void)checking_wrmsrl(hwc->config_base, - (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE); -} - -static void p4_pmu_enable_all(int added) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - int idx; - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - struct 
perf_event *event = cpuc->events[idx]; - if (!test_bit(idx, cpuc->active_mask)) - continue; - p4_pmu_enable_event(event); - } -} - -static int p4_pmu_handle_irq(struct pt_regs *regs) -{ - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct perf_event *event; - struct hw_perf_event *hwc; - int idx, handled = 0; - u64 val; - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - int overflow; - - if (!test_bit(idx, cpuc->active_mask)) { - /* catch in-flight IRQs */ - if (__test_and_clear_bit(idx, cpuc->running)) - handled++; - continue; - } - - event = cpuc->events[idx]; - hwc = &event->hw; - - WARN_ON_ONCE(hwc->idx != idx); - - /* it might be unflagged overflow */ - overflow = p4_pmu_clear_cccr_ovf(hwc); - - val = x86_perf_event_update(event); - if (!overflow && (val & (1ULL << (x86_pmu.cntval_bits - 1)))) - continue; - - handled += overflow; - - /* event overflow for sure */ - data.period = event->hw.last_period; - - if (!x86_perf_event_set_period(event)) - continue; - if (perf_event_overflow(event, &data, regs)) - x86_pmu_stop(event, 0); - } - - if (handled) - inc_irq_stat(apic_perf_irqs); - - /* - * When dealing with the unmasking of the LVTPC on P4 perf hw, it has - * been observed that the OVF bit flag has to be cleared first _before_ - * the LVTPC can be unmasked. - * - * The reason is the NMI line will continue to be asserted while the OVF - * bit is set. This causes a second NMI to generate if the LVTPC is - * unmasked before the OVF bit is cleared, leading to unknown NMI - * messages. - */ - apic_write(APIC_LVTPC, APIC_DM_NMI); - - return handled; -} - -/* - * swap thread specific fields according to a thread - * we are going to run on - */ -static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu) -{ - u32 escr, cccr; - - /* - * we either lucky and continue on same cpu or no HT support - */ - if (!p4_should_swap_ts(hwc->config, cpu)) - return; - - /* - * the event is migrated from an another logical - * cpu, so we need to swap thread specific flags - */ - - escr = p4_config_unpack_escr(hwc->config); - cccr = p4_config_unpack_cccr(hwc->config); - - if (p4_ht_thread(cpu)) { - cccr &= ~P4_CCCR_OVF_PMI_T0; - cccr |= P4_CCCR_OVF_PMI_T1; - if (escr & P4_ESCR_T0_OS) { - escr &= ~P4_ESCR_T0_OS; - escr |= P4_ESCR_T1_OS; - } - if (escr & P4_ESCR_T0_USR) { - escr &= ~P4_ESCR_T0_USR; - escr |= P4_ESCR_T1_USR; - } - hwc->config = p4_config_pack_escr(escr); - hwc->config |= p4_config_pack_cccr(cccr); - hwc->config |= P4_CONFIG_HT; - } else { - cccr &= ~P4_CCCR_OVF_PMI_T1; - cccr |= P4_CCCR_OVF_PMI_T0; - if (escr & P4_ESCR_T1_OS) { - escr &= ~P4_ESCR_T1_OS; - escr |= P4_ESCR_T0_OS; - } - if (escr & P4_ESCR_T1_USR) { - escr &= ~P4_ESCR_T1_USR; - escr |= P4_ESCR_T0_USR; - } - hwc->config = p4_config_pack_escr(escr); - hwc->config |= p4_config_pack_cccr(cccr); - hwc->config &= ~P4_CONFIG_HT; - } -} - -/* - * ESCR address hashing is tricky, ESCRs are not sequential - * in memory but all starts from MSR_P4_BSU_ESCR0 (0x03a0) and - * the metric between any ESCRs is laid in range [0xa0,0xe1] - * - * so we make ~70% filled hashtable - */ - -#define P4_ESCR_MSR_BASE 0x000003a0 -#define P4_ESCR_MSR_MAX 0x000003e1 -#define P4_ESCR_MSR_TABLE_SIZE (P4_ESCR_MSR_MAX - P4_ESCR_MSR_BASE + 1) -#define P4_ESCR_MSR_IDX(msr) (msr - P4_ESCR_MSR_BASE) -#define P4_ESCR_MSR_TABLE_ENTRY(msr) [P4_ESCR_MSR_IDX(msr)] = msr - -static const unsigned int p4_escr_table[P4_ESCR_MSR_TABLE_SIZE] = { - 
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR0), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR1), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR0), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR1), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR0), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR1), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR0), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR1), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR2), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR3), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR4), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR5), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR0), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR1), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR0), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR1), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR0), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR1), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR0), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR1), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR0), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR1), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR0), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR1), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR0), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR1), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR0), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR1), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR0), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR1), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR0), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR1), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR0), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR1), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR0), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR1), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR0), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR1), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR0), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR1), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR0), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR1), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR0), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR1), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR0), - P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR1), -}; - -static int p4_get_escr_idx(unsigned int addr) -{ - unsigned int idx = P4_ESCR_MSR_IDX(addr); - - if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE || - !p4_escr_table[idx] || - p4_escr_table[idx] != addr)) { - WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr); - return -1; - } - - return idx; -} - -static int p4_next_cntr(int thread, unsigned long *used_mask, - struct p4_event_bind *bind) -{ - int i, j; - - for (i = 0; i < P4_CNTR_LIMIT; i++) { - j = bind->cntr[thread][i]; - if (j != -1 && !test_bit(j, used_mask)) - return j; - } - - return -1; -} - -static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) -{ - unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; - unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)]; - int cpu = smp_processor_id(); - struct hw_perf_event *hwc; - struct p4_event_bind *bind; - unsigned int i, thread, num; - int cntr_idx, escr_idx; - u64 config_alias; - int pass; - - bitmap_zero(used_mask, X86_PMC_IDX_MAX); - bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE); - - for (i = 0, num = n; i < n; i++, num--) { - - hwc = &cpuc->event_list[i]->hw; - thread = p4_ht_thread(cpu); - pass = 0; - -again: - /* - * It's possible to hit a circular lock - * between original and alternative events - * if both are scheduled already. 
- */ - if (pass > 2) - goto done; - - bind = p4_config_get_bind(hwc->config); - escr_idx = p4_get_escr_idx(bind->escr_msr[thread]); - if (unlikely(escr_idx == -1)) - goto done; - - if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) { - cntr_idx = hwc->idx; - if (assign) - assign[i] = hwc->idx; - goto reserve; - } - - cntr_idx = p4_next_cntr(thread, used_mask, bind); - if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) { - /* - * Check whether an event alias is still available. - */ - config_alias = p4_get_alias_event(hwc->config); - if (!config_alias) - goto done; - hwc->config = config_alias; - pass++; - goto again; - } - - p4_pmu_swap_config_ts(hwc, cpu); - if (assign) - assign[i] = cntr_idx; -reserve: - set_bit(cntr_idx, used_mask); - set_bit(escr_idx, escr_mask); - } - -done: - return num ? -EINVAL : 0; -} - -PMU_FORMAT_ATTR(cccr, "config:0-31" ); -PMU_FORMAT_ATTR(escr, "config:32-62"); -PMU_FORMAT_ATTR(ht, "config:63" ); - -static struct attribute *intel_p4_formats_attr[] = { - &format_attr_cccr.attr, - &format_attr_escr.attr, - &format_attr_ht.attr, - NULL, -}; - -static __initconst const struct x86_pmu p4_pmu = { - .name = "Netburst P4/Xeon", - .handle_irq = p4_pmu_handle_irq, - .disable_all = p4_pmu_disable_all, - .enable_all = p4_pmu_enable_all, - .enable = p4_pmu_enable_event, - .disable = p4_pmu_disable_event, - .eventsel = MSR_P4_BPU_CCCR0, - .perfctr = MSR_P4_BPU_PERFCTR0, - .event_map = p4_pmu_event_map, - .max_events = ARRAY_SIZE(p4_general_events), - .get_event_constraints = x86_get_event_constraints, - /* - * IF HT disabled we may need to use all - * ARCH_P4_MAX_CCCR counters simulaneously - * though leave it restricted at moment assuming - * HT is on - */ - .num_counters = ARCH_P4_MAX_CCCR, - .apic = 1, - .cntval_bits = ARCH_P4_CNTRVAL_BITS, - .cntval_mask = ARCH_P4_CNTRVAL_MASK, - .max_period = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1, - .hw_config = p4_hw_config, - .schedule_events = p4_pmu_schedule_events, - /* - * This handles erratum N15 in intel doc 249199-029, - * the counter may not be updated correctly on write - * so we need a second write operation to do the trick - * (the official workaround didn't work) - * - * the former idea is taken from OProfile code - */ - .perfctr_second_write = 1, - - .format_attrs = intel_p4_formats_attr, -}; - -__init int p4_pmu_init(void) -{ - unsigned int low, high; - - /* If we get stripped -- indexing fails */ - BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC); - - rdmsr(MSR_IA32_MISC_ENABLE, low, high); - if (!(low & (1 << 7))) { - pr_cont("unsupported Netburst CPU model %d ", - boot_cpu_data.x86_model); - return -ENODEV; - } - - memcpy(hw_cache_event_ids, p4_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - - pr_cont("Netburst events, "); - - x86_pmu = p4_pmu; - - return 0; -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/perf_event_p6.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/perf_event_p6.c deleted file mode 100644 index 32bcfc7d..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/perf_event_p6.c +++ /dev/null @@ -1,162 +0,0 @@ -#include <linux/perf_event.h> -#include <linux/types.h> - -#include "perf_event.h" - -/* - * Not sure about some of these - */ -static const u64 p6_perfmon_event_map[] = -{ - [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, - [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, - [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, - [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, -}; - -static u64 
p6_pmu_event_map(int hw_event) -{ - return p6_perfmon_event_map[hw_event]; -} - -/* - * Event setting that is specified not to count anything. - * We use this to effectively disable a counter. - * - * L2_RQSTS with 0 MESI unit mask. - */ -#define P6_NOP_EVENT 0x0000002EULL - -static struct event_constraint p6_event_constraints[] = -{ - INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ - INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ - INTEL_EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */ - INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ - INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ - INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ - EVENT_CONSTRAINT_END -}; - -static void p6_pmu_disable_all(void) -{ - u64 val; - - /* p6 only has one enable register */ - rdmsrl(MSR_P6_EVNTSEL0, val); - val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; - wrmsrl(MSR_P6_EVNTSEL0, val); -} - -static void p6_pmu_enable_all(int added) -{ - unsigned long val; - - /* p6 only has one enable register */ - rdmsrl(MSR_P6_EVNTSEL0, val); - val |= ARCH_PERFMON_EVENTSEL_ENABLE; - wrmsrl(MSR_P6_EVNTSEL0, val); -} - -static inline void -p6_pmu_disable_event(struct perf_event *event) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; - u64 val = P6_NOP_EVENT; - - if (cpuc->enabled) - val |= ARCH_PERFMON_EVENTSEL_ENABLE; - - (void)checking_wrmsrl(hwc->config_base, val); -} - -static void p6_pmu_enable_event(struct perf_event *event) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; - u64 val; - - val = hwc->config; - if (cpuc->enabled) - val |= ARCH_PERFMON_EVENTSEL_ENABLE; - - (void)checking_wrmsrl(hwc->config_base, val); -} - -PMU_FORMAT_ATTR(event, "config:0-7" ); -PMU_FORMAT_ATTR(umask, "config:8-15" ); -PMU_FORMAT_ATTR(edge, "config:18" ); -PMU_FORMAT_ATTR(pc, "config:19" ); -PMU_FORMAT_ATTR(inv, "config:23" ); -PMU_FORMAT_ATTR(cmask, "config:24-31" ); - -static struct attribute *intel_p6_formats_attr[] = { - &format_attr_event.attr, - &format_attr_umask.attr, - &format_attr_edge.attr, - &format_attr_pc.attr, - &format_attr_inv.attr, - &format_attr_cmask.attr, - NULL, -}; - -static __initconst const struct x86_pmu p6_pmu = { - .name = "p6", - .handle_irq = x86_pmu_handle_irq, - .disable_all = p6_pmu_disable_all, - .enable_all = p6_pmu_enable_all, - .enable = p6_pmu_enable_event, - .disable = p6_pmu_disable_event, - .hw_config = x86_pmu_hw_config, - .schedule_events = x86_schedule_events, - .eventsel = MSR_P6_EVNTSEL0, - .perfctr = MSR_P6_PERFCTR0, - .event_map = p6_pmu_event_map, - .max_events = ARRAY_SIZE(p6_perfmon_event_map), - .apic = 1, - .max_period = (1ULL << 31) - 1, - .version = 0, - .num_counters = 2, - /* - * Events have 40 bits implemented. However they are designed such - * that bits [32-39] are sign extensions of bit 31. As such the - * effective width of a event for P6-like PMU is 32 bits only. 
- * - * See IA-32 Intel Architecture Software developer manual Vol 3B - */ - .cntval_bits = 32, - .cntval_mask = (1ULL << 32) - 1, - .get_event_constraints = x86_get_event_constraints, - .event_constraints = p6_event_constraints, - - .format_attrs = intel_p6_formats_attr, -}; - -__init int p6_pmu_init(void) -{ - switch (boot_cpu_data.x86_model) { - case 1: - case 3: /* Pentium Pro */ - case 5: - case 6: /* Pentium II */ - case 7: - case 8: - case 11: /* Pentium III */ - case 9: - case 13: - /* Pentium M */ - break; - default: - pr_cont("unsupported p6 CPU model %d ", - boot_cpu_data.x86_model); - return -ENODEV; - } - - x86_pmu = p6_pmu; - - return 0; -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/perfctr-watchdog.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/perfctr-watchdog.c deleted file mode 100644 index 966512b2..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/perfctr-watchdog.c +++ /dev/null @@ -1,156 +0,0 @@ -/* - * local apic based NMI watchdog for various CPUs. - * - * This file also handles reservation of performance counters for coordination - * with other users (like oprofile). - * - * Note that these events normally don't tick when the CPU idles. This means - * the frequency varies with CPU load. - * - * Original code for K7/P6 written by Keith Owens - * - */ - -#include <linux/percpu.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/bitops.h> -#include <linux/smp.h> -#include <asm/nmi.h> -#include <linux/kprobes.h> - -#include <asm/apic.h> -#include <asm/perf_event.h> - -/* - * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's - * offset from MSR_P4_BSU_ESCR0. - * - * It will be the max for all platforms (for now) - */ -#define NMI_MAX_COUNTER_BITS 66 - -/* - * perfctr_nmi_owner tracks the ownership of the perfctr registers: - * evtsel_nmi_owner tracks the ownership of the event selection - * - different performance counters/ event selection may be reserved for - * different subsystems this reservation system just tries to coordinate - * things a little - */ -static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS); -static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS); - -/* converts an msr to an appropriate reservation bit */ -static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) -{ - /* returns the bit offset of the performance counter register */ - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - if (msr >= MSR_F15H_PERF_CTR) - return (msr - MSR_F15H_PERF_CTR) >> 1; - return msr - MSR_K7_PERFCTR0; - case X86_VENDOR_INTEL: - if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) - return msr - MSR_ARCH_PERFMON_PERFCTR0; - - switch (boot_cpu_data.x86) { - case 6: - return msr - MSR_P6_PERFCTR0; - case 15: - return msr - MSR_P4_BPU_PERFCTR0; - } - } - return 0; -} - -/* - * converts an msr to an appropriate reservation bit - * returns the bit offset of the event selection register - */ -static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) -{ - /* returns the bit offset of the event selection register */ - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - if (msr >= MSR_F15H_PERF_CTL) - return (msr - MSR_F15H_PERF_CTL) >> 1; - return msr - MSR_K7_EVNTSEL0; - case X86_VENDOR_INTEL: - if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) - return msr - MSR_ARCH_PERFMON_EVENTSEL0; - - switch (boot_cpu_data.x86) { - case 6: - return msr - MSR_P6_EVNTSEL0; - case 15: - return msr - MSR_P4_BSU_ESCR0; - } - } - return 0; - -} - -/* checks for a bit availability (hack for 
oprofile) */ -int avail_to_resrv_perfctr_nmi_bit(unsigned int counter) -{ - BUG_ON(counter > NMI_MAX_COUNTER_BITS); - - return !test_bit(counter, perfctr_nmi_owner); -} -EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); - -int reserve_perfctr_nmi(unsigned int msr) -{ - unsigned int counter; - - counter = nmi_perfctr_msr_to_bit(msr); - /* register not managed by the allocator? */ - if (counter > NMI_MAX_COUNTER_BITS) - return 1; - - if (!test_and_set_bit(counter, perfctr_nmi_owner)) - return 1; - return 0; -} -EXPORT_SYMBOL(reserve_perfctr_nmi); - -void release_perfctr_nmi(unsigned int msr) -{ - unsigned int counter; - - counter = nmi_perfctr_msr_to_bit(msr); - /* register not managed by the allocator? */ - if (counter > NMI_MAX_COUNTER_BITS) - return; - - clear_bit(counter, perfctr_nmi_owner); -} -EXPORT_SYMBOL(release_perfctr_nmi); - -int reserve_evntsel_nmi(unsigned int msr) -{ - unsigned int counter; - - counter = nmi_evntsel_msr_to_bit(msr); - /* register not managed by the allocator? */ - if (counter > NMI_MAX_COUNTER_BITS) - return 1; - - if (!test_and_set_bit(counter, evntsel_nmi_owner)) - return 1; - return 0; -} -EXPORT_SYMBOL(reserve_evntsel_nmi); - -void release_evntsel_nmi(unsigned int msr) -{ - unsigned int counter; - - counter = nmi_evntsel_msr_to_bit(msr); - /* register not managed by the allocator? */ - if (counter > NMI_MAX_COUNTER_BITS) - return; - - clear_bit(counter, evntsel_nmi_owner); -} -EXPORT_SYMBOL(release_evntsel_nmi); diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/powerflags.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/powerflags.c deleted file mode 100644 index 7b3fe56b..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/powerflags.c +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Strings for the various x86 power flags - * - * This file must not contain any executable code. - */ - -#include <asm/cpufeature.h> - -const char *const x86_power_flags[32] = { - "ts", /* temperature sensor */ - "fid", /* frequency id control */ - "vid", /* voltage id control */ - "ttp", /* thermal trip */ - "tm", - "stc", - "100mhzsteps", - "hwpstate", - "", /* tsc invariant mapped to constant_tsc */ - "cpb", /* core performance boost */ - "eff_freq_ro", /* Readonly aperf/mperf */ -}; diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/proc.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/proc.c deleted file mode 100644 index 8022c668..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/proc.c +++ /dev/null @@ -1,167 +0,0 @@ -#include <linux/smp.h> -#include <linux/timex.h> -#include <linux/string.h> -#include <linux/seq_file.h> -#include <linux/cpufreq.h> - -/* - * Get CPU information for use by the procfs. 
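[Editor's note — illustration, not part of the diff] The show_cpuinfo() path that follows prints one block per online CPU into /proc/cpuinfo; an abbreviated sample of the output it produces (all values hypothetical) looks like:

	processor	: 0
	vendor_id	: GenuineIntel
	cpu family	: 6
	model		: 42
	stepping	: 7
	cpu MHz		: 3300.000
	cache size	: 8192 KB
	flags		: fpu vme de pse tsc msr pae ...
	bogomips	: 6600.00
	clflush size	: 64
	cache_alignment	: 64
	address sizes	: 36 bits physical, 48 bits virtual
	power management: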
- */ -static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, - unsigned int cpu) -{ -#ifdef CONFIG_SMP - if (c->x86_max_cores * smp_num_siblings > 1) { - seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); - seq_printf(m, "siblings\t: %d\n", - cpumask_weight(cpu_core_mask(cpu))); - seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); - seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); - seq_printf(m, "apicid\t\t: %d\n", c->apicid); - seq_printf(m, "initial apicid\t: %d\n", c->initial_apicid); - } -#endif -} - -#ifdef CONFIG_X86_32 -static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) -{ - /* - * We use exception 16 if we have hardware math and we've either seen - * it or the CPU claims it is internal - */ - int fpu_exception = c->hard_math && (ignore_fpu_irq || cpu_has_fpu); - seq_printf(m, - "fdiv_bug\t: %s\n" - "hlt_bug\t\t: %s\n" - "f00f_bug\t: %s\n" - "coma_bug\t: %s\n" - "fpu\t\t: %s\n" - "fpu_exception\t: %s\n" - "cpuid level\t: %d\n" - "wp\t\t: %s\n", - c->fdiv_bug ? "yes" : "no", - c->hlt_works_ok ? "no" : "yes", - c->f00f_bug ? "yes" : "no", - c->coma_bug ? "yes" : "no", - c->hard_math ? "yes" : "no", - fpu_exception ? "yes" : "no", - c->cpuid_level, - c->wp_works_ok ? "yes" : "no"); -} -#else -static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) -{ - seq_printf(m, - "fpu\t\t: yes\n" - "fpu_exception\t: yes\n" - "cpuid level\t: %d\n" - "wp\t\t: yes\n", - c->cpuid_level); -} -#endif - -static int show_cpuinfo(struct seq_file *m, void *v) -{ - struct cpuinfo_x86 *c = v; - unsigned int cpu; - int i; - - cpu = c->cpu_index; - seq_printf(m, "processor\t: %u\n" - "vendor_id\t: %s\n" - "cpu family\t: %d\n" - "model\t\t: %u\n" - "model name\t: %s\n", - cpu, - c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown", - c->x86, - c->x86_model, - c->x86_model_id[0] ? c->x86_model_id : "unknown"); - - if (c->x86_mask || c->cpuid_level >= 0) - seq_printf(m, "stepping\t: %d\n", c->x86_mask); - else - seq_printf(m, "stepping\t: unknown\n"); - if (c->microcode) - seq_printf(m, "microcode\t: 0x%x\n", c->microcode); - - if (cpu_has(c, X86_FEATURE_TSC)) { - unsigned int freq = cpufreq_quick_get(cpu); - - if (!freq) - freq = cpu_khz; - seq_printf(m, "cpu MHz\t\t: %u.%03u\n", - freq / 1000, (freq % 1000)); - } - - /* Cache size */ - if (c->x86_cache_size >= 0) - seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); - - show_cpuinfo_core(m, c, cpu); - show_cpuinfo_misc(m, c); - - seq_printf(m, "flags\t\t:"); - for (i = 0; i < 32*NCAPINTS; i++) - if (cpu_has(c, i) && x86_cap_flags[i] != NULL) - seq_printf(m, " %s", x86_cap_flags[i]); - - seq_printf(m, "\nbogomips\t: %lu.%02lu\n", - c->loops_per_jiffy/(500000/HZ), - (c->loops_per_jiffy/(5000/HZ)) % 100); - -#ifdef CONFIG_X86_64 - if (c->x86_tlbsize > 0) - seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize); -#endif - seq_printf(m, "clflush size\t: %u\n", c->x86_clflush_size); - seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment); - seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n", - c->x86_phys_bits, c->x86_virt_bits); - - seq_printf(m, "power management:"); - for (i = 0; i < 32; i++) { - if (c->x86_power & (1 << i)) { - if (i < ARRAY_SIZE(x86_power_flags) && - x86_power_flags[i]) - seq_printf(m, "%s%s", - x86_power_flags[i][0] ? 
" " : "", - x86_power_flags[i]); - else - seq_printf(m, " [%d]", i); - } - } - - seq_printf(m, "\n\n"); - - return 0; -} - -static void *c_start(struct seq_file *m, loff_t *pos) -{ - if (*pos == 0) /* just in case, cpu 0 is not the first */ - *pos = cpumask_first(cpu_online_mask); - else - *pos = cpumask_next(*pos - 1, cpu_online_mask); - if ((*pos) < nr_cpu_ids) - return &cpu_data(*pos); - return NULL; -} - -static void *c_next(struct seq_file *m, void *v, loff_t *pos) -{ - (*pos)++; - return c_start(m, pos); -} - -static void c_stop(struct seq_file *m, void *v) -{ -} - -const struct seq_operations cpuinfo_op = { - .start = c_start, - .next = c_next, - .stop = c_stop, - .show = show_cpuinfo, -}; diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/rdrand.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/rdrand.c deleted file mode 100644 index feca286c..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/rdrand.c +++ /dev/null @@ -1,73 +0,0 @@ -/* - * This file is part of the Linux kernel. - * - * Copyright (c) 2011, Intel Corporation - * Authors: Fenghua Yu <fenghua.yu@intel.com>, - * H. Peter Anvin <hpa@linux.intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * - */ - -#include <asm/processor.h> -#include <asm/archrandom.h> -#include <asm/sections.h> - -static int __init x86_rdrand_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_RDRAND); - return 1; -} -__setup("nordrand", x86_rdrand_setup); - -/* We can't use arch_get_random_long() here since alternatives haven't run */ -static inline int rdrand_long(unsigned long *v) -{ - int ok; - asm volatile("1: " RDRAND_LONG "\n\t" - "jc 2f\n\t" - "decl %0\n\t" - "jnz 1b\n\t" - "2:" - : "=r" (ok), "=a" (*v) - : "0" (RDRAND_RETRY_LOOPS)); - return ok; -} - -/* - * Force a reseed cycle; we are architecturally guaranteed a reseed - * after no more than 512 128-bit chunks of random data. This also - * acts as a test of the CPU capability. - */ -#define RESEED_LOOP ((512*128)/sizeof(unsigned long)) - -void __cpuinit x86_init_rdrand(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_ARCH_RANDOM - unsigned long tmp; - int i, count, ok; - - if (!cpu_has(c, X86_FEATURE_RDRAND)) - return; /* Nothing to do */ - - for (count = i = 0; i < RESEED_LOOP; i++) { - ok = rdrand_long(&tmp); - if (ok) - count++; - } - - if (count != RESEED_LOOP) - clear_cpu_cap(c, X86_FEATURE_RDRAND); -#endif -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/scattered.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/scattered.c deleted file mode 100644 index ee8e9abc..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/scattered.c +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Routines to indentify additional cpu features that are scattered in - * cpuid space. 
- */ -#include <linux/cpu.h> - -#include <asm/pat.h> -#include <asm/processor.h> - -#include <asm/apic.h> - -struct cpuid_bit { - u16 feature; - u8 reg; - u8 bit; - u32 level; - u32 sub_leaf; -}; - -enum cpuid_regs { - CR_EAX = 0, - CR_ECX, - CR_EDX, - CR_EBX -}; - -void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) -{ - u32 max_level; - u32 regs[4]; - const struct cpuid_bit *cb; - - static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { - { X86_FEATURE_DTHERM, CR_EAX, 0, 0x00000006, 0 }, - { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 }, - { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 }, - { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 }, - { X86_FEATURE_PTS, CR_EAX, 6, 0x00000006, 0 }, - { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, - { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, - { X86_FEATURE_XSAVEOPT, CR_EAX, 0, 0x0000000d, 1 }, - { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 }, - { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 }, - { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a, 0 }, - { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 }, - { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 }, - { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a, 0 }, - { X86_FEATURE_TSCRATEMSR, CR_EDX, 4, 0x8000000a, 0 }, - { X86_FEATURE_VMCBCLEAN, CR_EDX, 5, 0x8000000a, 0 }, - { X86_FEATURE_FLUSHBYASID, CR_EDX, 6, 0x8000000a, 0 }, - { X86_FEATURE_DECODEASSISTS, CR_EDX, 7, 0x8000000a, 0 }, - { X86_FEATURE_PAUSEFILTER, CR_EDX,10, 0x8000000a, 0 }, - { X86_FEATURE_PFTHRESHOLD, CR_EDX,12, 0x8000000a, 0 }, - { 0, 0, 0, 0, 0 } - }; - - for (cb = cpuid_bits; cb->feature; cb++) { - - /* Verify that the level is valid */ - max_level = cpuid_eax(cb->level & 0xffff0000); - if (max_level < cb->level || - max_level > (cb->level | 0xffff)) - continue; - - cpuid_count(cb->level, cb->sub_leaf, ®s[CR_EAX], - ®s[CR_EBX], ®s[CR_ECX], ®s[CR_EDX]); - - if (regs[cb->reg] & (1 << cb->bit)) - set_cpu_cap(c, cb->feature); - } -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/sched.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/sched.c deleted file mode 100644 index a640ae5a..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/sched.c +++ /dev/null @@ -1,55 +0,0 @@ -#include <linux/sched.h> -#include <linux/math64.h> -#include <linux/percpu.h> -#include <linux/irqflags.h> - -#include <asm/cpufeature.h> -#include <asm/processor.h> - -#ifdef CONFIG_SMP - -static DEFINE_PER_CPU(struct aperfmperf, old_perf_sched); - -static unsigned long scale_aperfmperf(void) -{ - struct aperfmperf val, *old = &__get_cpu_var(old_perf_sched); - unsigned long ratio, flags; - - local_irq_save(flags); - get_aperfmperf(&val); - local_irq_restore(flags); - - ratio = calc_aperfmperf_ratio(old, &val); - *old = val; - - return ratio; -} - -unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu) -{ - /* - * do aperf/mperf on the cpu level because it includes things - * like turbo mode, which are relevant to full cores. 
- */ - if (boot_cpu_has(X86_FEATURE_APERFMPERF)) - return scale_aperfmperf(); - - /* - * maybe have something cpufreq here - */ - - return default_scale_freq_power(sd, cpu); -} - -unsigned long arch_scale_smt_power(struct sched_domain *sd, int cpu) -{ - /* - * aperf/mperf already includes the smt gain - */ - if (boot_cpu_has(X86_FEATURE_APERFMPERF)) - return SCHED_LOAD_SCALE; - - return default_scale_smt_power(sd, cpu); -} - -#endif diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/topology.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/topology.c deleted file mode 100644 index 4397e987..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/topology.c +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Check for extended topology enumeration cpuid leaf 0xb and if it - * exists, use it for populating initial_apicid and cpu topology - * detection. - */ - -#include <linux/cpu.h> -#include <asm/apic.h> -#include <asm/pat.h> -#include <asm/processor.h> - -/* leaf 0xb SMT level */ -#define SMT_LEVEL 0 - -/* leaf 0xb sub-leaf types */ -#define INVALID_TYPE 0 -#define SMT_TYPE 1 -#define CORE_TYPE 2 - -#define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff) -#define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f) -#define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff) - -/* - * Check for extended topology enumeration cpuid leaf 0xb and if it - * exists, use it for populating initial_apicid and cpu topology - * detection. - */ -void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - unsigned int eax, ebx, ecx, edx, sub_index; - unsigned int ht_mask_width, core_plus_mask_width; - unsigned int core_select_mask, core_level_siblings; - static bool printed; - - if (c->cpuid_level < 0xb) - return; - - cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx); - - /* - * check if the cpuid leaf 0xb is actually implemented. - */ - if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE)) - return; - - set_cpu_cap(c, X86_FEATURE_XTOPOLOGY); - - /* - * initial apic id, which also represents 32-bit extended x2apic id. - */ - c->initial_apicid = edx; - - /* - * Populate HT related information from sub-leaf level 0. - */ - core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx); - core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); - - sub_index = 1; - do { - cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx); - - /* - * Check for the Core type in the implemented sub leaves. - */ - if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) { - core_level_siblings = LEVEL_MAX_SIBLINGS(ebx); - core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); - break; - } - - sub_index++; - } while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE); - - core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width; - - c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, ht_mask_width) - & core_select_mask; - c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, core_plus_mask_width); - /* - * Reinit the apicid, now that we have extended initial_apicid. 
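[Editor's note — worked example with assumed values, not part of the diff] For the mask arithmetic above, take a package with two SMT threads per core (ht_mask_width = 1) and 32 logical CPUs per package (core_plus_mask_width = 5); apic->phys_pkg_id() then effectively reduces to a right shift of the APIC id:

	/* assumed topology: 2 threads per core, 16 cores per package */
	unsigned int ht_mask_width = 1;
	unsigned int core_plus_mask_width = 5;
	unsigned int core_select_mask =
		(~(-1 << core_plus_mask_width)) >> ht_mask_width;	/* 0x0f */

	unsigned int initial_apicid = 0x17;	/* thread 1 of core 11, package 0 */
	unsigned int cpu_core_id  =
		(initial_apicid >> ht_mask_width) & core_select_mask;	/* 11 */
	unsigned int phys_proc_id =
		initial_apicid >> core_plus_mask_width;			/* 0  */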
- */ - c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); - - c->x86_max_cores = (core_level_siblings / smp_num_siblings); - - if (!printed) { - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - c->phys_proc_id); - if (c->x86_max_cores > 1) - printk(KERN_INFO "CPU: Processor Core ID: %d\n", - c->cpu_core_id); - printed = 1; - } - return; -#endif -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/transmeta.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/transmeta.c deleted file mode 100644 index 28000743..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/transmeta.c +++ /dev/null @@ -1,109 +0,0 @@ -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/init.h> -#include <asm/processor.h> -#include <asm/msr.h> -#include "cpu.h" - -static void __cpuinit early_init_transmeta(struct cpuinfo_x86 *c) -{ - u32 xlvl; - - /* Transmeta-defined flags: level 0x80860001 */ - xlvl = cpuid_eax(0x80860000); - if ((xlvl & 0xffff0000) == 0x80860000) { - if (xlvl >= 0x80860001) - c->x86_capability[2] = cpuid_edx(0x80860001); - } -} - -static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) -{ - unsigned int cap_mask, uk, max, dummy; - unsigned int cms_rev1, cms_rev2; - unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev; - char cpu_info[65]; - - early_init_transmeta(c); - - cpu_detect_cache_sizes(c); - - /* Print CMS and CPU revision */ - max = cpuid_eax(0x80860000); - cpu_rev = 0; - if (max >= 0x80860001) { - cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags); - if (cpu_rev != 0x02000000) { - printk(KERN_INFO "CPU: Processor revision %u.%u.%u.%u, %u MHz\n", - (cpu_rev >> 24) & 0xff, - (cpu_rev >> 16) & 0xff, - (cpu_rev >> 8) & 0xff, - cpu_rev & 0xff, - cpu_freq); - } - } - if (max >= 0x80860002) { - cpuid(0x80860002, &new_cpu_rev, &cms_rev1, &cms_rev2, &dummy); - if (cpu_rev == 0x02000000) { - printk(KERN_INFO "CPU: Processor revision %08X, %u MHz\n", - new_cpu_rev, cpu_freq); - } - printk(KERN_INFO "CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n", - (cms_rev1 >> 24) & 0xff, - (cms_rev1 >> 16) & 0xff, - (cms_rev1 >> 8) & 0xff, - cms_rev1 & 0xff, - cms_rev2); - } - if (max >= 0x80860006) { - cpuid(0x80860003, - (void *)&cpu_info[0], - (void *)&cpu_info[4], - (void *)&cpu_info[8], - (void *)&cpu_info[12]); - cpuid(0x80860004, - (void *)&cpu_info[16], - (void *)&cpu_info[20], - (void *)&cpu_info[24], - (void *)&cpu_info[28]); - cpuid(0x80860005, - (void *)&cpu_info[32], - (void *)&cpu_info[36], - (void *)&cpu_info[40], - (void *)&cpu_info[44]); - cpuid(0x80860006, - (void *)&cpu_info[48], - (void *)&cpu_info[52], - (void *)&cpu_info[56], - (void *)&cpu_info[60]); - cpu_info[64] = '\0'; - printk(KERN_INFO "CPU: %s\n", cpu_info); - } - - /* Unhide possibly hidden capability flags */ - rdmsr(0x80860004, cap_mask, uk); - wrmsr(0x80860004, ~0, uk); - c->x86_capability[0] = cpuid_edx(0x00000001); - wrmsr(0x80860004, cap_mask, uk); - - /* All Transmeta CPUs have a constant TSC */ - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); - -#ifdef CONFIG_SYSCTL - /* - * randomize_va_space slows us down enormously; - * it probably triggers retranslation of x86->native bytecode - */ - randomize_va_space = 0; -#endif -} - -static const struct cpu_dev __cpuinitconst transmeta_cpu_dev = { - .c_vendor = "Transmeta", - .c_ident = { "GenuineTMx86", "TransmetaCPU" }, - .c_early_init = early_init_transmeta, - .c_init = init_transmeta, - .c_x86_vendor = X86_VENDOR_TRANSMETA, -}; - -cpu_dev_register(transmeta_cpu_dev); diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/umc.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/umc.c 
deleted file mode 100644 index fd2c37bf..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/umc.c +++ /dev/null @@ -1,26 +0,0 @@ -#include <linux/kernel.h> -#include <linux/init.h> -#include <asm/processor.h> -#include "cpu.h" - -/* - * UMC chips appear to be only either 386 or 486, - * so no special init takes place. - */ - -static const struct cpu_dev __cpuinitconst umc_cpu_dev = { - .c_vendor = "UMC", - .c_ident = { "UMC UMC UMC" }, - .c_models = { - { .vendor = X86_VENDOR_UMC, .family = 4, .model_names = - { - [1] = "U5D", - [2] = "U5S", - } - }, - }, - .c_x86_vendor = X86_VENDOR_UMC, -}; - -cpu_dev_register(umc_cpu_dev); - diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpu/vmware.c b/ANDROID_3.4.5/arch/x86/kernel/cpu/vmware.c deleted file mode 100644 index d22d0c4e..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpu/vmware.c +++ /dev/null @@ -1,134 +0,0 @@ -/* - * VMware Detection code. - * - * Copyright (C) 2008, VMware, Inc. - * Author : Alok N Kataria <akataria@vmware.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - */ - -#include <linux/dmi.h> -#include <linux/module.h> -#include <asm/div64.h> -#include <asm/x86_init.h> -#include <asm/hypervisor.h> - -#define CPUID_VMWARE_INFO_LEAF 0x40000000 -#define VMWARE_HYPERVISOR_MAGIC 0x564D5868 -#define VMWARE_HYPERVISOR_PORT 0x5658 - -#define VMWARE_PORT_CMD_GETVERSION 10 -#define VMWARE_PORT_CMD_GETHZ 45 - -#define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \ - __asm__("inl (%%dx)" : \ - "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) : \ - "0"(VMWARE_HYPERVISOR_MAGIC), \ - "1"(VMWARE_PORT_CMD_##cmd), \ - "2"(VMWARE_HYPERVISOR_PORT), "3"(UINT_MAX) : \ - "memory"); - -static inline int __vmware_platform(void) -{ - uint32_t eax, ebx, ecx, edx; - VMWARE_PORT(GETVERSION, eax, ebx, ecx, edx); - return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC; -} - -static unsigned long vmware_get_tsc_khz(void) -{ - uint64_t tsc_hz, lpj; - uint32_t eax, ebx, ecx, edx; - - VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); - - tsc_hz = eax | (((uint64_t)ebx) << 32); - do_div(tsc_hz, 1000); - BUG_ON(tsc_hz >> 32); - printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n", - (unsigned long) tsc_hz / 1000, - (unsigned long) tsc_hz % 1000); - - if (!preset_lpj) { - lpj = ((u64)tsc_hz * 1000); - do_div(lpj, HZ); - preset_lpj = lpj; - } - - return tsc_hz; -} - -static void __init vmware_platform_setup(void) -{ - uint32_t eax, ebx, ecx, edx; - - VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); - - if (ebx != UINT_MAX) - x86_platform.calibrate_tsc = vmware_get_tsc_khz; - else - printk(KERN_WARNING - "Failed to get TSC freq from the hypervisor\n"); -} - -/* - * While checking the dmi string information, just checking the product - * serial key should be enough, as this will always have a VMware - * specific string when running under VMware hypervisor. 
- */ -static bool __init vmware_platform(void) -{ - if (cpu_has_hypervisor) { - unsigned int eax; - unsigned int hyper_vendor_id[3]; - - cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &hyper_vendor_id[0], - &hyper_vendor_id[1], &hyper_vendor_id[2]); - if (!memcmp(hyper_vendor_id, "VMwareVMware", 12)) - return true; - } else if (dmi_available && dmi_name_in_serial("VMware") && - __vmware_platform()) - return true; - - return false; -} - -/* - * VMware hypervisor takes care of exporting a reliable TSC to the guest. - * Still, due to timing difference when running on virtual cpus, the TSC can - * be marked as unstable in some cases. For example, the TSC sync check at - * bootup can fail due to a marginal offset between vcpus' TSCs (though the - * TSCs do not drift from each other). Also, the ACPI PM timer clocksource - * is not suitable as a watchdog when running on a hypervisor because the - * kernel may miss a wrap of the counter if the vcpu is descheduled for a - * long time. To skip these checks at runtime we set these capability bits, - * so that the kernel could just trust the hypervisor with providing a - * reliable virtual TSC that is suitable for timekeeping. - */ -static void __cpuinit vmware_set_cpu_features(struct cpuinfo_x86 *c) -{ - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); - set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); -} - -const __refconst struct hypervisor_x86 x86_hyper_vmware = { - .name = "VMware", - .detect = vmware_platform, - .set_cpu_features = vmware_set_cpu_features, - .init_platform = vmware_platform_setup, -}; -EXPORT_SYMBOL(x86_hyper_vmware); diff --git a/ANDROID_3.4.5/arch/x86/kernel/cpuid.c b/ANDROID_3.4.5/arch/x86/kernel/cpuid.c deleted file mode 100644 index 39472dd2..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/cpuid.c +++ /dev/null @@ -1,240 +0,0 @@ -/* ----------------------------------------------------------------------- * - * - * Copyright 2000-2008 H. Peter Anvin - All Rights Reserved - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, - * USA; either version 2 of the License, or (at your option) any later - * version; incorporated herein by reference. - * - * ----------------------------------------------------------------------- */ - -/* - * x86 CPUID access device - * - * This device is accessed by lseek() to the appropriate CPUID level - * and then read in chunks of 16 bytes. A larger size means multiple - * reads of consecutive levels. - * - * The lower 32 bits of the file position is used as the incoming %eax, - * and the upper 32 bits of the file position as the incoming %ecx, - * the latter intended for "counting" eax levels like eax=4. - * - * This driver uses /dev/cpu/%d/cpuid where %d is the minor number, and on - * an SMP box will direct the access to CPU %d. 
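[Editor's note — hedged userspace sketch, not part of the diff] The access protocol described in the comment above (file position: low 32 bits become %eax, high 32 bits become %ecx; results returned in 16-byte eax/ebx/ecx/edx chunks) can be exercised from userspace roughly as follows. The device path mirrors the /dev/cpu/%d/cpuid naming in the comment; everything else is illustrative:

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		uint32_t regs[4];	/* filled as eax, ebx, ecx, edx by the driver */
		/* leaf 0xb, sub-leaf 1: low 32 bits -> %eax, high 32 bits -> %ecx */
		uint64_t pos = ((uint64_t)1 << 32) | 0x0000000b;
		int fd = open("/dev/cpu/0/cpuid", O_RDONLY);

		if (fd < 0)
			return 1;
		/* reads must be a multiple of 16 bytes */
		if (pread(fd, regs, sizeof(regs), pos) != (ssize_t)sizeof(regs))
			return 1;
		printf("eax=%08x ebx=%08x ecx=%08x edx=%08x\n",
		       regs[0], regs[1], regs[2], regs[3]);
		close(fd);
		return 0;
	}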
- */ - -#include <linux/module.h> - -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/fcntl.h> -#include <linux/init.h> -#include <linux/poll.h> -#include <linux/smp.h> -#include <linux/major.h> -#include <linux/fs.h> -#include <linux/device.h> -#include <linux/cpu.h> -#include <linux/notifier.h> -#include <linux/uaccess.h> -#include <linux/gfp.h> - -#include <asm/processor.h> -#include <asm/msr.h> - -static struct class *cpuid_class; - -struct cpuid_regs { - u32 eax, ebx, ecx, edx; -}; - -static void cpuid_smp_cpuid(void *cmd_block) -{ - struct cpuid_regs *cmd = (struct cpuid_regs *)cmd_block; - - cpuid_count(cmd->eax, cmd->ecx, - &cmd->eax, &cmd->ebx, &cmd->ecx, &cmd->edx); -} - -static loff_t cpuid_seek(struct file *file, loff_t offset, int orig) -{ - loff_t ret; - struct inode *inode = file->f_mapping->host; - - mutex_lock(&inode->i_mutex); - switch (orig) { - case 0: - file->f_pos = offset; - ret = file->f_pos; - break; - case 1: - file->f_pos += offset; - ret = file->f_pos; - break; - default: - ret = -EINVAL; - } - mutex_unlock(&inode->i_mutex); - return ret; -} - -static ssize_t cpuid_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - char __user *tmp = buf; - struct cpuid_regs cmd; - int cpu = iminor(file->f_path.dentry->d_inode); - u64 pos = *ppos; - ssize_t bytes = 0; - int err = 0; - - if (count % 16) - return -EINVAL; /* Invalid chunk size */ - - for (; count; count -= 16) { - cmd.eax = pos; - cmd.ecx = pos >> 32; - err = smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1); - if (err) - break; - if (copy_to_user(tmp, &cmd, 16)) { - err = -EFAULT; - break; - } - tmp += 16; - bytes += 16; - *ppos = ++pos; - } - - return bytes ? bytes : err; -} - -static int cpuid_open(struct inode *inode, struct file *file) -{ - unsigned int cpu; - struct cpuinfo_x86 *c; - - cpu = iminor(file->f_path.dentry->d_inode); - if (cpu >= nr_cpu_ids || !cpu_online(cpu)) - return -ENXIO; /* No such CPU */ - - c = &cpu_data(cpu); - if (c->cpuid_level < 0) - return -EIO; /* CPUID not supported */ - - return 0; -} - -/* - * File operations we support - */ -static const struct file_operations cpuid_fops = { - .owner = THIS_MODULE, - .llseek = cpuid_seek, - .read = cpuid_read, - .open = cpuid_open, -}; - -static __cpuinit int cpuid_device_create(int cpu) -{ - struct device *dev; - - dev = device_create(cpuid_class, NULL, MKDEV(CPUID_MAJOR, cpu), NULL, - "cpu%d", cpu); - return IS_ERR(dev) ? 
PTR_ERR(dev) : 0; -} - -static void cpuid_device_destroy(int cpu) -{ - device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu)); -} - -static int __cpuinit cpuid_class_cpu_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - int err = 0; - - switch (action) { - case CPU_UP_PREPARE: - err = cpuid_device_create(cpu); - break; - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - case CPU_DEAD: - cpuid_device_destroy(cpu); - break; - } - return notifier_from_errno(err); -} - -static struct notifier_block __refdata cpuid_class_cpu_notifier = -{ - .notifier_call = cpuid_class_cpu_callback, -}; - -static char *cpuid_devnode(struct device *dev, umode_t *mode) -{ - return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt)); -} - -static int __init cpuid_init(void) -{ - int i, err = 0; - i = 0; - - if (__register_chrdev(CPUID_MAJOR, 0, NR_CPUS, - "cpu/cpuid", &cpuid_fops)) { - printk(KERN_ERR "cpuid: unable to get major %d for cpuid\n", - CPUID_MAJOR); - err = -EBUSY; - goto out; - } - cpuid_class = class_create(THIS_MODULE, "cpuid"); - if (IS_ERR(cpuid_class)) { - err = PTR_ERR(cpuid_class); - goto out_chrdev; - } - cpuid_class->devnode = cpuid_devnode; - for_each_online_cpu(i) { - err = cpuid_device_create(i); - if (err != 0) - goto out_class; - } - register_hotcpu_notifier(&cpuid_class_cpu_notifier); - - err = 0; - goto out; - -out_class: - i = 0; - for_each_online_cpu(i) { - cpuid_device_destroy(i); - } - class_destroy(cpuid_class); -out_chrdev: - __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); -out: - return err; -} - -static void __exit cpuid_exit(void) -{ - int cpu = 0; - - for_each_online_cpu(cpu) - cpuid_device_destroy(cpu); - class_destroy(cpuid_class); - __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); - unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); -} - -module_init(cpuid_init); -module_exit(cpuid_exit); - -MODULE_AUTHOR("H. Peter Anvin <hpa@zytor.com>"); -MODULE_DESCRIPTION("x86 generic CPUID driver"); -MODULE_LICENSE("GPL"); diff --git a/ANDROID_3.4.5/arch/x86/kernel/crash.c b/ANDROID_3.4.5/arch/x86/kernel/crash.c deleted file mode 100644 index 13ad8997..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/crash.c +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Architecture specific (i386/x86_64) functions for kexec based crash dumps. - * - * Created by: Hariprasad Nellitheertha (hari@in.ibm.com) - * - * Copyright (C) IBM Corporation, 2004. All rights reserved. - * - */ - -#include <linux/init.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/smp.h> -#include <linux/reboot.h> -#include <linux/kexec.h> -#include <linux/delay.h> -#include <linux/elf.h> -#include <linux/elfcore.h> - -#include <asm/processor.h> -#include <asm/hardirq.h> -#include <asm/nmi.h> -#include <asm/hw_irq.h> -#include <asm/apic.h> -#include <asm/hpet.h> -#include <linux/kdebug.h> -#include <asm/cpu.h> -#include <asm/reboot.h> -#include <asm/virtext.h> - -int in_crash_kexec; - -#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) - -static void kdump_nmi_callback(int cpu, struct pt_regs *regs) -{ -#ifdef CONFIG_X86_32 - struct pt_regs fixed_regs; -#endif - -#ifdef CONFIG_X86_32 - if (!user_mode_vm(regs)) { - crash_fixup_ss_esp(&fixed_regs, regs); - regs = &fixed_regs; - } -#endif - crash_save_cpu(regs, cpu); - - /* Disable VMX or SVM if needed. - * - * We need to disable virtualization on all CPUs. 
- * Having VMX or SVM enabled on any CPU may break rebooting - * after the kdump kernel has finished its task. - */ - cpu_emergency_vmxoff(); - cpu_emergency_svm_disable(); - - disable_local_APIC(); -} - -static void kdump_nmi_shootdown_cpus(void) -{ - in_crash_kexec = 1; - nmi_shootdown_cpus(kdump_nmi_callback); - - disable_local_APIC(); -} - -#else -static void kdump_nmi_shootdown_cpus(void) -{ - /* There are no cpus to shootdown */ -} -#endif - -void native_machine_crash_shutdown(struct pt_regs *regs) -{ - /* This function is only called after the system - * has panicked or is otherwise in a critical state. - * The minimum amount of code to allow a kexec'd kernel - * to run successfully needs to happen here. - * - * In practice this means shooting down the other cpus in - * an SMP system. - */ - /* The kernel is broken so disable interrupts */ - local_irq_disable(); - - kdump_nmi_shootdown_cpus(); - - /* Booting kdump kernel with VMX or SVM enabled won't work, - * because (among other limitations) we can't disable paging - * with the virt flags. - */ - cpu_emergency_vmxoff(); - cpu_emergency_svm_disable(); - - lapic_shutdown(); -#if defined(CONFIG_X86_IO_APIC) - disable_IO_APIC(); -#endif -#ifdef CONFIG_HPET_TIMER - hpet_disable(); -#endif - crash_save_cpu(regs, safe_smp_processor_id()); -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/crash_dump_32.c b/ANDROID_3.4.5/arch/x86/kernel/crash_dump_32.c deleted file mode 100644 index 11891ca7..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/crash_dump_32.c +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Memory preserving reboot related code. - * - * Created by: Hariprasad Nellitheertha (hari@in.ibm.com) - * Copyright (C) IBM Corporation, 2004. All rights reserved - */ - -#include <linux/slab.h> -#include <linux/errno.h> -#include <linux/highmem.h> -#include <linux/crash_dump.h> - -#include <asm/uaccess.h> - -static void *kdump_buf_page; - -static inline bool is_crashed_pfn_valid(unsigned long pfn) -{ -#ifndef CONFIG_X86_PAE - /* - * non-PAE kdump kernel executed from a PAE one will crop high pte - * bits and poke unwanted space counting again from address 0, we - * don't want that. pte must fit into unsigned long. In fact the - * test checks high 12 bits for being zero (pfn will be shifted left - * by PAGE_SHIFT). - */ - return pte_pfn(pfn_pte(pfn, __pgprot(0))) == pfn; -#else - return true; -#endif -} - -/** - * copy_oldmem_page - copy one page from "oldmem" - * @pfn: page frame number to be copied - * @buf: target memory address for the copy; this can be in kernel address - * space or user address space (see @userbuf) - * @csize: number of bytes to copy - * @offset: offset in bytes into the page (based on pfn) to begin the copy - * @userbuf: if set, @buf is in user address space, use copy_to_user(), - * otherwise @buf is in kernel address space, use memcpy(). - * - * Copy a page from "oldmem". For this page, there is no pte mapped - * in the current kernel. We stitch up a pte, similar to kmap_atomic. - * - * Calling copy_to_user() in atomic context is not desirable. Hence first - * copying the data to a pre-allocated kernel page and then copying to user - * space in non-atomic context. 
- */ -ssize_t copy_oldmem_page(unsigned long pfn, char *buf, - size_t csize, unsigned long offset, int userbuf) -{ - void *vaddr; - - if (!csize) - return 0; - - if (!is_crashed_pfn_valid(pfn)) - return -EFAULT; - - vaddr = kmap_atomic_pfn(pfn); - - if (!userbuf) { - memcpy(buf, (vaddr + offset), csize); - kunmap_atomic(vaddr); - } else { - if (!kdump_buf_page) { - printk(KERN_WARNING "Kdump: Kdump buffer page not" - " allocated\n"); - kunmap_atomic(vaddr); - return -EFAULT; - } - copy_page(kdump_buf_page, vaddr); - kunmap_atomic(vaddr); - if (copy_to_user(buf, (kdump_buf_page + offset), csize)) - return -EFAULT; - } - - return csize; -} - -static int __init kdump_buf_page_init(void) -{ - int ret = 0; - - kdump_buf_page = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!kdump_buf_page) { - printk(KERN_WARNING "Kdump: Failed to allocate kdump buffer" - " page\n"); - ret = -ENOMEM; - } - - return ret; -} -arch_initcall(kdump_buf_page_init); diff --git a/ANDROID_3.4.5/arch/x86/kernel/crash_dump_64.c b/ANDROID_3.4.5/arch/x86/kernel/crash_dump_64.c deleted file mode 100644 index afa64adb..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/crash_dump_64.c +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Memory preserving reboot related code. - * - * Created by: Hariprasad Nellitheertha (hari@in.ibm.com) - * Copyright (C) IBM Corporation, 2004. All rights reserved - */ - -#include <linux/errno.h> -#include <linux/crash_dump.h> -#include <linux/uaccess.h> -#include <linux/io.h> - -/** - * copy_oldmem_page - copy one page from "oldmem" - * @pfn: page frame number to be copied - * @buf: target memory address for the copy; this can be in kernel address - * space or user address space (see @userbuf) - * @csize: number of bytes to copy - * @offset: offset in bytes into the page (based on pfn) to begin the copy - * @userbuf: if set, @buf is in user address space, use copy_to_user(), - * otherwise @buf is in kernel address space, use memcpy(). - * - * Copy a page from "oldmem". For this page, there is no pte mapped - * in the current kernel. We stitch up a pte, similar to kmap_atomic. - */ -ssize_t copy_oldmem_page(unsigned long pfn, char *buf, - size_t csize, unsigned long offset, int userbuf) -{ - void *vaddr; - - if (!csize) - return 0; - - vaddr = ioremap_cache(pfn << PAGE_SHIFT, PAGE_SIZE); - if (!vaddr) - return -ENOMEM; - - if (userbuf) { - if (copy_to_user(buf, vaddr + offset, csize)) { - iounmap(vaddr); - return -EFAULT; - } - } else - memcpy(buf, vaddr + offset, csize); - - set_iounmap_nonlazy(); - iounmap(vaddr); - return csize; -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/devicetree.c b/ANDROID_3.4.5/arch/x86/kernel/devicetree.c deleted file mode 100644 index 3ae2ced4..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/devicetree.c +++ /dev/null @@ -1,388 +0,0 @@ -/* - * Architecture specific OF callbacks. 
- */ -#include <linux/bootmem.h> -#include <linux/export.h> -#include <linux/io.h> -#include <linux/irqdomain.h> -#include <linux/interrupt.h> -#include <linux/list.h> -#include <linux/of.h> -#include <linux/of_fdt.h> -#include <linux/of_address.h> -#include <linux/of_platform.h> -#include <linux/of_irq.h> -#include <linux/slab.h> -#include <linux/pci.h> -#include <linux/of_pci.h> -#include <linux/initrd.h> - -#include <asm/hpet.h> -#include <asm/apic.h> -#include <asm/pci_x86.h> - -__initdata u64 initial_dtb; -char __initdata cmd_line[COMMAND_LINE_SIZE]; - -int __initdata of_ioapic; - -unsigned long pci_address_to_pio(phys_addr_t address) -{ - /* - * The ioport address can be directly used by inX / outX - */ - BUG_ON(address >= (1 << 16)); - return (unsigned long)address; -} -EXPORT_SYMBOL_GPL(pci_address_to_pio); - -void __init early_init_dt_scan_chosen_arch(unsigned long node) -{ - BUG(); -} - -void __init early_init_dt_add_memory_arch(u64 base, u64 size) -{ - BUG(); -} - -void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) -{ - return __alloc_bootmem(size, align, __pa(MAX_DMA_ADDRESS)); -} - -#ifdef CONFIG_BLK_DEV_INITRD -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) -{ - initrd_start = (unsigned long)__va(start); - initrd_end = (unsigned long)__va(end); - initrd_below_start_ok = 1; -} -#endif - -void __init add_dtb(u64 data) -{ - initial_dtb = data + offsetof(struct setup_data, data); -} - -/* - * CE4100 ids. Will be moved to machine_device_initcall() once we have it. - */ -static struct of_device_id __initdata ce4100_ids[] = { - { .compatible = "intel,ce4100-cp", }, - { .compatible = "isa", }, - { .compatible = "pci", }, - {}, -}; - -static int __init add_bus_probe(void) -{ - if (!of_have_populated_dt()) - return 0; - - return of_platform_bus_probe(NULL, ce4100_ids, NULL); -} -module_init(add_bus_probe); - -#ifdef CONFIG_PCI -struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus) -{ - struct device_node *np; - - for_each_node_by_type(np, "pci") { - const void *prop; - unsigned int bus_min; - - prop = of_get_property(np, "bus-range", NULL); - if (!prop) - continue; - bus_min = be32_to_cpup(prop); - if (bus->number == bus_min) - return np; - } - return NULL; -} - -static int x86_of_pci_irq_enable(struct pci_dev *dev) -{ - struct of_irq oirq; - u32 virq; - int ret; - u8 pin; - - ret = pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); - if (ret) - return ret; - if (!pin) - return 0; - - ret = of_irq_map_pci(dev, &oirq); - if (ret) - return ret; - - virq = irq_create_of_mapping(oirq.controller, oirq.specifier, - oirq.size); - if (virq == 0) - return -EINVAL; - dev->irq = virq; - return 0; -} - -static void x86_of_pci_irq_disable(struct pci_dev *dev) -{ -} - -void __cpuinit x86_of_pci_init(void) -{ - pcibios_enable_irq = x86_of_pci_irq_enable; - pcibios_disable_irq = x86_of_pci_irq_disable; -} -#endif - -static void __init dtb_setup_hpet(void) -{ -#ifdef CONFIG_HPET_TIMER - struct device_node *dn; - struct resource r; - int ret; - - dn = of_find_compatible_node(NULL, NULL, "intel,ce4100-hpet"); - if (!dn) - return; - ret = of_address_to_resource(dn, 0, &r); - if (ret) { - WARN_ON(1); - return; - } - hpet_address = r.start; -#endif -} - -static void __init dtb_lapic_setup(void) -{ -#ifdef CONFIG_X86_LOCAL_APIC - struct device_node *dn; - struct resource r; - int ret; - - dn = of_find_compatible_node(NULL, NULL, "intel,ce4100-lapic"); - if (!dn) - return; - - ret = of_address_to_resource(dn, 0, &r); - if (WARN_ON(ret)) - 
return; - - /* Did the boot loader setup the local APIC ? */ - if (!cpu_has_apic) { - if (apic_force_enable(r.start)) - return; - } - smp_found_config = 1; - pic_mode = 1; - register_lapic_address(r.start); - generic_processor_info(boot_cpu_physical_apicid, - GET_APIC_VERSION(apic_read(APIC_LVR))); -#endif -} - -#ifdef CONFIG_X86_IO_APIC -static unsigned int ioapic_id; - -static void __init dtb_add_ioapic(struct device_node *dn) -{ - struct resource r; - int ret; - - ret = of_address_to_resource(dn, 0, &r); - if (ret) { - printk(KERN_ERR "Can't obtain address from node %s.\n", - dn->full_name); - return; - } - mp_register_ioapic(++ioapic_id, r.start, gsi_top); -} - -static void __init dtb_ioapic_setup(void) -{ - struct device_node *dn; - - for_each_compatible_node(dn, NULL, "intel,ce4100-ioapic") - dtb_add_ioapic(dn); - - if (nr_ioapics) { - of_ioapic = 1; - return; - } - printk(KERN_ERR "Error: No information about IO-APIC in OF.\n"); -} -#else -static void __init dtb_ioapic_setup(void) {} -#endif - -static void __init dtb_apic_setup(void) -{ - dtb_lapic_setup(); - dtb_ioapic_setup(); -} - -#ifdef CONFIG_OF_FLATTREE -static void __init x86_flattree_get_config(void) -{ - u32 size, map_len; - void *new_dtb; - - if (!initial_dtb) - return; - - map_len = max(PAGE_SIZE - (initial_dtb & ~PAGE_MASK), - (u64)sizeof(struct boot_param_header)); - - initial_boot_params = early_memremap(initial_dtb, map_len); - size = be32_to_cpu(initial_boot_params->totalsize); - if (map_len < size) { - early_iounmap(initial_boot_params, map_len); - initial_boot_params = early_memremap(initial_dtb, size); - map_len = size; - } - - new_dtb = alloc_bootmem(size); - memcpy(new_dtb, initial_boot_params, size); - early_iounmap(initial_boot_params, map_len); - - initial_boot_params = new_dtb; - - /* root level address cells */ - of_scan_flat_dt(early_init_dt_scan_root, NULL); - - unflatten_device_tree(); -} -#else -static inline void x86_flattree_get_config(void) { } -#endif - -void __init x86_dtb_init(void) -{ - x86_flattree_get_config(); - - if (!of_have_populated_dt()) - return; - - dtb_setup_hpet(); - dtb_apic_setup(); -} - -#ifdef CONFIG_X86_IO_APIC - -struct of_ioapic_type { - u32 out_type; - u32 trigger; - u32 polarity; -}; - -static struct of_ioapic_type of_ioapic_type[] = -{ - { - .out_type = IRQ_TYPE_EDGE_RISING, - .trigger = IOAPIC_EDGE, - .polarity = 1, - }, - { - .out_type = IRQ_TYPE_LEVEL_LOW, - .trigger = IOAPIC_LEVEL, - .polarity = 0, - }, - { - .out_type = IRQ_TYPE_LEVEL_HIGH, - .trigger = IOAPIC_LEVEL, - .polarity = 1, - }, - { - .out_type = IRQ_TYPE_EDGE_FALLING, - .trigger = IOAPIC_EDGE, - .polarity = 0, - }, -}; - -static int ioapic_xlate(struct irq_domain *domain, - struct device_node *controller, - const u32 *intspec, u32 intsize, - irq_hw_number_t *out_hwirq, u32 *out_type) -{ - struct io_apic_irq_attr attr; - struct of_ioapic_type *it; - u32 line, idx; - int rc; - - if (WARN_ON(intsize < 2)) - return -EINVAL; - - line = intspec[0]; - - if (intspec[1] >= ARRAY_SIZE(of_ioapic_type)) - return -EINVAL; - - it = &of_ioapic_type[intspec[1]]; - - idx = (u32) domain->host_data; - set_io_apic_irq_attr(&attr, idx, line, it->trigger, it->polarity); - - rc = io_apic_setup_irq_pin_once(irq_find_mapping(domain, line), - cpu_to_node(0), &attr); - if (rc) - return rc; - - *out_hwirq = line; - *out_type = it->out_type; - return 0; -} - -const struct irq_domain_ops ioapic_irq_domain_ops = { - .xlate = ioapic_xlate, -}; - -static void __init ioapic_add_ofnode(struct device_node *np) -{ - struct resource r; - int i, 
ret; - - ret = of_address_to_resource(np, 0, &r); - if (ret) { - printk(KERN_ERR "Failed to obtain address for %s\n", - np->full_name); - return; - } - - for (i = 0; i < nr_ioapics; i++) { - if (r.start == mpc_ioapic_addr(i)) { - struct irq_domain *id; - struct mp_ioapic_gsi *gsi_cfg; - - gsi_cfg = mp_ioapic_gsi_routing(i); - - id = irq_domain_add_legacy(np, 32, gsi_cfg->gsi_base, 0, - &ioapic_irq_domain_ops, - (void*)i); - BUG_ON(!id); - return; - } - } - printk(KERN_ERR "IOxAPIC at %s is not registered.\n", np->full_name); -} - -void __init x86_add_irq_domains(void) -{ - struct device_node *dp; - - if (!of_have_populated_dt()) - return; - - for_each_node_with_property(dp, "interrupt-controller") { - if (of_device_is_compatible(dp, "intel,ce4100-ioapic")) - ioapic_add_ofnode(dp); - } -} -#else -void __init x86_add_irq_domains(void) { } -#endif diff --git a/ANDROID_3.4.5/arch/x86/kernel/doublefault_32.c b/ANDROID_3.4.5/arch/x86/kernel/doublefault_32.c deleted file mode 100644 index 37250fe4..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/doublefault_32.c +++ /dev/null @@ -1,69 +0,0 @@ -#include <linux/mm.h> -#include <linux/sched.h> -#include <linux/init.h> -#include <linux/init_task.h> -#include <linux/fs.h> - -#include <asm/uaccess.h> -#include <asm/pgtable.h> -#include <asm/processor.h> -#include <asm/desc.h> - -#define DOUBLEFAULT_STACKSIZE (1024) -static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE]; -#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE) - -#define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + MAXMEM) - -static void doublefault_fn(void) -{ - struct desc_ptr gdt_desc = {0, 0}; - unsigned long gdt, tss; - - store_gdt(&gdt_desc); - gdt = gdt_desc.address; - - printk(KERN_EMERG "PANIC: double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size); - - if (ptr_ok(gdt)) { - gdt += GDT_ENTRY_TSS << 3; - tss = get_desc_base((struct desc_struct *)gdt); - printk(KERN_EMERG "double fault, tss at %08lx\n", tss); - - if (ptr_ok(tss)) { - struct x86_hw_tss *t = (struct x86_hw_tss *)tss; - - printk(KERN_EMERG "eip = %08lx, esp = %08lx\n", - t->ip, t->sp); - - printk(KERN_EMERG "eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n", - t->ax, t->bx, t->cx, t->dx); - printk(KERN_EMERG "esi = %08lx, edi = %08lx\n", - t->si, t->di); - } - } - - for (;;) - cpu_relax(); -} - -struct tss_struct doublefault_tss __cacheline_aligned = { - .x86_tss = { - .sp0 = STACK_START, - .ss0 = __KERNEL_DS, - .ldt = 0, - .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, - - .ip = (unsigned long) doublefault_fn, - /* 0x2 bit is always set */ - .flags = X86_EFLAGS_SF | 0x2, - .sp = STACK_START, - .es = __USER_DS, - .cs = __KERNEL_CS, - .ss = __KERNEL_DS, - .ds = __USER_DS, - .fs = __KERNEL_PERCPU, - - .__cr3 = __pa_nodebug(swapper_pg_dir), - } -}; diff --git a/ANDROID_3.4.5/arch/x86/kernel/dumpstack.c b/ANDROID_3.4.5/arch/x86/kernel/dumpstack.c deleted file mode 100644 index 1b81839b..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/dumpstack.c +++ /dev/null @@ -1,329 +0,0 @@ -/* - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs - */ -#include <linux/kallsyms.h> -#include <linux/kprobes.h> -#include <linux/uaccess.h> -#include <linux/utsname.h> -#include <linux/hardirq.h> -#include <linux/kdebug.h> -#include <linux/module.h> -#include <linux/ptrace.h> -#include <linux/ftrace.h> -#include <linux/kexec.h> -#include <linux/bug.h> -#include <linux/nmi.h> -#include <linux/sysfs.h> - -#include <asm/stacktrace.h> - - -int 
panic_on_unrecovered_nmi; -int panic_on_io_nmi; -unsigned int code_bytes = 64; -int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; -static int die_counter; - -void printk_address(unsigned long address, int reliable) -{ - printk(" [<%p>] %s%pB\n", (void *) address, - reliable ? "" : "? ", (void *) address); -} - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -static void -print_ftrace_graph_addr(unsigned long addr, void *data, - const struct stacktrace_ops *ops, - struct thread_info *tinfo, int *graph) -{ - struct task_struct *task; - unsigned long ret_addr; - int index; - - if (addr != (unsigned long)return_to_handler) - return; - - task = tinfo->task; - index = task->curr_ret_stack; - - if (!task->ret_stack || index < *graph) - return; - - index -= *graph; - ret_addr = task->ret_stack[index].ret; - - ops->address(data, ret_addr, 1); - - (*graph)++; -} -#else -static inline void -print_ftrace_graph_addr(unsigned long addr, void *data, - const struct stacktrace_ops *ops, - struct thread_info *tinfo, int *graph) -{ } -#endif - -/* - * x86-64 can have up to three kernel stacks: - * process stack - * interrupt stack - * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack - */ - -static inline int valid_stack_ptr(struct thread_info *tinfo, - void *p, unsigned int size, void *end) -{ - void *t = tinfo; - if (end) { - if (p < end && p >= (end-THREAD_SIZE)) - return 1; - else - return 0; - } - return p > t && p < t + THREAD_SIZE - size; -} - -unsigned long -print_context_stack(struct thread_info *tinfo, - unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data, - unsigned long *end, int *graph) -{ - struct stack_frame *frame = (struct stack_frame *)bp; - - while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { - unsigned long addr; - - addr = *stack; - if (__kernel_text_address(addr)) { - if ((unsigned long) stack == bp + sizeof(long)) { - ops->address(data, addr, 1); - frame = frame->next_frame; - bp = (unsigned long) frame; - } else { - ops->address(data, addr, 0); - } - print_ftrace_graph_addr(addr, data, ops, tinfo, graph); - } - stack++; - } - return bp; -} -EXPORT_SYMBOL_GPL(print_context_stack); - -unsigned long -print_context_stack_bp(struct thread_info *tinfo, - unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data, - unsigned long *end, int *graph) -{ - struct stack_frame *frame = (struct stack_frame *)bp; - unsigned long *ret_addr = &frame->return_address; - - while (valid_stack_ptr(tinfo, ret_addr, sizeof(*ret_addr), end)) { - unsigned long addr = *ret_addr; - - if (!__kernel_text_address(addr)) - break; - - ops->address(data, addr, 1); - frame = frame->next_frame; - ret_addr = &frame->return_address; - print_ftrace_graph_addr(addr, data, ops, tinfo, graph); - } - - return (unsigned long)frame; -} -EXPORT_SYMBOL_GPL(print_context_stack_bp); - -static int print_trace_stack(void *data, char *name) -{ - printk("%s <%s> ", (char *)data, name); - return 0; -} - -/* - * Print one address/symbol entries per line. 
- */ -static void print_trace_address(void *data, unsigned long addr, int reliable) -{ - touch_nmi_watchdog(); - printk(data); - printk_address(addr, reliable); -} - -static const struct stacktrace_ops print_trace_ops = { - .stack = print_trace_stack, - .address = print_trace_address, - .walk_stack = print_context_stack, -}; - -void -show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, char *log_lvl) -{ - printk("%sCall Trace:\n", log_lvl); - dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); -} - -void show_trace(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp) -{ - show_trace_log_lvl(task, regs, stack, bp, ""); -} - -void show_stack(struct task_struct *task, unsigned long *sp) -{ - show_stack_log_lvl(task, NULL, sp, 0, ""); -} - -/* - * The architecture-independent dump_stack generator - */ -void dump_stack(void) -{ - unsigned long bp; - unsigned long stack; - - bp = stack_frame(current, NULL); - printk("Pid: %d, comm: %.20s %s %s %.*s\n", - current->pid, current->comm, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - show_trace(NULL, NULL, &stack, bp); -} -EXPORT_SYMBOL(dump_stack); - -static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; -static int die_owner = -1; -static unsigned int die_nest_count; - -unsigned __kprobes long oops_begin(void) -{ - int cpu; - unsigned long flags; - - oops_enter(); - - /* racy, but better than risking deadlock. */ - raw_local_irq_save(flags); - cpu = smp_processor_id(); - if (!arch_spin_trylock(&die_lock)) { - if (cpu == die_owner) - /* nested oops. should stop eventually */; - else - arch_spin_lock(&die_lock); - } - die_nest_count++; - die_owner = cpu; - console_verbose(); - bust_spinlocks(1); - return flags; -} -EXPORT_SYMBOL_GPL(oops_begin); - -void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) -{ - if (regs && kexec_should_crash(current)) - crash_kexec(regs); - - bust_spinlocks(0); - die_owner = -1; - add_taint(TAINT_DIE); - die_nest_count--; - if (!die_nest_count) - /* Nest count reaches zero, release the lock. 
*/ - arch_spin_unlock(&die_lock); - raw_local_irq_restore(flags); - oops_exit(); - - if (!signr) - return; - if (in_interrupt()) - panic("Fatal exception in interrupt"); - if (panic_on_oops) - panic("Fatal exception"); - do_exit(signr); -} - -int __kprobes __die(const char *str, struct pt_regs *regs, long err) -{ -#ifdef CONFIG_X86_32 - unsigned short ss; - unsigned long sp; -#endif - printk(KERN_DEFAULT - "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); -#ifdef CONFIG_PREEMPT - printk("PREEMPT "); -#endif -#ifdef CONFIG_SMP - printk("SMP "); -#endif -#ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC"); -#endif - printk("\n"); - if (notify_die(DIE_OOPS, str, regs, err, - current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) - return 1; - - show_registers(regs); -#ifdef CONFIG_X86_32 - if (user_mode_vm(regs)) { - sp = regs->sp; - ss = regs->ss & 0xffff; - } else { - sp = kernel_stack_pointer(regs); - savesegment(ss, ss); - } - printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); - print_symbol("%s", regs->ip); - printk(" SS:ESP %04x:%08lx\n", ss, sp); -#else - /* Executive summary in case the oops scrolled away */ - printk(KERN_ALERT "RIP "); - printk_address(regs->ip, 1); - printk(" RSP <%016lx>\n", regs->sp); -#endif - return 0; -} - -/* - * This is gone through when something in the kernel has done something bad - * and is about to be terminated: - */ -void die(const char *str, struct pt_regs *regs, long err) -{ - unsigned long flags = oops_begin(); - int sig = SIGSEGV; - - if (!user_mode_vm(regs)) - report_bug(regs->ip, regs); - - if (__die(str, regs, err)) - sig = 0; - oops_end(flags, regs, sig); -} - -static int __init kstack_setup(char *s) -{ - if (!s) - return -EINVAL; - kstack_depth_to_print = simple_strtoul(s, NULL, 0); - return 0; -} -early_param("kstack", kstack_setup); - -static int __init code_bytes_setup(char *s) -{ - code_bytes = simple_strtoul(s, NULL, 0); - if (code_bytes > 8192) - code_bytes = 8192; - - return 1; -} -__setup("code_bytes=", code_bytes_setup); diff --git a/ANDROID_3.4.5/arch/x86/kernel/dumpstack_32.c b/ANDROID_3.4.5/arch/x86/kernel/dumpstack_32.c deleted file mode 100644 index 88ec9129..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/dumpstack_32.c +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs - */ -#include <linux/kallsyms.h> -#include <linux/kprobes.h> -#include <linux/uaccess.h> -#include <linux/hardirq.h> -#include <linux/kdebug.h> -#include <linux/module.h> -#include <linux/ptrace.h> -#include <linux/kexec.h> -#include <linux/sysfs.h> -#include <linux/bug.h> -#include <linux/nmi.h> - -#include <asm/stacktrace.h> - - -void dump_trace(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data) -{ - int graph = 0; - - if (!task) - task = current; - - if (!stack) { - unsigned long dummy; - - stack = &dummy; - if (task && task != current) - stack = (unsigned long *)task->thread.sp; - } - - if (!bp) - bp = stack_frame(task, regs); - - for (;;) { - struct thread_info *context; - - context = (struct thread_info *) - ((unsigned long)stack & (~(THREAD_SIZE - 1))); - bp = ops->walk_stack(context, stack, bp, ops, data, NULL, &graph); - - stack = (unsigned long *)context->previous_esp; - if (!stack) - break; - if (ops->stack(data, "IRQ") < 0) - break; - touch_nmi_watchdog(); - } -} -EXPORT_SYMBOL(dump_trace); - -void -show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long 
*sp, unsigned long bp, char *log_lvl) -{ - unsigned long *stack; - int i; - - if (sp == NULL) { - if (task) - sp = (unsigned long *)task->thread.sp; - else - sp = (unsigned long *)&sp; - } - - stack = sp; - for (i = 0; i < kstack_depth_to_print; i++) { - if (kstack_end(stack)) - break; - if (i && ((i % STACKSLOTS_PER_LINE) == 0)) - printk(KERN_CONT "\n"); - printk(KERN_CONT " %08lx", *stack++); - touch_nmi_watchdog(); - } - printk(KERN_CONT "\n"); - show_trace_log_lvl(task, regs, sp, bp, log_lvl); -} - - -void show_registers(struct pt_regs *regs) -{ - int i; - - print_modules(); - __show_regs(regs, !user_mode_vm(regs)); - - printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n", - TASK_COMM_LEN, current->comm, task_pid_nr(current), - current_thread_info(), current, task_thread_info(current)); - /* - * When in-kernel, we also print out the stack and code at the - * time of the fault.. - */ - if (!user_mode_vm(regs)) { - unsigned int code_prologue = code_bytes * 43 / 64; - unsigned int code_len = code_bytes; - unsigned char c; - u8 *ip; - - printk(KERN_EMERG "Stack:\n"); - show_stack_log_lvl(NULL, regs, ®s->sp, 0, KERN_EMERG); - - printk(KERN_EMERG "Code: "); - - ip = (u8 *)regs->ip - code_prologue; - if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { - /* try starting at IP */ - ip = (u8 *)regs->ip; - code_len = code_len - code_prologue + 1; - } - for (i = 0; i < code_len; i++, ip++) { - if (ip < (u8 *)PAGE_OFFSET || - probe_kernel_address(ip, c)) { - printk(KERN_CONT " Bad EIP value."); - break; - } - if (ip == (u8 *)regs->ip) - printk(KERN_CONT "<%02x> ", c); - else - printk(KERN_CONT "%02x ", c); - } - } - printk(KERN_CONT "\n"); -} - -int is_valid_bugaddr(unsigned long ip) -{ - unsigned short ud2; - - if (ip < PAGE_OFFSET) - return 0; - if (probe_kernel_address((unsigned short *)ip, ud2)) - return 0; - - return ud2 == 0x0b0f; -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/dumpstack_64.c b/ANDROID_3.4.5/arch/x86/kernel/dumpstack_64.c deleted file mode 100644 index 17107bd6..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/dumpstack_64.c +++ /dev/null @@ -1,307 +0,0 @@ -/* - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs - */ -#include <linux/kallsyms.h> -#include <linux/kprobes.h> -#include <linux/uaccess.h> -#include <linux/hardirq.h> -#include <linux/kdebug.h> -#include <linux/module.h> -#include <linux/ptrace.h> -#include <linux/kexec.h> -#include <linux/sysfs.h> -#include <linux/bug.h> -#include <linux/nmi.h> - -#include <asm/stacktrace.h> - - -#define N_EXCEPTION_STACKS_END \ - (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2) - -static char x86_stack_ids[][8] = { - [ DEBUG_STACK-1 ] = "#DB", - [ NMI_STACK-1 ] = "NMI", - [ DOUBLEFAULT_STACK-1 ] = "#DF", - [ STACKFAULT_STACK-1 ] = "#SS", - [ MCE_STACK-1 ] = "#MC", -#if DEBUG_STKSZ > EXCEPTION_STKSZ - [ N_EXCEPTION_STACKS ... - N_EXCEPTION_STACKS_END ] = "#DB[?]" -#endif -}; - -static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, - unsigned *usedp, char **idp) -{ - unsigned k; - - /* - * Iterate over all exception stacks, and figure out whether - * 'stack' is in one of them: - */ - for (k = 0; k < N_EXCEPTION_STACKS; k++) { - unsigned long end = per_cpu(orig_ist, cpu).ist[k]; - /* - * Is 'stack' above this exception frame's end? - * If yes then skip to the next frame. - */ - if (stack >= end) - continue; - /* - * Is 'stack' above this exception frame's start address? - * If yes then we found the right frame. 
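is_valid_bugaddr() above (and the 64-bit version later in dumpstack_64.c) decides whether a trapping instruction pointer is a real BUG() site by loading two bytes and comparing them with the UD2 opcode; 0x0b0f is simply the byte pair 0x0f 0x0b read as a little-endian 16-bit value. A minimal user-space sketch of the same check, assuming a little-endian machine such as x86:

#include <stdio.h>
#include <string.h>

/* UD2 is encoded as the byte sequence 0x0f 0x0b; loaded as a
 * little-endian 16-bit value it reads back as 0x0b0f, which is the
 * constant the kernel's is_valid_bugaddr() compares against. */
static int looks_like_ud2(const unsigned char *ip)
{
	unsigned short insn;

	memcpy(&insn, ip, sizeof(insn));	/* a safe read, like probe_kernel_address() */
	return insn == 0x0b0f;
}

int main(void)
{
	unsigned char bug_site[] = { 0x0f, 0x0b };	/* ud2 */
	unsigned char other[]    = { 0x90, 0x90 };	/* nop; nop */

	printf("bug_site: %d\n", looks_like_ud2(bug_site));	/* 1 on x86 */
	printf("other:    %d\n", looks_like_ud2(other));	/* 0 */
	return 0;
}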
- */ - if (stack >= end - EXCEPTION_STKSZ) { - /* - * Make sure we only iterate through an exception - * stack once. If it comes up for the second time - * then there's something wrong going on - just - * break out and return NULL: - */ - if (*usedp & (1U << k)) - break; - *usedp |= 1U << k; - *idp = x86_stack_ids[k]; - return (unsigned long *)end; - } - /* - * If this is a debug stack, and if it has a larger size than - * the usual exception stacks, then 'stack' might still - * be within the lower portion of the debug stack: - */ -#if DEBUG_STKSZ > EXCEPTION_STKSZ - if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) { - unsigned j = N_EXCEPTION_STACKS - 1; - - /* - * Black magic. A large debug stack is composed of - * multiple exception stack entries, which we - * iterate through now. Dont look: - */ - do { - ++j; - end -= EXCEPTION_STKSZ; - x86_stack_ids[j][4] = '1' + - (j - N_EXCEPTION_STACKS); - } while (stack < end - EXCEPTION_STKSZ); - if (*usedp & (1U << j)) - break; - *usedp |= 1U << j; - *idp = x86_stack_ids[j]; - return (unsigned long *)end; - } -#endif - } - return NULL; -} - -static inline int -in_irq_stack(unsigned long *stack, unsigned long *irq_stack, - unsigned long *irq_stack_end) -{ - return (stack >= irq_stack && stack < irq_stack_end); -} - -/* - * x86-64 can have up to three kernel stacks: - * process stack - * interrupt stack - * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack - */ - -void dump_trace(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data) -{ - const unsigned cpu = get_cpu(); - unsigned long *irq_stack_end = - (unsigned long *)per_cpu(irq_stack_ptr, cpu); - unsigned used = 0; - struct thread_info *tinfo; - int graph = 0; - unsigned long dummy; - - if (!task) - task = current; - - if (!stack) { - if (regs) - stack = (unsigned long *)regs->sp; - else if (task != current) - stack = (unsigned long *)task->thread.sp; - else - stack = &dummy; - } - - if (!bp) - bp = stack_frame(task, regs); - /* - * Print function call entries in all stacks, starting at the - * current stack address. 
If the stacks consist of nested - * exceptions - */ - tinfo = task_thread_info(task); - for (;;) { - char *id; - unsigned long *estack_end; - estack_end = in_exception_stack(cpu, (unsigned long)stack, - &used, &id); - - if (estack_end) { - if (ops->stack(data, id) < 0) - break; - - bp = ops->walk_stack(tinfo, stack, bp, ops, - data, estack_end, &graph); - ops->stack(data, "<EOE>"); - /* - * We link to the next stack via the - * second-to-last pointer (index -2 to end) in the - * exception stack: - */ - stack = (unsigned long *) estack_end[-2]; - continue; - } - if (irq_stack_end) { - unsigned long *irq_stack; - irq_stack = irq_stack_end - - (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack); - - if (in_irq_stack(stack, irq_stack, irq_stack_end)) { - if (ops->stack(data, "IRQ") < 0) - break; - bp = ops->walk_stack(tinfo, stack, bp, - ops, data, irq_stack_end, &graph); - /* - * We link to the next stack (which would be - * the process stack normally) the last - * pointer (index -1 to end) in the IRQ stack: - */ - stack = (unsigned long *) (irq_stack_end[-1]); - irq_stack_end = NULL; - ops->stack(data, "EOI"); - continue; - } - } - break; - } - - /* - * This handles the process stack: - */ - bp = ops->walk_stack(tinfo, stack, bp, ops, data, NULL, &graph); - put_cpu(); -} -EXPORT_SYMBOL(dump_trace); - -void -show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, unsigned long bp, char *log_lvl) -{ - unsigned long *irq_stack_end; - unsigned long *irq_stack; - unsigned long *stack; - int cpu; - int i; - - preempt_disable(); - cpu = smp_processor_id(); - - irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); - irq_stack = (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE); - - /* - * Debugging aid: "show_stack(NULL, NULL);" prints the - * back trace for this cpu: - */ - if (sp == NULL) { - if (task) - sp = (unsigned long *)task->thread.sp; - else - sp = (unsigned long *)&sp; - } - - stack = sp; - for (i = 0; i < kstack_depth_to_print; i++) { - if (stack >= irq_stack && stack <= irq_stack_end) { - if (stack == irq_stack_end) { - stack = (unsigned long *) (irq_stack_end[-1]); - printk(KERN_CONT " <EOI> "); - } - } else { - if (((long) stack & (THREAD_SIZE-1)) == 0) - break; - } - if (i && ((i % STACKSLOTS_PER_LINE) == 0)) - printk(KERN_CONT "\n"); - printk(KERN_CONT " %016lx", *stack++); - touch_nmi_watchdog(); - } - preempt_enable(); - - printk(KERN_CONT "\n"); - show_trace_log_lvl(task, regs, sp, bp, log_lvl); -} - -void show_registers(struct pt_regs *regs) -{ - int i; - unsigned long sp; - const int cpu = smp_processor_id(); - struct task_struct *cur = current; - - sp = regs->sp; - printk("CPU %d ", cpu); - print_modules(); - __show_regs(regs, 1); - printk("Process %s (pid: %d, threadinfo %p, task %p)\n", - cur->comm, cur->pid, task_thread_info(cur), cur); - - /* - * When in-kernel, we also print out the stack and code at the - * time of the fault.. 
- */ - if (!user_mode(regs)) { - unsigned int code_prologue = code_bytes * 43 / 64; - unsigned int code_len = code_bytes; - unsigned char c; - u8 *ip; - - printk(KERN_DEFAULT "Stack:\n"); - show_stack_log_lvl(NULL, regs, (unsigned long *)sp, - 0, KERN_DEFAULT); - - printk(KERN_DEFAULT "Code: "); - - ip = (u8 *)regs->ip - code_prologue; - if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { - /* try starting at IP */ - ip = (u8 *)regs->ip; - code_len = code_len - code_prologue + 1; - } - for (i = 0; i < code_len; i++, ip++) { - if (ip < (u8 *)PAGE_OFFSET || - probe_kernel_address(ip, c)) { - printk(KERN_CONT " Bad RIP value."); - break; - } - if (ip == (u8 *)regs->ip) - printk(KERN_CONT "<%02x> ", c); - else - printk(KERN_CONT "%02x ", c); - } - } - printk(KERN_CONT "\n"); -} - -int is_valid_bugaddr(unsigned long ip) -{ - unsigned short ud2; - - if (__copy_from_user(&ud2, (const void __user *) ip, sizeof(ud2))) - return 0; - - return ud2 == 0x0b0f; -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/e820.c b/ANDROID_3.4.5/arch/x86/kernel/e820.c deleted file mode 100644 index 62d61e99..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/e820.c +++ /dev/null @@ -1,1111 +0,0 @@ -/* - * Handle the memory map. - * The functions here do the job until bootmem takes over. - * - * Getting sanitize_e820_map() in sync with i386 version by applying change: - * - Provisions for empty E820 memory regions (reported by certain BIOSes). - * Alex Achenbach <xela@slit.de>, December 2002. - * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> - * - */ -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/init.h> -#include <linux/crash_dump.h> -#include <linux/export.h> -#include <linux/bootmem.h> -#include <linux/pfn.h> -#include <linux/suspend.h> -#include <linux/acpi.h> -#include <linux/firmware-map.h> -#include <linux/memblock.h> -#include <linux/sort.h> - -#include <asm/e820.h> -#include <asm/proto.h> -#include <asm/setup.h> - -/* - * The e820 map is the map that gets modified e.g. with command line parameters - * and that is also registered with modifications in the kernel resource tree - * with the iomem_resource as parent. - * - * The e820_saved is directly saved after the BIOS-provided memory map is - * copied. It doesn't get modified afterwards. It's registered for the - * /sys/firmware/memmap interface. - * - * That memory map is not modified and is used as base for kexec. The kexec'd - * kernel should get the same memory map as the firmware provides. Then the - * user can e.g. boot the original kernel with mem=1G while still booting the - * next kernel with full memory. - */ -struct e820map e820; -struct e820map e820_saved; - -/* For PCI or other memory-mapped resources */ -unsigned long pci_mem_start = 0xaeedbabe; -#ifdef CONFIG_PCI -EXPORT_SYMBOL(pci_mem_start); -#endif - -/* - * This function checks if any part of the range <start,end> is mapped - * with type. - */ -int -e820_any_mapped(u64 start, u64 end, unsigned type) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - - if (type && ei->type != type) - continue; - if (ei->addr >= end || ei->addr + ei->size <= start) - continue; - return 1; - } - return 0; -} -EXPORT_SYMBOL_GPL(e820_any_mapped); - -/* - * This function checks if the entire range <start,end> is mapped with type. 
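As that comment says, e820_all_mapped() (next) relies on the table being sorted and non-overlapping: it can simply advance start through every region that touches the query range and declare success once start reaches end. A compact stand-alone version of the sweep, with the type filter dropped for brevity and invented sample regions:

#include <stdio.h>
#include <stdint.h>

struct region { uint64_t addr, size; };

/* Returns 1 if [start, end) is fully covered by the sorted,
 * non-overlapping regions -- the same sweep e820_all_mapped() does. */
static int all_mapped(const struct region *map, int n,
		      uint64_t start, uint64_t end)
{
	int i;

	for (i = 0; i < n; i++) {
		const struct region *r = &map[i];

		if (r->addr >= end || r->addr + r->size <= start)
			continue;			/* no overlap with what is left */
		if (r->addr <= start)
			start = r->addr + r->size;	/* coverage now reaches this far */
		if (start >= end)
			return 1;
	}
	return 0;
}

int main(void)
{
	struct region map[] = { { 0x0, 0x1000 }, { 0x1000, 0x3000 } };

	printf("%d\n", all_mapped(map, 2, 0x800, 0x2000));	/* 1: fully covered */
	printf("%d\n", all_mapped(map, 2, 0x800, 0x5000));	/* 0: runs past the map */
	return 0;
}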
- * - * Note: this function only works correct if the e820 table is sorted and - * not-overlapping, which is the case - */ -int __init e820_all_mapped(u64 start, u64 end, unsigned type) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - - if (type && ei->type != type) - continue; - /* is the region (part) in overlap with the current region ?*/ - if (ei->addr >= end || ei->addr + ei->size <= start) - continue; - - /* if the region is at the beginning of <start,end> we move - * start to the end of the region since it's ok until there - */ - if (ei->addr <= start) - start = ei->addr + ei->size; - /* - * if start is now at or beyond end, we're done, full - * coverage - */ - if (start >= end) - return 1; - } - return 0; -} - -/* - * Add a memory region to the kernel e820 map. - */ -static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size, - int type) -{ - int x = e820x->nr_map; - - if (x >= ARRAY_SIZE(e820x->map)) { - printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); - return; - } - - e820x->map[x].addr = start; - e820x->map[x].size = size; - e820x->map[x].type = type; - e820x->nr_map++; -} - -void __init e820_add_region(u64 start, u64 size, int type) -{ - __e820_add_region(&e820, start, size, type); -} - -static void __init e820_print_type(u32 type) -{ - switch (type) { - case E820_RAM: - case E820_RESERVED_KERN: - printk(KERN_CONT "(usable)"); - break; - case E820_RESERVED: - printk(KERN_CONT "(reserved)"); - break; - case E820_ACPI: - printk(KERN_CONT "(ACPI data)"); - break; - case E820_NVS: - printk(KERN_CONT "(ACPI NVS)"); - break; - case E820_UNUSABLE: - printk(KERN_CONT "(unusable)"); - break; - default: - printk(KERN_CONT "type %u", type); - break; - } -} - -void __init e820_print_map(char *who) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - printk(KERN_INFO " %s: %016Lx - %016Lx ", who, - (unsigned long long) e820.map[i].addr, - (unsigned long long) - (e820.map[i].addr + e820.map[i].size)); - e820_print_type(e820.map[i].type); - printk(KERN_CONT "\n"); - } -} - -/* - * Sanitize the BIOS e820 map. - * - * Some e820 responses include overlapping entries. The following - * replaces the original e820 map with a new one, removing overlaps, - * and resolving conflicting memory types in favor of highest - * numbered type. - * - * The input parameter biosmap points to an array of 'struct - * e820entry' which on entry has elements in the range [0, *pnr_map) - * valid, and which has space for up to max_nr_map entries. - * On return, the resulting sanitized e820 map entries will be in - * overwritten in the same location, starting at biosmap. - * - * The integer pointed to by pnr_map must be valid on entry (the - * current number of valid entries located at biosmap) and will - * be updated on return, with the new number of valid entries - * (something no more than max_nr_map.) - * - * The return value from sanitize_e820_map() is zero if it - * successfully 'sanitized' the map entries passed in, and is -1 - * if it did nothing, which can happen if either of (1) it was - * only passed one map entry, or (2) any of the input map entries - * were invalid (start + size < start, meaning that the size was - * so big the described memory range wrapped around through zero.) - * - * Visually we're performing the following - * (1,2,3,4 = memory types)... 
- * - * Sample memory map (w/overlaps): - * ____22__________________ - * ______________________4_ - * ____1111________________ - * _44_____________________ - * 11111111________________ - * ____________________33__ - * ___________44___________ - * __________33333_________ - * ______________22________ - * ___________________2222_ - * _________111111111______ - * _____________________11_ - * _________________4______ - * - * Sanitized equivalent (no overlap): - * 1_______________________ - * _44_____________________ - * ___1____________________ - * ____22__________________ - * ______11________________ - * _________1______________ - * __________3_____________ - * ___________44___________ - * _____________33_________ - * _______________2________ - * ________________1_______ - * _________________4______ - * ___________________2____ - * ____________________33__ - * ______________________4_ - */ -struct change_member { - struct e820entry *pbios; /* pointer to original bios entry */ - unsigned long long addr; /* address for this change point */ -}; - -static int __init cpcompare(const void *a, const void *b) -{ - struct change_member * const *app = a, * const *bpp = b; - const struct change_member *ap = *app, *bp = *bpp; - - /* - * Inputs are pointers to two elements of change_point[]. If their - * addresses are unequal, their difference dominates. If the addresses - * are equal, then consider one that represents the end of its region - * to be greater than one that does not. - */ - if (ap->addr != bp->addr) - return ap->addr > bp->addr ? 1 : -1; - - return (ap->addr != ap->pbios->addr) - (bp->addr != bp->pbios->addr); -} - -int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, - u32 *pnr_map) -{ - static struct change_member change_point_list[2*E820_X_MAX] __initdata; - static struct change_member *change_point[2*E820_X_MAX] __initdata; - static struct e820entry *overlap_list[E820_X_MAX] __initdata; - static struct e820entry new_bios[E820_X_MAX] __initdata; - unsigned long current_type, last_type; - unsigned long long last_addr; - int chgidx; - int overlap_entries; - int new_bios_entry; - int old_nr, new_nr, chg_nr; - int i; - - /* if there's only one memory region, don't bother */ - if (*pnr_map < 2) - return -1; - - old_nr = *pnr_map; - BUG_ON(old_nr > max_nr_map); - - /* bail out if we find any unreasonable addresses in bios map */ - for (i = 0; i < old_nr; i++) - if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) - return -1; - - /* create pointers for initial change-point information (for sorting) */ - for (i = 0; i < 2 * old_nr; i++) - change_point[i] = &change_point_list[i]; - - /* record all known change-points (starting and ending addresses), - omitting those that are for empty memory regions */ - chgidx = 0; - for (i = 0; i < old_nr; i++) { - if (biosmap[i].size != 0) { - change_point[chgidx]->addr = biosmap[i].addr; - change_point[chgidx++]->pbios = &biosmap[i]; - change_point[chgidx]->addr = biosmap[i].addr + - biosmap[i].size; - change_point[chgidx++]->pbios = &biosmap[i]; - } - } - chg_nr = chgidx; - - /* sort change-point list by memory addresses (low -> high) */ - sort(change_point, chg_nr, sizeof *change_point, cpcompare, NULL); - - /* create a new bios memory map, removing overlaps */ - overlap_entries = 0; /* number of entries in the overlap table */ - new_bios_entry = 0; /* index for creating new bios map entries */ - last_type = 0; /* start with undefined memory type */ - last_addr = 0; /* start with 0 as last starting address */ - - /* loop through 
change-points, determining affect on the new bios map */ - for (chgidx = 0; chgidx < chg_nr; chgidx++) { - /* keep track of all overlapping bios entries */ - if (change_point[chgidx]->addr == - change_point[chgidx]->pbios->addr) { - /* - * add map entry to overlap list (> 1 entry - * implies an overlap) - */ - overlap_list[overlap_entries++] = - change_point[chgidx]->pbios; - } else { - /* - * remove entry from list (order independent, - * so swap with last) - */ - for (i = 0; i < overlap_entries; i++) { - if (overlap_list[i] == - change_point[chgidx]->pbios) - overlap_list[i] = - overlap_list[overlap_entries-1]; - } - overlap_entries--; - } - /* - * if there are overlapping entries, decide which - * "type" to use (larger value takes precedence -- - * 1=usable, 2,3,4,4+=unusable) - */ - current_type = 0; - for (i = 0; i < overlap_entries; i++) - if (overlap_list[i]->type > current_type) - current_type = overlap_list[i]->type; - /* - * continue building up new bios map based on this - * information - */ - if (current_type != last_type) { - if (last_type != 0) { - new_bios[new_bios_entry].size = - change_point[chgidx]->addr - last_addr; - /* - * move forward only if the new size - * was non-zero - */ - if (new_bios[new_bios_entry].size != 0) - /* - * no more space left for new - * bios entries ? - */ - if (++new_bios_entry >= max_nr_map) - break; - } - if (current_type != 0) { - new_bios[new_bios_entry].addr = - change_point[chgidx]->addr; - new_bios[new_bios_entry].type = current_type; - last_addr = change_point[chgidx]->addr; - } - last_type = current_type; - } - } - /* retain count for new bios entries */ - new_nr = new_bios_entry; - - /* copy new bios mapping into original location */ - memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry)); - *pnr_map = new_nr; - - return 0; -} - -static int __init __append_e820_map(struct e820entry *biosmap, int nr_map) -{ - while (nr_map) { - u64 start = biosmap->addr; - u64 size = biosmap->size; - u64 end = start + size; - u32 type = biosmap->type; - - /* Overflow in 64 bits? Ignore the memory map. */ - if (start > end) - return -1; - - e820_add_region(start, size, type); - - biosmap++; - nr_map--; - } - return 0; -} - -/* - * Copy the BIOS e820 map into a safe place. - * - * Sanity-check it while we're at it.. - * - * If we're lucky and live on a modern system, the setup code - * will have given us a memory map that we can use to properly - * set up memory. If we aren't, we'll fake a memory map. - */ -static int __init append_e820_map(struct e820entry *biosmap, int nr_map) -{ - /* Only one memory region (or negative)? Ignore it */ - if (nr_map < 2) - return -1; - - return __append_e820_map(biosmap, nr_map); -} - -static u64 __init __e820_update_range(struct e820map *e820x, u64 start, - u64 size, unsigned old_type, - unsigned new_type) -{ - u64 end; - unsigned int i; - u64 real_updated_size = 0; - - BUG_ON(old_type == new_type); - - if (size > (ULLONG_MAX - start)) - size = ULLONG_MAX - start; - - end = start + size; - printk(KERN_DEBUG "e820 update range: %016Lx - %016Lx ", - (unsigned long long) start, - (unsigned long long) end); - e820_print_type(old_type); - printk(KERN_CONT " ==> "); - e820_print_type(new_type); - printk(KERN_CONT "\n"); - - for (i = 0; i < e820x->nr_map; i++) { - struct e820entry *ei = &e820x->map[i]; - u64 final_start, final_end; - u64 ei_end; - - if (ei->type != old_type) - continue; - - ei_end = ei->addr + ei->size; - /* totally covered by new range? 
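The sanitization pass above is easier to follow outside the kernel: record a start and an end change point for every entry, sort them (ends after starts at equal addresses, as in cpcompare()), sweep while tracking which entries are active, and let the highest-numbered type win wherever entries overlap. A reduced user-space sketch of that sweep; the sample regions and the output format are made up for illustration:

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

struct region { uint64_t addr, size; unsigned type; };
struct change { uint64_t addr; const struct region *r; };

static int cmp(const void *a, const void *b)
{
	const struct change *x = a, *y = b;

	if (x->addr != y->addr)
		return x->addr > y->addr ? 1 : -1;
	/* At equal addresses, region ends sort after region starts. */
	return (x->addr != x->r->addr) - (y->addr != y->r->addr);
}

int main(void)
{
	/* Overlapping sample map: usable RAM (1) with reserved (2) pieces on top. */
	struct region in[] = {
		{ 0x00000, 0xa0000, 1 },
		{ 0x80000, 0x20000, 2 },
		{ 0xf0000, 0x10000, 2 },
	};
	enum { N = sizeof(in) / sizeof(in[0]) };
	struct change cp[2 * N];
	const struct region *active[N];
	int nactive = 0, i, j;
	unsigned last_type = 0;
	uint64_t last_addr = 0;

	for (i = 0; i < N; i++) {
		cp[2 * i]     = (struct change){ in[i].addr,              &in[i] };
		cp[2 * i + 1] = (struct change){ in[i].addr + in[i].size, &in[i] };
	}
	qsort(cp, 2 * N, sizeof(cp[0]), cmp);

	for (i = 0; i < 2 * N; i++) {
		unsigned cur = 0;

		if (cp[i].addr == cp[i].r->addr) {
			active[nactive++] = cp[i].r;		/* region starts here */
		} else {
			for (j = 0; j < nactive; j++)		/* region ends here */
				if (active[j] == cp[i].r)
					active[j] = active[--nactive];
		}
		for (j = 0; j < nactive; j++)			/* highest type wins */
			if (active[j]->type > cur)
				cur = active[j]->type;

		if (cur != last_type) {
			if (last_type)
				printf("%#9llx - %#9llx  type %u\n",
				       (unsigned long long)last_addr,
				       (unsigned long long)cp[i].addr, last_type);
			last_addr = cp[i].addr;
			last_type = cur;
		}
	}
	return 0;
}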
*/ - if (ei->addr >= start && ei_end <= end) { - ei->type = new_type; - real_updated_size += ei->size; - continue; - } - - /* new range is totally covered? */ - if (ei->addr < start && ei_end > end) { - __e820_add_region(e820x, start, size, new_type); - __e820_add_region(e820x, end, ei_end - end, ei->type); - ei->size = start - ei->addr; - real_updated_size += size; - continue; - } - - /* partially covered */ - final_start = max(start, ei->addr); - final_end = min(end, ei_end); - if (final_start >= final_end) - continue; - - __e820_add_region(e820x, final_start, final_end - final_start, - new_type); - - real_updated_size += final_end - final_start; - - /* - * left range could be head or tail, so need to update - * size at first. - */ - ei->size -= final_end - final_start; - if (ei->addr < final_start) - continue; - ei->addr = final_end; - } - return real_updated_size; -} - -u64 __init e820_update_range(u64 start, u64 size, unsigned old_type, - unsigned new_type) -{ - return __e820_update_range(&e820, start, size, old_type, new_type); -} - -static u64 __init e820_update_range_saved(u64 start, u64 size, - unsigned old_type, unsigned new_type) -{ - return __e820_update_range(&e820_saved, start, size, old_type, - new_type); -} - -/* make e820 not cover the range */ -u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type, - int checktype) -{ - int i; - u64 end; - u64 real_removed_size = 0; - - if (size > (ULLONG_MAX - start)) - size = ULLONG_MAX - start; - - end = start + size; - printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ", - (unsigned long long) start, - (unsigned long long) end); - if (checktype) - e820_print_type(old_type); - printk(KERN_CONT "\n"); - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - u64 final_start, final_end; - u64 ei_end; - - if (checktype && ei->type != old_type) - continue; - - ei_end = ei->addr + ei->size; - /* totally covered? */ - if (ei->addr >= start && ei_end <= end) { - real_removed_size += ei->size; - memset(ei, 0, sizeof(struct e820entry)); - continue; - } - - /* new range is totally covered? */ - if (ei->addr < start && ei_end > end) { - e820_add_region(end, ei_end - end, ei->type); - ei->size = start - ei->addr; - real_removed_size += size; - continue; - } - - /* partially covered */ - final_start = max(start, ei->addr); - final_end = min(end, ei_end); - if (final_start >= final_end) - continue; - real_removed_size += final_end - final_start; - - /* - * left range could be head or tail, so need to update - * size at first. - */ - ei->size -= final_end - final_start; - if (ei->addr < final_start) - continue; - ei->addr = final_end; - } - return real_removed_size; -} - -void __init update_e820(void) -{ - u32 nr_map; - - nr_map = e820.nr_map; - if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map)) - return; - e820.nr_map = nr_map; - printk(KERN_INFO "modified physical RAM map:\n"); - e820_print_map("modified"); -} -static void __init update_e820_saved(void) -{ - u32 nr_map; - - nr_map = e820_saved.nr_map; - if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map)) - return; - e820_saved.nr_map = nr_map; -} -#define MAX_GAP_END 0x100000000ull -/* - * Search for a gap in the e820 memory space from start_addr to end_addr. - */ -__init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, - unsigned long start_addr, unsigned long long end_addr) -{ - unsigned long long last; - int i = e820.nr_map; - int found = 0; - - last = (end_addr && end_addr < MAX_GAP_END) ? 
end_addr : MAX_GAP_END; - - while (--i >= 0) { - unsigned long long start = e820.map[i].addr; - unsigned long long end = start + e820.map[i].size; - - if (end < start_addr) - continue; - - /* - * Since "last" is at most 4GB, we know we'll - * fit in 32 bits if this condition is true - */ - if (last > end) { - unsigned long gap = last - end; - - if (gap >= *gapsize) { - *gapsize = gap; - *gapstart = end; - found = 1; - } - } - if (start < last) - last = start; - } - return found; -} - -/* - * Search for the biggest gap in the low 32 bits of the e820 - * memory space. We pass this space to PCI to assign MMIO resources - * for hotplug or unconfigured devices in. - * Hopefully the BIOS let enough space left. - */ -__init void e820_setup_gap(void) -{ - unsigned long gapstart, gapsize; - int found; - - gapstart = 0x10000000; - gapsize = 0x400000; - found = e820_search_gap(&gapstart, &gapsize, 0, MAX_GAP_END); - -#ifdef CONFIG_X86_64 - if (!found) { - gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024; - printk(KERN_ERR - "PCI: Warning: Cannot find a gap in the 32bit address range\n" - "PCI: Unassigned devices with 32bit resource registers may break!\n"); - } -#endif - - /* - * e820_reserve_resources_late protect stolen RAM already - */ - pci_mem_start = gapstart; - - printk(KERN_INFO - "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", - pci_mem_start, gapstart, gapsize); -} - -/** - * Because of the size limitation of struct boot_params, only first - * 128 E820 memory entries are passed to kernel via - * boot_params.e820_map, others are passed via SETUP_E820_EXT node of - * linked list of struct setup_data, which is parsed here. - */ -void __init parse_e820_ext(struct setup_data *sdata) -{ - int entries; - struct e820entry *extmap; - - entries = sdata->len / sizeof(struct e820entry); - extmap = (struct e820entry *)(sdata->data); - __append_e820_map(extmap, entries); - sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); - printk(KERN_INFO "extended physical RAM map:\n"); - e820_print_map("extended"); -} - -#if defined(CONFIG_X86_64) || \ - (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION)) -/** - * Find the ranges of physical addresses that do not correspond to - * e820 RAM areas and mark the corresponding pages as nosave for - * hibernation (32 bit) or software suspend and suspend to RAM (64 bit). - * - * This function requires the e820 map to be sorted and without any - * overlapping entries and assumes the first e820 area to be RAM. - */ -void __init e820_mark_nosave_regions(unsigned long limit_pfn) -{ - int i; - unsigned long pfn; - - pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size); - for (i = 1; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - - if (pfn < PFN_UP(ei->addr)) - register_nosave_region(pfn, PFN_UP(ei->addr)); - - pfn = PFN_DOWN(ei->addr + ei->size); - if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN) - register_nosave_region(PFN_UP(ei->addr), pfn); - - if (pfn >= limit_pfn) - break; - } -} -#endif - -#ifdef CONFIG_ACPI -/** - * Mark ACPI NVS memory region, so that we can save/restore it during - * hibernation and the subsequent resume. 
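e820_setup_gap() above hunts for the largest hole below 4 GiB so PCI can place MMIO BARs there, and e820_search_gap() does the actual scan by walking the sorted map from the top down. A stripped-down user-space version of the same scan, with invented sample regions:

#include <stdio.h>
#include <stdint.h>

#define MAX_GAP_END 0x100000000ULL	/* only consider the low 4 GiB */

struct region { uint64_t addr, size; };

/* Walk a sorted map from the top down, tracking the lowest start seen
 * so far ("last") and measuring the hole between each entry's end and
 * that start -- the same shape as e820_search_gap(). */
static int find_gap(const struct region *map, int n,
		    uint64_t *gapstart, uint64_t *gapsize)
{
	uint64_t last = MAX_GAP_END;
	int i = n, found = 0;

	while (--i >= 0) {
		uint64_t start = map[i].addr;
		uint64_t end = start + map[i].size;

		if (last > end && last - end >= *gapsize) {
			*gapsize = last - end;
			*gapstart = end;
			found = 1;
		}
		if (start < last)
			last = start;
	}
	return found;
}

int main(void)
{
	/* Sample map: RAM up to 2 GiB, then firmware ranges near 4 GiB. */
	struct region map[] = {
		{ 0x00000000ULL, 0x80000000ULL },
		{ 0xfec00000ULL, 0x00100000ULL },
		{ 0xfee00000ULL, 0x00001000ULL },
	};
	uint64_t gapstart = 0x10000000, gapsize = 0x400000;	/* same defaults as e820_setup_gap() */

	if (find_gap(map, 3, &gapstart, &gapsize))
		printf("MMIO gap at %#llx, size %#llx\n",
		       (unsigned long long)gapstart,
		       (unsigned long long)gapsize);
	return 0;
}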
- */ -static int __init e820_mark_nvs_memory(void) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - - if (ei->type == E820_NVS) - acpi_nvs_register(ei->addr, ei->size); - } - - return 0; -} -core_initcall(e820_mark_nvs_memory); -#endif - -/* - * pre allocated 4k and reserved it in memblock and e820_saved - */ -u64 __init early_reserve_e820(u64 size, u64 align) -{ - u64 addr; - - addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); - if (addr) { - e820_update_range_saved(addr, size, E820_RAM, E820_RESERVED); - printk(KERN_INFO "update e820_saved for early_reserve_e820\n"); - update_e820_saved(); - } - - return addr; -} - -#ifdef CONFIG_X86_32 -# ifdef CONFIG_X86_PAE -# define MAX_ARCH_PFN (1ULL<<(36-PAGE_SHIFT)) -# else -# define MAX_ARCH_PFN (1ULL<<(32-PAGE_SHIFT)) -# endif -#else /* CONFIG_X86_32 */ -# define MAX_ARCH_PFN MAXMEM>>PAGE_SHIFT -#endif - -/* - * Find the highest page frame number we have available - */ -static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type) -{ - int i; - unsigned long last_pfn = 0; - unsigned long max_arch_pfn = MAX_ARCH_PFN; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - unsigned long start_pfn; - unsigned long end_pfn; - - if (ei->type != type) - continue; - - start_pfn = ei->addr >> PAGE_SHIFT; - end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT; - - if (start_pfn >= limit_pfn) - continue; - if (end_pfn > limit_pfn) { - last_pfn = limit_pfn; - break; - } - if (end_pfn > last_pfn) - last_pfn = end_pfn; - } - - if (last_pfn > max_arch_pfn) - last_pfn = max_arch_pfn; - - printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n", - last_pfn, max_arch_pfn); - return last_pfn; -} -unsigned long __init e820_end_of_ram_pfn(void) -{ - return e820_end_pfn(MAX_ARCH_PFN, E820_RAM); -} - -unsigned long __init e820_end_of_low_ram_pfn(void) -{ - return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM); -} - -static void early_panic(char *msg) -{ - early_printk(msg); - panic(msg); -} - -static int userdef __initdata; - -/* "mem=nopentium" disables the 4MB page tables. */ -static int __init parse_memopt(char *p) -{ - u64 mem_size; - - if (!p) - return -EINVAL; - - if (!strcmp(p, "nopentium")) { -#ifdef CONFIG_X86_32 - setup_clear_cpu_cap(X86_FEATURE_PSE); - return 0; -#else - printk(KERN_WARNING "mem=nopentium ignored! (only supported on x86_32)\n"); - return -EINVAL; -#endif - } - - userdef = 1; - mem_size = memparse(p, &p); - /* don't remove all of memory when handling "mem={invalid}" param */ - if (mem_size == 0) - return -EINVAL; - e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); - - return 0; -} -early_param("mem", parse_memopt); - -static int __init parse_memmap_opt(char *p) -{ - char *oldp; - u64 start_at, mem_size; - - if (!p) - return -EINVAL; - - if (!strncmp(p, "exactmap", 8)) { -#ifdef CONFIG_CRASH_DUMP - /* - * If we are doing a crash dump, we still need to know - * the real mem size before original memory map is - * reset. 
- */ - saved_max_pfn = e820_end_of_ram_pfn(); -#endif - e820.nr_map = 0; - userdef = 1; - return 0; - } - - oldp = p; - mem_size = memparse(p, &p); - if (p == oldp) - return -EINVAL; - - userdef = 1; - if (*p == '@') { - start_at = memparse(p+1, &p); - e820_add_region(start_at, mem_size, E820_RAM); - } else if (*p == '#') { - start_at = memparse(p+1, &p); - e820_add_region(start_at, mem_size, E820_ACPI); - } else if (*p == '$') { - start_at = memparse(p+1, &p); - e820_add_region(start_at, mem_size, E820_RESERVED); - } else - e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); - - return *p == '\0' ? 0 : -EINVAL; -} -early_param("memmap", parse_memmap_opt); - -void __init finish_e820_parsing(void) -{ - if (userdef) { - u32 nr = e820.nr_map; - - if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0) - early_panic("Invalid user supplied memory map"); - e820.nr_map = nr; - - printk(KERN_INFO "user-defined physical RAM map:\n"); - e820_print_map("user"); - } -} - -static inline const char *e820_type_to_string(int e820_type) -{ - switch (e820_type) { - case E820_RESERVED_KERN: - case E820_RAM: return "System RAM"; - case E820_ACPI: return "ACPI Tables"; - case E820_NVS: return "ACPI Non-volatile Storage"; - case E820_UNUSABLE: return "Unusable memory"; - default: return "reserved"; - } -} - -/* - * Mark e820 reserved areas as busy for the resource manager. - */ -static struct resource __initdata *e820_res; -void __init e820_reserve_resources(void) -{ - int i; - struct resource *res; - u64 end; - - res = alloc_bootmem(sizeof(struct resource) * e820.nr_map); - e820_res = res; - for (i = 0; i < e820.nr_map; i++) { - end = e820.map[i].addr + e820.map[i].size - 1; - if (end != (resource_size_t)end) { - res++; - continue; - } - res->name = e820_type_to_string(e820.map[i].type); - res->start = e820.map[i].addr; - res->end = end; - - res->flags = IORESOURCE_MEM; - - /* - * don't register the region that could be conflicted with - * pci device BAR resource and insert them later in - * pcibios_resource_survey() - */ - if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) { - res->flags |= IORESOURCE_BUSY; - insert_resource(&iomem_resource, res); - } - res++; - } - - for (i = 0; i < e820_saved.nr_map; i++) { - struct e820entry *entry = &e820_saved.map[i]; - firmware_map_add_early(entry->addr, - entry->addr + entry->size - 1, - e820_type_to_string(entry->type)); - } -} - -/* How much should we pad RAM ending depending on where it is? 
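Answering that question, e820_reserve_resources_late() (further below) rounds the end of each RAM region up to a position-dependent boundary and reserves the padding, so nothing gets allocated in RAM the BIOS may have stolen. A quick illustration of the rounding, using the same thresholds as the ram_alignment() helper that follows and a few invented end addresses:

#include <stdio.h>
#include <stdint.h>

/* Alignment used when padding out the end of a RAM region:
 * 64 KiB in the first megabyte, 1 MiB below 16 MiB, 64 MiB above. */
static uint64_t ram_alignment(uint64_t pos)
{
	uint64_t mb = pos >> 20;

	if (!mb)
		return 64 * 1024;
	if (mb < 16)
		return 1024 * 1024;
	return 64 * 1024 * 1024;
}

static uint64_t round_up(uint64_t x, uint64_t align)
{
	return (x + align - 1) & ~(align - 1);	/* align must be a power of two */
}

int main(void)
{
	uint64_t ends[] = { 0x9fc00, 0xcff123, 0x7654321f };
	int i;

	for (i = 0; i < 3; i++) {
		uint64_t end = ends[i];
		uint64_t padded = round_up(end, ram_alignment(end));

		printf("RAM ends at %#10llx -> buffer reserved up to %#10llx\n",
		       (unsigned long long)end, (unsigned long long)padded);
	}
	return 0;
}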
*/ -static unsigned long ram_alignment(resource_size_t pos) -{ - unsigned long mb = pos >> 20; - - /* To 64kB in the first megabyte */ - if (!mb) - return 64*1024; - - /* To 1MB in the first 16MB */ - if (mb < 16) - return 1024*1024; - - /* To 64MB for anything above that */ - return 64*1024*1024; -} - -#define MAX_RESOURCE_SIZE ((resource_size_t)-1) - -void __init e820_reserve_resources_late(void) -{ - int i; - struct resource *res; - - res = e820_res; - for (i = 0; i < e820.nr_map; i++) { - if (!res->parent && res->end) - insert_resource_expand_to_fit(&iomem_resource, res); - res++; - } - - /* - * Try to bump up RAM regions to reasonable boundaries to - * avoid stolen RAM: - */ - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *entry = &e820.map[i]; - u64 start, end; - - if (entry->type != E820_RAM) - continue; - start = entry->addr + entry->size; - end = round_up(start, ram_alignment(start)) - 1; - if (end > MAX_RESOURCE_SIZE) - end = MAX_RESOURCE_SIZE; - if (start >= end) - continue; - printk(KERN_DEBUG "reserve RAM buffer: %016llx - %016llx ", - start, end); - reserve_region_with_split(&iomem_resource, start, end, - "RAM buffer"); - } -} - -char *__init default_machine_specific_memory_setup(void) -{ - char *who = "BIOS-e820"; - u32 new_nr; - /* - * Try to copy the BIOS-supplied E820-map. - * - * Otherwise fake a memory map; one section from 0k->640k, - * the next section from 1mb->appropriate_mem_k - */ - new_nr = boot_params.e820_entries; - sanitize_e820_map(boot_params.e820_map, - ARRAY_SIZE(boot_params.e820_map), - &new_nr); - boot_params.e820_entries = new_nr; - if (append_e820_map(boot_params.e820_map, boot_params.e820_entries) - < 0) { - u64 mem_size; - - /* compare results from other methods and take the greater */ - if (boot_params.alt_mem_k - < boot_params.screen_info.ext_mem_k) { - mem_size = boot_params.screen_info.ext_mem_k; - who = "BIOS-88"; - } else { - mem_size = boot_params.alt_mem_k; - who = "BIOS-e801"; - } - - e820.nr_map = 0; - e820_add_region(0, LOWMEMSIZE(), E820_RAM); - e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM); - } - - /* In case someone cares... 
*/ - return who; -} - -void __init setup_memory_map(void) -{ - char *who; - - who = x86_init.resources.memory_setup(); - memcpy(&e820_saved, &e820, sizeof(struct e820map)); - printk(KERN_INFO "BIOS-provided physical RAM map:\n"); - e820_print_map(who); -} - -void __init memblock_x86_fill(void) -{ - int i; - u64 end; - - /* - * EFI may have more than 128 entries - * We are safe to enable resizing, beause memblock_x86_fill() - * is rather later for x86 - */ - memblock_allow_resize(); - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - - end = ei->addr + ei->size; - if (end != (resource_size_t)end) - continue; - - if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN) - continue; - - memblock_add(ei->addr, ei->size); - } - - memblock_dump_all(); -} - -void __init memblock_find_dma_reserve(void) -{ -#ifdef CONFIG_X86_64 - u64 nr_pages = 0, nr_free_pages = 0; - unsigned long start_pfn, end_pfn; - phys_addr_t start, end; - int i; - u64 u; - - /* - * need to find out used area below MAX_DMA_PFN - * need to use memblock to get free size in [0, MAX_DMA_PFN] - * at first, and assume boot_mem will not take below MAX_DMA_PFN - */ - for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) { - start_pfn = min_t(unsigned long, start_pfn, MAX_DMA_PFN); - end_pfn = min_t(unsigned long, end_pfn, MAX_DMA_PFN); - nr_pages += end_pfn - start_pfn; - } - - for_each_free_mem_range(u, MAX_NUMNODES, &start, &end, NULL) { - start_pfn = min_t(unsigned long, PFN_UP(start), MAX_DMA_PFN); - end_pfn = min_t(unsigned long, PFN_DOWN(end), MAX_DMA_PFN); - if (start_pfn < end_pfn) - nr_free_pages += end_pfn - start_pfn; - } - - set_dma_reserve(nr_pages - nr_free_pages); -#endif -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/early-quirks.c b/ANDROID_3.4.5/arch/x86/kernel/early-quirks.c deleted file mode 100644 index 3755ef49..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/early-quirks.c +++ /dev/null @@ -1,292 +0,0 @@ -/* Various workarounds for chipset bugs. - This code runs very early and can't use the regular PCI subsystem - The entries are keyed to PCI bridges which usually identify chipsets - uniquely. - This is only for whole classes of chipsets with specific problems which - need early invasive action (e.g. before the timers are initialized). - Most PCI device specific workarounds can be done later and should be - in standard PCI quirks - Mainboard specific bugs should be handled by DMI entries. 
- CPU specific bugs in setup.c */ - -#include <linux/pci.h> -#include <linux/acpi.h> -#include <linux/pci_ids.h> -#include <asm/pci-direct.h> -#include <asm/dma.h> -#include <asm/io_apic.h> -#include <asm/apic.h> -#include <asm/iommu.h> -#include <asm/gart.h> - -static void __init fix_hypertransport_config(int num, int slot, int func) -{ - u32 htcfg; - /* - * we found a hypertransport bus - * make sure that we are broadcasting - * interrupts to all cpus on the ht bus - * if we're using extended apic ids - */ - htcfg = read_pci_config(num, slot, func, 0x68); - if (htcfg & (1 << 18)) { - printk(KERN_INFO "Detected use of extended apic ids " - "on hypertransport bus\n"); - if ((htcfg & (1 << 17)) == 0) { - printk(KERN_INFO "Enabling hypertransport extended " - "apic interrupt broadcast\n"); - printk(KERN_INFO "Note this is a bios bug, " - "please contact your hw vendor\n"); - htcfg |= (1 << 17); - write_pci_config(num, slot, func, 0x68, htcfg); - } - } - - -} - -static void __init via_bugs(int num, int slot, int func) -{ -#ifdef CONFIG_GART_IOMMU - if ((max_pfn > MAX_DMA32_PFN || force_iommu) && - !gart_iommu_aperture_allowed) { - printk(KERN_INFO - "Looks like a VIA chipset. Disabling IOMMU." - " Override with iommu=allowed\n"); - gart_iommu_aperture_disabled = 1; - } -#endif -} - -#ifdef CONFIG_ACPI -#ifdef CONFIG_X86_IO_APIC - -static int __init nvidia_hpet_check(struct acpi_table_header *header) -{ - return 0; -} -#endif /* CONFIG_X86_IO_APIC */ -#endif /* CONFIG_ACPI */ - -static void __init nvidia_bugs(int num, int slot, int func) -{ -#ifdef CONFIG_ACPI -#ifdef CONFIG_X86_IO_APIC - /* - * All timer overrides on Nvidia are - * wrong unless HPET is enabled. - * Unfortunately that's not true on many Asus boards. - * We don't know yet how to detect this automatically, but - * at least allow a command line override. - */ - if (acpi_use_timer_override) - return; - - if (acpi_table_parse(ACPI_SIG_HPET, nvidia_hpet_check)) { - acpi_skip_timer_override = 1; - printk(KERN_INFO "Nvidia board " - "detected. Ignoring ACPI " - "timer override.\n"); - printk(KERN_INFO "If you got timer trouble " - "try acpi_use_timer_override\n"); - } -#endif -#endif - /* RED-PEN skip them on mptables too? 
*/ - -} - -#if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC) -static u32 __init ati_ixp4x0_rev(int num, int slot, int func) -{ - u32 d; - u8 b; - - b = read_pci_config_byte(num, slot, func, 0xac); - b &= ~(1<<5); - write_pci_config_byte(num, slot, func, 0xac, b); - - d = read_pci_config(num, slot, func, 0x70); - d |= 1<<8; - write_pci_config(num, slot, func, 0x70, d); - - d = read_pci_config(num, slot, func, 0x8); - d &= 0xff; - return d; -} - -static void __init ati_bugs(int num, int slot, int func) -{ - u32 d; - u8 b; - - if (acpi_use_timer_override) - return; - - d = ati_ixp4x0_rev(num, slot, func); - if (d < 0x82) - acpi_skip_timer_override = 1; - else { - /* check for IRQ0 interrupt swap */ - outb(0x72, 0xcd6); b = inb(0xcd7); - if (!(b & 0x2)) - acpi_skip_timer_override = 1; - } - - if (acpi_skip_timer_override) { - printk(KERN_INFO "SB4X0 revision 0x%x\n", d); - printk(KERN_INFO "Ignoring ACPI timer override.\n"); - printk(KERN_INFO "If you got timer trouble " - "try acpi_use_timer_override\n"); - } -} - -static u32 __init ati_sbx00_rev(int num, int slot, int func) -{ - u32 d; - - d = read_pci_config(num, slot, func, 0x8); - d &= 0xff; - - return d; -} - -static void __init ati_bugs_contd(int num, int slot, int func) -{ - u32 d, rev; - - rev = ati_sbx00_rev(num, slot, func); - if (rev >= 0x40) - acpi_fix_pin2_polarity = 1; - - /* - * SB600: revisions 0x11, 0x12, 0x13, 0x14, ... - * SB700: revisions 0x39, 0x3a, ... - * SB800: revisions 0x40, 0x41, ... - */ - if (rev >= 0x39) - return; - - if (acpi_use_timer_override) - return; - - /* check for IRQ0 interrupt swap */ - d = read_pci_config(num, slot, func, 0x64); - if (!(d & (1<<14))) - acpi_skip_timer_override = 1; - - if (acpi_skip_timer_override) { - printk(KERN_INFO "SB600 revision 0x%x\n", rev); - printk(KERN_INFO "Ignoring ACPI timer override.\n"); - printk(KERN_INFO "If you got timer trouble " - "try acpi_use_timer_override\n"); - } -} -#else -static void __init ati_bugs(int num, int slot, int func) -{ -} - -static void __init ati_bugs_contd(int num, int slot, int func) -{ -} -#endif - -#define QFLAG_APPLY_ONCE 0x1 -#define QFLAG_APPLIED 0x2 -#define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) -struct chipset { - u32 vendor; - u32 device; - u32 class; - u32 class_mask; - u32 flags; - void (*f)(int num, int slot, int func); -}; - -/* - * Only works for devices on the root bus. If you add any devices - * not on bus 0 readd another loop level in early_quirks(). But - * be careful because at least the Nvidia quirk here relies on - * only matching on bus 0. - */ -static struct chipset early_qrk[] __initdata = { - { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, - PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, nvidia_bugs }, - { PCI_VENDOR_ID_VIA, PCI_ANY_ID, - PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs }, - { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, - PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config }, - { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS, - PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs }, - { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, - PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd }, - {} -}; - -/** - * check_dev_quirk - apply early quirks to a given PCI device - * @num: bus number - * @slot: slot number - * @func: PCI function - * - * Check the vendor & device ID against the early quirks table. - * - * If the device is single function, let early_quirks() know so we don't - * poke at this device again. 
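check_dev_quirk(), which follows, walks the early_qrk[] table above and applies a vendor/device/class match: wildcards always pass, and the class comparison only considers the bits selected by class_mask. A stand-alone sketch of that predicate; ANY_ID, struct quirk and the sample IDs below are invented for illustration, not the kernel's definitions:

    #include <stdint.h>
    #include <stdio.h>

    #define ANY_ID 0xffffffffu    /* plays the role of PCI_ANY_ID here */

    struct quirk {
        uint32_t vendor, device, class, class_mask;
    };

    /* Matches when vendor and device agree (or are wildcarded) and every
     * class bit selected by class_mask agrees. */
    static int quirk_matches(const struct quirk *q,
                             uint32_t vendor, uint32_t device, uint32_t class)
    {
        return (q->vendor == ANY_ID || q->vendor == vendor) &&
               (q->device == ANY_ID || q->device == device) &&
               !((q->class ^ class) & q->class_mask);
    }

    int main(void)
    {
        /* "any device from vendor 0x10de with PCI-to-PCI bridge class" */
        struct quirk q = { 0x10de, ANY_ID, 0x0604, 0xffff };

        printf("%d\n", quirk_matches(&q, 0x10de, 0x0123, 0x0604));    /* 1 */
        printf("%d\n", quirk_matches(&q, 0x8086, 0x0123, 0x0604));    /* 0 */
        return 0;
    }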
- */ -static int __init check_dev_quirk(int num, int slot, int func) -{ - u16 class; - u16 vendor; - u16 device; - u8 type; - int i; - - class = read_pci_config_16(num, slot, func, PCI_CLASS_DEVICE); - - if (class == 0xffff) - return -1; /* no class, treat as single function */ - - vendor = read_pci_config_16(num, slot, func, PCI_VENDOR_ID); - - device = read_pci_config_16(num, slot, func, PCI_DEVICE_ID); - - for (i = 0; early_qrk[i].f != NULL; i++) { - if (((early_qrk[i].vendor == PCI_ANY_ID) || - (early_qrk[i].vendor == vendor)) && - ((early_qrk[i].device == PCI_ANY_ID) || - (early_qrk[i].device == device)) && - (!((early_qrk[i].class ^ class) & - early_qrk[i].class_mask))) { - if ((early_qrk[i].flags & - QFLAG_DONE) != QFLAG_DONE) - early_qrk[i].f(num, slot, func); - early_qrk[i].flags |= QFLAG_APPLIED; - } - } - - type = read_pci_config_byte(num, slot, func, - PCI_HEADER_TYPE); - if (!(type & 0x80)) - return -1; - - return 0; -} - -void __init early_quirks(void) -{ - int slot, func; - - if (!early_pci_allowed()) - return; - - /* Poor man's PCI discovery */ - /* Only scan the root bus */ - for (slot = 0; slot < 32; slot++) - for (func = 0; func < 8; func++) { - /* Only probe function 0 on single fn devices */ - if (check_dev_quirk(0, slot, func)) - break; - } -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/early_printk.c b/ANDROID_3.4.5/arch/x86/kernel/early_printk.c deleted file mode 100644 index 9b9f18b4..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/early_printk.c +++ /dev/null @@ -1,259 +0,0 @@ -#include <linux/console.h> -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/string.h> -#include <linux/screen_info.h> -#include <linux/usb/ch9.h> -#include <linux/pci_regs.h> -#include <linux/pci_ids.h> -#include <linux/errno.h> -#include <asm/io.h> -#include <asm/processor.h> -#include <asm/fcntl.h> -#include <asm/setup.h> -#include <xen/hvc-console.h> -#include <asm/pci-direct.h> -#include <asm/fixmap.h> -#include <asm/mrst.h> -#include <asm/pgtable.h> -#include <linux/usb/ehci_def.h> - -/* Simple VGA output */ -#define VGABASE (__ISA_IO_base + 0xb8000) - -static int max_ypos = 25, max_xpos = 80; -static int current_ypos = 25, current_xpos; - -static void early_vga_write(struct console *con, const char *str, unsigned n) -{ - char c; - int i, k, j; - - while ((c = *str++) != '\0' && n-- > 0) { - if (current_ypos >= max_ypos) { - /* scroll 1 line up */ - for (k = 1, j = 0; k < max_ypos; k++, j++) { - for (i = 0; i < max_xpos; i++) { - writew(readw(VGABASE+2*(max_xpos*k+i)), - VGABASE + 2*(max_xpos*j + i)); - } - } - for (i = 0; i < max_xpos; i++) - writew(0x720, VGABASE + 2*(max_xpos*j + i)); - current_ypos = max_ypos-1; - } -#ifdef CONFIG_KGDB_KDB - if (c == '\b') { - if (current_xpos > 0) - current_xpos--; - } else if (c == '\r') { - current_xpos = 0; - } else -#endif - if (c == '\n') { - current_xpos = 0; - current_ypos++; - } else if (c != '\r') { - writew(((0x7 << 8) | (unsigned short) c), - VGABASE + 2*(max_xpos*current_ypos + - current_xpos++)); - if (current_xpos >= max_xpos) { - current_xpos = 0; - current_ypos++; - } - } - } -} - -static struct console early_vga_console = { - .name = "earlyvga", - .write = early_vga_write, - .flags = CON_PRINTBUFFER, - .index = -1, -}; - -/* Serial functions loosely based on a similar package from Klaus P. 
Gerlicher */ - -static int early_serial_base = 0x3f8; /* ttyS0 */ - -#define XMTRDY 0x20 - -#define DLAB 0x80 - -#define TXR 0 /* Transmit register (WRITE) */ -#define RXR 0 /* Receive register (READ) */ -#define IER 1 /* Interrupt Enable */ -#define IIR 2 /* Interrupt ID */ -#define FCR 2 /* FIFO control */ -#define LCR 3 /* Line control */ -#define MCR 4 /* Modem control */ -#define LSR 5 /* Line Status */ -#define MSR 6 /* Modem Status */ -#define DLL 0 /* Divisor Latch Low */ -#define DLH 1 /* Divisor latch High */ - -static int early_serial_putc(unsigned char ch) -{ - unsigned timeout = 0xffff; - - while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout) - cpu_relax(); - outb(ch, early_serial_base + TXR); - return timeout ? 0 : -1; -} - -static void early_serial_write(struct console *con, const char *s, unsigned n) -{ - while (*s && n-- > 0) { - if (*s == '\n') - early_serial_putc('\r'); - early_serial_putc(*s); - s++; - } -} - -#define DEFAULT_BAUD 9600 - -static __init void early_serial_init(char *s) -{ - unsigned char c; - unsigned divisor; - unsigned baud = DEFAULT_BAUD; - char *e; - - if (*s == ',') - ++s; - - if (*s) { - unsigned port; - if (!strncmp(s, "0x", 2)) { - early_serial_base = simple_strtoul(s, &e, 16); - } else { - static const int __initconst bases[] = { 0x3f8, 0x2f8 }; - - if (!strncmp(s, "ttyS", 4)) - s += 4; - port = simple_strtoul(s, &e, 10); - if (port > 1 || s == e) - port = 0; - early_serial_base = bases[port]; - } - s += strcspn(s, ","); - if (*s == ',') - s++; - } - - outb(0x3, early_serial_base + LCR); /* 8n1 */ - outb(0, early_serial_base + IER); /* no interrupt */ - outb(0, early_serial_base + FCR); /* no fifo */ - outb(0x3, early_serial_base + MCR); /* DTR + RTS */ - - if (*s) { - baud = simple_strtoul(s, &e, 0); - if (baud == 0 || s == e) - baud = DEFAULT_BAUD; - } - - divisor = 115200 / baud; - c = inb(early_serial_base + LCR); - outb(c | DLAB, early_serial_base + LCR); - outb(divisor & 0xff, early_serial_base + DLL); - outb((divisor >> 8) & 0xff, early_serial_base + DLH); - outb(c & ~DLAB, early_serial_base + LCR); -} - -static struct console early_serial_console = { - .name = "earlyser", - .write = early_serial_write, - .flags = CON_PRINTBUFFER, - .index = -1, -}; - -/* Direct interface for emergencies */ -static struct console *early_console = &early_vga_console; -static int __initdata early_console_initialized; - -asmlinkage void early_printk(const char *fmt, ...) 
-{ - char buf[512]; - int n; - va_list ap; - - va_start(ap, fmt); - n = vscnprintf(buf, sizeof(buf), fmt, ap); - early_console->write(early_console, buf, n); - va_end(ap); -} - -static inline void early_console_register(struct console *con, int keep_early) -{ - if (early_console->index != -1) { - printk(KERN_CRIT "ERROR: earlyprintk= %s already used\n", - con->name); - return; - } - early_console = con; - if (keep_early) - early_console->flags &= ~CON_BOOT; - else - early_console->flags |= CON_BOOT; - register_console(early_console); -} - -static int __init setup_early_printk(char *buf) -{ - int keep; - - if (!buf) - return 0; - - if (early_console_initialized) - return 0; - early_console_initialized = 1; - - keep = (strstr(buf, "keep") != NULL); - - while (*buf != '\0') { - if (!strncmp(buf, "serial", 6)) { - buf += 6; - early_serial_init(buf); - early_console_register(&early_serial_console, keep); - if (!strncmp(buf, ",ttyS", 5)) - buf += 5; - } - if (!strncmp(buf, "ttyS", 4)) { - early_serial_init(buf + 4); - early_console_register(&early_serial_console, keep); - } - if (!strncmp(buf, "vga", 3) && - boot_params.screen_info.orig_video_isVGA == 1) { - max_xpos = boot_params.screen_info.orig_video_cols; - max_ypos = boot_params.screen_info.orig_video_lines; - current_ypos = boot_params.screen_info.orig_y; - early_console_register(&early_vga_console, keep); - } -#ifdef CONFIG_EARLY_PRINTK_DBGP - if (!strncmp(buf, "dbgp", 4) && !early_dbgp_init(buf + 4)) - early_console_register(&early_dbgp_console, keep); -#endif -#ifdef CONFIG_HVC_XEN - if (!strncmp(buf, "xen", 3)) - early_console_register(&xenboot_console, keep); -#endif -#ifdef CONFIG_EARLY_PRINTK_INTEL_MID - if (!strncmp(buf, "mrst", 4)) { - mrst_early_console_init(); - early_console_register(&early_mrst_console, keep); - } - - if (!strncmp(buf, "hsu", 3)) { - hsu_early_console_init(buf + 3); - early_console_register(&early_hsu_console, keep); - } -#endif - buf++; - } - return 0; -} - -early_param("earlyprintk", setup_early_printk); diff --git a/ANDROID_3.4.5/arch/x86/kernel/entry_32.S b/ANDROID_3.4.5/arch/x86/kernel/entry_32.S deleted file mode 100644 index 7b784f4e..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/entry_32.S +++ /dev/null @@ -1,1417 +0,0 @@ -/* - * - * Copyright (C) 1991, 1992 Linus Torvalds - */ - -/* - * entry.S contains the system-call and fault low-level handling routines. - * This also contains the timer-interrupt handler, as well as all interrupts - * and faults that can result in a task-switch. - * - * NOTE: This code handles signal-recognition, which happens every time - * after a timer-interrupt and after each system call. - * - * I changed all the .align's to 4 (16 byte alignment), as that's faster - * on a 486. - * - * Stack layout in 'syscall_exit': - * ptrace needs to have all regs on the stack. - * if the order here is changed, it needs to be - * updated in fork.c:copy_process, signal.c:do_signal, - * ptrace.c and ptrace.h - * - * 0(%esp) - %ebx - * 4(%esp) - %ecx - * 8(%esp) - %edx - * C(%esp) - %esi - * 10(%esp) - %edi - * 14(%esp) - %ebp - * 18(%esp) - %eax - * 1C(%esp) - %ds - * 20(%esp) - %es - * 24(%esp) - %fs - * 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS - * 2C(%esp) - orig_eax - * 30(%esp) - %eip - * 34(%esp) - %cs - * 38(%esp) - %eflags - * 3C(%esp) - %oldesp - * 40(%esp) - %oldss - * - * "current" is in register %ebx during any slow entries. 
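The 0x00-0x40 offsets listed in the comment above are simply the field order of the 32-bit register frame that SAVE_ALL builds. A stand-alone mirror of that ordering, as a sketch for illustration rather than the kernel's struct definition (fixed-width types keep the offsets right on any host):

    #include <stdint.h>
    #include <stddef.h>
    #include <stdio.h>

    struct pt_regs32 {
        uint32_t bx, cx, dx, si, di, bp, ax;    /* 0x00 .. 0x18 */
        uint32_t ds, es, fs, gs;                /* 0x1c .. 0x28 */
        uint32_t orig_ax;                       /* 0x2c */
        uint32_t ip, cs, flags, sp, ss;         /* 0x30 .. 0x40 */
    };

    int main(void)
    {
        printf("orig_ax 0x%zx  ip 0x%zx  ss 0x%zx\n",
               offsetof(struct pt_regs32, orig_ax),
               offsetof(struct pt_regs32, ip),
               offsetof(struct pt_regs32, ss));    /* 0x2c 0x30 0x40 */
        return 0;
    }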
- */ - -#include <linux/linkage.h> -#include <linux/err.h> -#include <asm/thread_info.h> -#include <asm/irqflags.h> -#include <asm/errno.h> -#include <asm/segment.h> -#include <asm/smp.h> -#include <asm/page_types.h> -#include <asm/percpu.h> -#include <asm/dwarf2.h> -#include <asm/processor-flags.h> -#include <asm/ftrace.h> -#include <asm/irq_vectors.h> -#include <asm/cpufeature.h> -#include <asm/alternative-asm.h> - -/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ -#include <linux/elf-em.h> -#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE) -#define __AUDIT_ARCH_LE 0x40000000 - -#ifndef CONFIG_AUDITSYSCALL -#define sysenter_audit syscall_trace_entry -#define sysexit_audit syscall_exit_work -#endif - - .section .entry.text, "ax" - -/* - * We use macros for low-level operations which need to be overridden - * for paravirtualization. The following will never clobber any registers: - * INTERRUPT_RETURN (aka. "iret") - * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax") - * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit"). - * - * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must - * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY). - * Allowing a register to be clobbered can shrink the paravirt replacement - * enough to patch inline, increasing performance. - */ - -#ifdef CONFIG_PREEMPT -#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF -#else -#define preempt_stop(clobbers) -#define resume_kernel restore_all -#endif - -.macro TRACE_IRQS_IRET -#ifdef CONFIG_TRACE_IRQFLAGS - testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off? - jz 1f - TRACE_IRQS_ON -1: -#endif -.endm - -/* - * User gs save/restore - * - * %gs is used for userland TLS and kernel only uses it for stack - * canary which is required to be at %gs:20 by gcc. Read the comment - * at the top of stackprotector.h for more info. - * - * Local labels 98 and 99 are used. 
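The requirement that the canary live at %gs:20, mentioned above, comes from the code gcc emits for -fstack-protector on 32-bit x86; the per-cpu data simply has to be laid out so the canary lands at that offset. A toy layout check under that assumption (the struct here is illustrative, not the kernel's):

    #include <stddef.h>
    #include <stdio.h>

    struct canary_slot {
        char pad[20];            /* whatever else lives below the canary */
        unsigned long canary;    /* must end up at offset 20, i.e. %gs:20 */
    };

    int main(void)
    {
        printf("canary offset: %zu\n", offsetof(struct canary_slot, canary));
        return 0;
    }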
- */ -#ifdef CONFIG_X86_32_LAZY_GS - - /* unfortunately push/pop can't be no-op */ -.macro PUSH_GS - pushl_cfi $0 -.endm -.macro POP_GS pop=0 - addl $(4 + \pop), %esp - CFI_ADJUST_CFA_OFFSET -(4 + \pop) -.endm -.macro POP_GS_EX -.endm - - /* all the rest are no-op */ -.macro PTGS_TO_GS -.endm -.macro PTGS_TO_GS_EX -.endm -.macro GS_TO_REG reg -.endm -.macro REG_TO_PTGS reg -.endm -.macro SET_KERNEL_GS reg -.endm - -#else /* CONFIG_X86_32_LAZY_GS */ - -.macro PUSH_GS - pushl_cfi %gs - /*CFI_REL_OFFSET gs, 0*/ -.endm - -.macro POP_GS pop=0 -98: popl_cfi %gs - /*CFI_RESTORE gs*/ - .if \pop <> 0 - add $\pop, %esp - CFI_ADJUST_CFA_OFFSET -\pop - .endif -.endm -.macro POP_GS_EX -.pushsection .fixup, "ax" -99: movl $0, (%esp) - jmp 98b -.section __ex_table, "a" - .align 4 - .long 98b, 99b -.popsection -.endm - -.macro PTGS_TO_GS -98: mov PT_GS(%esp), %gs -.endm -.macro PTGS_TO_GS_EX -.pushsection .fixup, "ax" -99: movl $0, PT_GS(%esp) - jmp 98b -.section __ex_table, "a" - .align 4 - .long 98b, 99b -.popsection -.endm - -.macro GS_TO_REG reg - movl %gs, \reg - /*CFI_REGISTER gs, \reg*/ -.endm -.macro REG_TO_PTGS reg - movl \reg, PT_GS(%esp) - /*CFI_REL_OFFSET gs, PT_GS*/ -.endm -.macro SET_KERNEL_GS reg - movl $(__KERNEL_STACK_CANARY), \reg - movl \reg, %gs -.endm - -#endif /* CONFIG_X86_32_LAZY_GS */ - -.macro SAVE_ALL - cld - PUSH_GS - pushl_cfi %fs - /*CFI_REL_OFFSET fs, 0;*/ - pushl_cfi %es - /*CFI_REL_OFFSET es, 0;*/ - pushl_cfi %ds - /*CFI_REL_OFFSET ds, 0;*/ - pushl_cfi %eax - CFI_REL_OFFSET eax, 0 - pushl_cfi %ebp - CFI_REL_OFFSET ebp, 0 - pushl_cfi %edi - CFI_REL_OFFSET edi, 0 - pushl_cfi %esi - CFI_REL_OFFSET esi, 0 - pushl_cfi %edx - CFI_REL_OFFSET edx, 0 - pushl_cfi %ecx - CFI_REL_OFFSET ecx, 0 - pushl_cfi %ebx - CFI_REL_OFFSET ebx, 0 - movl $(__USER_DS), %edx - movl %edx, %ds - movl %edx, %es - movl $(__KERNEL_PERCPU), %edx - movl %edx, %fs - SET_KERNEL_GS %edx -.endm - -.macro RESTORE_INT_REGS - popl_cfi %ebx - CFI_RESTORE ebx - popl_cfi %ecx - CFI_RESTORE ecx - popl_cfi %edx - CFI_RESTORE edx - popl_cfi %esi - CFI_RESTORE esi - popl_cfi %edi - CFI_RESTORE edi - popl_cfi %ebp - CFI_RESTORE ebp - popl_cfi %eax - CFI_RESTORE eax -.endm - -.macro RESTORE_REGS pop=0 - RESTORE_INT_REGS -1: popl_cfi %ds - /*CFI_RESTORE ds;*/ -2: popl_cfi %es - /*CFI_RESTORE es;*/ -3: popl_cfi %fs - /*CFI_RESTORE fs;*/ - POP_GS \pop -.pushsection .fixup, "ax" -4: movl $0, (%esp) - jmp 1b -5: movl $0, (%esp) - jmp 2b -6: movl $0, (%esp) - jmp 3b -.section __ex_table, "a" - .align 4 - .long 1b, 4b - .long 2b, 5b - .long 3b, 6b -.popsection - POP_GS_EX -.endm - -.macro RING0_INT_FRAME - CFI_STARTPROC simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA esp, 3*4 - /*CFI_OFFSET cs, -2*4;*/ - CFI_OFFSET eip, -3*4 -.endm - -.macro RING0_EC_FRAME - CFI_STARTPROC simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA esp, 4*4 - /*CFI_OFFSET cs, -2*4;*/ - CFI_OFFSET eip, -3*4 -.endm - -.macro RING0_PTREGS_FRAME - CFI_STARTPROC simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA esp, PT_OLDESP-PT_EBX - /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/ - CFI_OFFSET eip, PT_EIP-PT_OLDESP - /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/ - /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/ - CFI_OFFSET eax, PT_EAX-PT_OLDESP - CFI_OFFSET ebp, PT_EBP-PT_OLDESP - CFI_OFFSET edi, PT_EDI-PT_OLDESP - CFI_OFFSET esi, PT_ESI-PT_OLDESP - CFI_OFFSET edx, PT_EDX-PT_OLDESP - CFI_OFFSET ecx, PT_ECX-PT_OLDESP - CFI_OFFSET ebx, PT_EBX-PT_OLDESP -.endm - -ENTRY(ret_from_fork) - CFI_STARTPROC - pushl_cfi %eax - call schedule_tail - GET_THREAD_INFO(%ebp) - popl_cfi %eax - pushl_cfi $0x0202 # Reset kernel eflags - 
popfl_cfi - jmp syscall_exit - CFI_ENDPROC -END(ret_from_fork) - -/* - * Interrupt exit functions should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" -/* - * Return to user mode is not as complex as all this looks, - * but we want the default path for a system call return to - * go as quickly as possible which is why some of this is - * less clear than it otherwise should be. - */ - - # userspace resumption stub bypassing syscall exit tracing - ALIGN - RING0_PTREGS_FRAME -ret_from_exception: - preempt_stop(CLBR_ANY) -ret_from_intr: - GET_THREAD_INFO(%ebp) -resume_userspace_sig: -#ifdef CONFIG_VM86 - movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS - movb PT_CS(%esp), %al - andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax -#else - /* - * We can be coming here from a syscall done in the kernel space, - * e.g. a failed kernel_execve(). - */ - movl PT_CS(%esp), %eax - andl $SEGMENT_RPL_MASK, %eax -#endif - cmpl $USER_RPL, %eax - jb resume_kernel # not returning to v8086 or userspace - -ENTRY(resume_userspace) - LOCKDEP_SYS_EXIT - DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt - # setting need_resched or sigpending - # between sampling and the iret - TRACE_IRQS_OFF - movl TI_flags(%ebp), %ecx - andl $_TIF_WORK_MASK, %ecx # is there any work to be done on - # int/exception return? - jne work_pending - jmp restore_all -END(ret_from_exception) - -#ifdef CONFIG_PREEMPT -ENTRY(resume_kernel) - DISABLE_INTERRUPTS(CLBR_ANY) - cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? - jnz restore_all -need_resched: - movl TI_flags(%ebp), %ecx # need_resched set ? - testb $_TIF_NEED_RESCHED, %cl - jz restore_all - testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ? - jz restore_all - call preempt_schedule_irq - jmp need_resched -END(resume_kernel) -#endif - CFI_ENDPROC -/* - * End of kprobes section - */ - .popsection - -/* SYSENTER_RETURN points to after the "sysenter" instruction in - the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ - - # sysenter call handler stub -ENTRY(ia32_sysenter_target) - CFI_STARTPROC simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA esp, 0 - CFI_REGISTER esp, ebp - movl TSS_sysenter_sp0(%esp),%esp -sysenter_past_esp: - /* - * Interrupts are disabled here, but we can't trace it until - * enough kernel state to call TRACE_IRQS_OFF can be called - but - * we immediately enable interrupts at that point anyway. - */ - pushl_cfi $__USER_DS - /*CFI_REL_OFFSET ss, 0*/ - pushl_cfi %ebp - CFI_REL_OFFSET esp, 0 - pushfl_cfi - orl $X86_EFLAGS_IF, (%esp) - pushl_cfi $__USER_CS - /*CFI_REL_OFFSET cs, 0*/ - /* - * Push current_thread_info()->sysenter_return to the stack. - * A tiny bit of offset fixup is necessary - 4*4 means the 4 words - * pushed above; +8 corresponds to copy_thread's esp0 setting. - */ - pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp) - CFI_REL_OFFSET eip, 0 - - pushl_cfi %eax - SAVE_ALL - ENABLE_INTERRUPTS(CLBR_NONE) - -/* - * Load the potential sixth argument from user stack. - * Careful about security. 
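The resume_userspace_sig test above mixes the saved EFLAGS.VM bit with the RPL bits of the saved CS and treats anything below USER_RPL as a return to kernel mode. The same decision expressed in C, with the usual x86 constants spelled out (a sketch, not kernel code):

    #include <stdio.h>

    #define SEGMENT_RPL_MASK 0x3UL
    #define USER_RPL         0x3UL
    #define X86_EFLAGS_VM    (1UL << 17)

    /* Returns nonzero when the interrupted context was v8086 or user mode. */
    static int returning_to_user(unsigned long eflags, unsigned long cs)
    {
        unsigned long v = (eflags & X86_EFLAGS_VM) | (cs & SEGMENT_RPL_MASK);
        return v >= USER_RPL;
    }

    int main(void)
    {
        printf("%d\n", returning_to_user(0x202, 0x73));    /* user CS: 1 */
        printf("%d\n", returning_to_user(0x202, 0x60));    /* kernel CS: 0 */
        return 0;
    }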
- */ - cmpl $__PAGE_OFFSET-3,%ebp - jae syscall_fault -1: movl (%ebp),%ebp - movl %ebp,PT_EBP(%esp) -.section __ex_table,"a" - .align 4 - .long 1b,syscall_fault -.previous - - GET_THREAD_INFO(%ebp) - - testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) - jnz sysenter_audit -sysenter_do_call: - cmpl $(NR_syscalls), %eax - jae syscall_badsys - call *sys_call_table(,%eax,4) - movl %eax,PT_EAX(%esp) - LOCKDEP_SYS_EXIT - DISABLE_INTERRUPTS(CLBR_ANY) - TRACE_IRQS_OFF - movl TI_flags(%ebp), %ecx - testl $_TIF_ALLWORK_MASK, %ecx - jne sysexit_audit -sysenter_exit: -/* if something modifies registers it must also disable sysexit */ - movl PT_EIP(%esp), %edx - movl PT_OLDESP(%esp), %ecx - xorl %ebp,%ebp - TRACE_IRQS_ON -1: mov PT_FS(%esp), %fs - PTGS_TO_GS - ENABLE_INTERRUPTS_SYSEXIT - -#ifdef CONFIG_AUDITSYSCALL -sysenter_audit: - testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp) - jnz syscall_trace_entry - addl $4,%esp - CFI_ADJUST_CFA_OFFSET -4 - /* %esi already in 8(%esp) 6th arg: 4th syscall arg */ - /* %edx already in 4(%esp) 5th arg: 3rd syscall arg */ - /* %ecx already in 0(%esp) 4th arg: 2nd syscall arg */ - movl %ebx,%ecx /* 3rd arg: 1st syscall arg */ - movl %eax,%edx /* 2nd arg: syscall number */ - movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */ - call __audit_syscall_entry - pushl_cfi %ebx - movl PT_EAX(%esp),%eax /* reload syscall number */ - jmp sysenter_do_call - -sysexit_audit: - testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx - jne syscall_exit_work - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_ANY) - movl %eax,%edx /* second arg, syscall return value */ - cmpl $-MAX_ERRNO,%eax /* is it an error ? */ - setbe %al /* 1 if so, 0 if not */ - movzbl %al,%eax /* zero-extend that */ - call __audit_syscall_exit - DISABLE_INTERRUPTS(CLBR_ANY) - TRACE_IRQS_OFF - movl TI_flags(%ebp), %ecx - testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx - jne syscall_exit_work - movl PT_EAX(%esp),%eax /* reload syscall return value */ - jmp sysenter_exit -#endif - - CFI_ENDPROC -.pushsection .fixup,"ax" -2: movl $0,PT_FS(%esp) - jmp 1b -.section __ex_table,"a" - .align 4 - .long 1b,2b -.popsection - PTGS_TO_GS_EX -ENDPROC(ia32_sysenter_target) - -/* - * syscall stub including irq exit should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" - # system call handler stub -ENTRY(system_call) - RING0_INT_FRAME # can't unwind into user space anyway - pushl_cfi %eax # save orig_eax - SAVE_ALL - GET_THREAD_INFO(%ebp) - # system call tracing in operation / emulation - testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) - jnz syscall_trace_entry - cmpl $(NR_syscalls), %eax - jae syscall_badsys -syscall_call: - call *sys_call_table(,%eax,4) - movl %eax,PT_EAX(%esp) # store the return value -syscall_exit: - LOCKDEP_SYS_EXIT - DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt - # setting need_resched or sigpending - # between sampling and the iret - TRACE_IRQS_OFF - movl TI_flags(%ebp), %ecx - testl $_TIF_ALLWORK_MASK, %ecx # current->work - jne syscall_exit_work - -restore_all: - TRACE_IRQS_IRET -restore_all_notrace: - movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS - # Warning: PT_OLDSS(%esp) contains the wrong/random values if we - # are returning to the kernel. - # See comments in process.c:copy_thread() for details. 
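Both sysenter_do_call and syscall_call above dispatch through sys_call_table after a bounds check against NR_syscalls, and an out-of-range number yields -ENOSYS. A tiny C model of that dispatch; the table contents and syscall numbers are invented for the sketch:

    #include <stdio.h>
    #include <errno.h>

    typedef long (*sys_fn)(long, long, long);

    static long sys_zero(long a, long b, long c) { return 0; }
    static long sys_add (long a, long b, long c) { return a + b + c; }

    static const sys_fn sys_call_table[] = { sys_zero, sys_add };
    #define NR_syscalls (sizeof(sys_call_table) / sizeof(sys_call_table[0]))

    static long do_syscall(unsigned long nr, long a, long b, long c)
    {
        if (nr >= NR_syscalls)
            return -ENOSYS;                 /* the syscall_badsys path */
        return sys_call_table[nr](a, b, c); /* call *sys_call_table(,%eax,4) */
    }

    int main(void)
    {
        printf("%ld\n", do_syscall(1, 1, 2, 3));     /* 6 */
        printf("%ld\n", do_syscall(99, 0, 0, 0));    /* -ENOSYS */
        return 0;
    }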
- movb PT_OLDSS(%esp), %ah - movb PT_CS(%esp), %al - andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax - cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax - CFI_REMEMBER_STATE - je ldt_ss # returning to user-space with LDT SS -restore_nocheck: - RESTORE_REGS 4 # skip orig_eax/error_code -irq_return: - INTERRUPT_RETURN -.section .fixup,"ax" -ENTRY(iret_exc) - pushl $0 # no error code - pushl $do_iret_error - jmp error_code -.previous -.section __ex_table,"a" - .align 4 - .long irq_return,iret_exc -.previous - - CFI_RESTORE_STATE -ldt_ss: - larl PT_OLDSS(%esp), %eax - jnz restore_nocheck - testl $0x00400000, %eax # returning to 32bit stack? - jnz restore_nocheck # allright, normal return - -#ifdef CONFIG_PARAVIRT - /* - * The kernel can't run on a non-flat stack if paravirt mode - * is active. Rather than try to fixup the high bits of - * ESP, bypass this code entirely. This may break DOSemu - * and/or Wine support in a paravirt VM, although the option - * is still available to implement the setting of the high - * 16-bits in the INTERRUPT_RETURN paravirt-op. - */ - cmpl $0, pv_info+PARAVIRT_enabled - jne restore_nocheck -#endif - -/* - * Setup and switch to ESPFIX stack - * - * We're returning to userspace with a 16 bit stack. The CPU will not - * restore the high word of ESP for us on executing iret... This is an - * "official" bug of all the x86-compatible CPUs, which we can work - * around to make dosemu and wine happy. We do this by preloading the - * high word of ESP with the high word of the userspace ESP while - * compensating for the offset by changing to the ESPFIX segment with - * a base address that matches for the difference. - */ -#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8) - mov %esp, %edx /* load kernel esp */ - mov PT_OLDESP(%esp), %eax /* load userspace esp */ - mov %dx, %ax /* eax: new kernel esp */ - sub %eax, %edx /* offset (low word is 0) */ - shr $16, %edx - mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */ - mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */ - pushl_cfi $__ESPFIX_SS - pushl_cfi %eax /* new kernel esp */ - /* Disable interrupts, but do not irqtrace this section: we - * will soon execute iret and the tracer was already set to - * the irqstate after the iret */ - DISABLE_INTERRUPTS(CLBR_EAX) - lss (%esp), %esp /* switch to espfix segment */ - CFI_ADJUST_CFA_OFFSET -8 - jmp restore_nocheck - CFI_ENDPROC -ENDPROC(system_call) - - # perform work that needs to be done immediately before resumption - ALIGN - RING0_PTREGS_FRAME # can't unwind into user space anyway -work_pending: - testb $_TIF_NEED_RESCHED, %cl - jz work_notifysig -work_resched: - call schedule - LOCKDEP_SYS_EXIT - DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt - # setting need_resched or sigpending - # between sampling and the iret - TRACE_IRQS_OFF - movl TI_flags(%ebp), %ecx - andl $_TIF_WORK_MASK, %ecx # is there any work to be done other - # than syscall tracing? 
- jz restore_all - testb $_TIF_NEED_RESCHED, %cl - jnz work_resched - -work_notifysig: # deal with pending signals and - # notify-resume requests -#ifdef CONFIG_VM86 - testl $X86_EFLAGS_VM, PT_EFLAGS(%esp) - movl %esp, %eax - jne work_notifysig_v86 # returning to kernel-space or - # vm86-space - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) - xorl %edx, %edx - call do_notify_resume - jmp resume_userspace_sig - - ALIGN -work_notifysig_v86: - pushl_cfi %ecx # save ti_flags for do_notify_resume - call save_v86_state # %eax contains pt_regs pointer - popl_cfi %ecx - movl %eax, %esp -#else - movl %esp, %eax -#endif - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) - xorl %edx, %edx - call do_notify_resume - jmp resume_userspace_sig -END(work_pending) - - # perform syscall exit tracing - ALIGN -syscall_trace_entry: - movl $-ENOSYS,PT_EAX(%esp) - movl %esp, %eax - call syscall_trace_enter - /* What it returned is what we'll actually use. */ - cmpl $(NR_syscalls), %eax - jnae syscall_call - jmp syscall_exit -END(syscall_trace_entry) - - # perform syscall exit tracing - ALIGN -syscall_exit_work: - testl $_TIF_WORK_SYSCALL_EXIT, %ecx - jz work_pending - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call - # schedule() instead - movl %esp, %eax - call syscall_trace_leave - jmp resume_userspace -END(syscall_exit_work) - CFI_ENDPROC - - RING0_INT_FRAME # can't unwind into user space anyway -syscall_fault: - GET_THREAD_INFO(%ebp) - movl $-EFAULT,PT_EAX(%esp) - jmp resume_userspace -END(syscall_fault) - -syscall_badsys: - movl $-ENOSYS,PT_EAX(%esp) - jmp resume_userspace -END(syscall_badsys) - CFI_ENDPROC -/* - * End of kprobes section - */ - .popsection - -/* - * System calls that need a pt_regs pointer. - */ -#define PTREGSCALL0(name) \ -ENTRY(ptregs_##name) ; \ - leal 4(%esp),%eax; \ - jmp sys_##name; \ -ENDPROC(ptregs_##name) - -#define PTREGSCALL1(name) \ -ENTRY(ptregs_##name) ; \ - leal 4(%esp),%edx; \ - movl (PT_EBX+4)(%esp),%eax; \ - jmp sys_##name; \ -ENDPROC(ptregs_##name) - -#define PTREGSCALL2(name) \ -ENTRY(ptregs_##name) ; \ - leal 4(%esp),%ecx; \ - movl (PT_ECX+4)(%esp),%edx; \ - movl (PT_EBX+4)(%esp),%eax; \ - jmp sys_##name; \ -ENDPROC(ptregs_##name) - -#define PTREGSCALL3(name) \ -ENTRY(ptregs_##name) ; \ - CFI_STARTPROC; \ - leal 4(%esp),%eax; \ - pushl_cfi %eax; \ - movl PT_EDX(%eax),%ecx; \ - movl PT_ECX(%eax),%edx; \ - movl PT_EBX(%eax),%eax; \ - call sys_##name; \ - addl $4,%esp; \ - CFI_ADJUST_CFA_OFFSET -4; \ - ret; \ - CFI_ENDPROC; \ -ENDPROC(ptregs_##name) - -PTREGSCALL1(iopl) -PTREGSCALL0(fork) -PTREGSCALL0(vfork) -PTREGSCALL3(execve) -PTREGSCALL2(sigaltstack) -PTREGSCALL0(sigreturn) -PTREGSCALL0(rt_sigreturn) -PTREGSCALL2(vm86) -PTREGSCALL1(vm86old) - -/* Clone is an oddball. The 4th arg is in %edi */ -ENTRY(ptregs_clone) - CFI_STARTPROC - leal 4(%esp),%eax - pushl_cfi %eax - pushl_cfi PT_EDI(%eax) - movl PT_EDX(%eax),%ecx - movl PT_ECX(%eax),%edx - movl PT_EBX(%eax),%eax - call sys_clone - addl $8,%esp - CFI_ADJUST_CFA_OFFSET -8 - ret - CFI_ENDPROC -ENDPROC(ptregs_clone) - -.macro FIXUP_ESPFIX_STACK -/* - * Switch back for ESPFIX stack to the normal zerobased stack - * - * We can't call C functions using the ESPFIX stack. This code reads - * the high word of the segment base from the GDT and swiches to the - * normal stack and adjusts ESP with the matching offset. 
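The ESPFIX trick described above (and set up in ldt_ss earlier) keeps the low 16 bits of the kernel stack pointer in ESP, lets the high word come from the userspace value, and folds the difference into the base of the __ESPFIX_SS segment so that base plus the new ESP still addresses the kernel stack. The arithmetic, with made-up stack values:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t kernel_esp = 0xf6123f80;    /* made-up kernel stack pointer */
        uint32_t user_esp   = 0x0000ff40;    /* 16-bit stack: only low word is real */

        /* high word from the userspace ESP, low word from the kernel ESP */
        uint32_t new_esp = (user_esp & 0xffff0000u) | (kernel_esp & 0x0000ffffu);

        /* the difference has a zero low word and becomes the segment base */
        uint32_t espfix_base = kernel_esp - new_esp;

        assert((espfix_base & 0xffffu) == 0);
        assert(espfix_base + new_esp == kernel_esp);
        printf("base=0x%08x new_esp=0x%08x\n", espfix_base, new_esp);
        return 0;
    }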
- */ - /* fixup the stack */ - mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */ - mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */ - shl $16, %eax - addl %esp, %eax /* the adjusted stack pointer */ - pushl_cfi $__KERNEL_DS - pushl_cfi %eax - lss (%esp), %esp /* switch to the normal stack segment */ - CFI_ADJUST_CFA_OFFSET -8 -.endm -.macro UNWIND_ESPFIX_STACK - movl %ss, %eax - /* see if on espfix stack */ - cmpw $__ESPFIX_SS, %ax - jne 27f - movl $__KERNEL_DS, %eax - movl %eax, %ds - movl %eax, %es - /* switch to normal stack */ - FIXUP_ESPFIX_STACK -27: -.endm - -/* - * Build the entry stubs and pointer table with some assembler magic. - * We pack 7 stubs into a single 32-byte chunk, which will fit in a - * single cache line on all modern x86 implementations. - */ -.section .init.rodata,"a" -ENTRY(interrupt) -.section .entry.text, "ax" - .p2align 5 - .p2align CONFIG_X86_L1_CACHE_SHIFT -ENTRY(irq_entries_start) - RING0_INT_FRAME -vector=FIRST_EXTERNAL_VECTOR -.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7 - .balign 32 - .rept 7 - .if vector < NR_VECTORS - .if vector <> FIRST_EXTERNAL_VECTOR - CFI_ADJUST_CFA_OFFSET -4 - .endif -1: pushl_cfi $(~vector+0x80) /* Note: always in signed byte range */ - .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 - jmp 2f - .endif - .previous - .long 1b - .section .entry.text, "ax" -vector=vector+1 - .endif - .endr -2: jmp common_interrupt -.endr -END(irq_entries_start) - -.previous -END(interrupt) -.previous - -/* - * the CPU automatically disables interrupts when executing an IRQ vector, - * so IRQ-flags tracing has to follow that: - */ - .p2align CONFIG_X86_L1_CACHE_SHIFT -common_interrupt: - addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */ - SAVE_ALL - TRACE_IRQS_OFF - movl %esp,%eax - call do_IRQ - jmp ret_from_intr -ENDPROC(common_interrupt) - CFI_ENDPROC - -/* - * Irq entries should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" -#define BUILD_INTERRUPT3(name, nr, fn) \ -ENTRY(name) \ - RING0_INT_FRAME; \ - pushl_cfi $~(nr); \ - SAVE_ALL; \ - TRACE_IRQS_OFF \ - movl %esp,%eax; \ - call fn; \ - jmp ret_from_intr; \ - CFI_ENDPROC; \ -ENDPROC(name) - -#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name) - -/* The include is where all of the SMP etc. 
interrupts come from */ -#include <asm/entry_arch.h> - -ENTRY(coprocessor_error) - RING0_INT_FRAME - pushl_cfi $0 - pushl_cfi $do_coprocessor_error - jmp error_code - CFI_ENDPROC -END(coprocessor_error) - -ENTRY(simd_coprocessor_error) - RING0_INT_FRAME - pushl_cfi $0 -#ifdef CONFIG_X86_INVD_BUG - /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ -661: pushl_cfi $do_general_protection -662: -.section .altinstructions,"a" - altinstruction_entry 661b, 663f, X86_FEATURE_XMM, 662b-661b, 664f-663f -.previous -.section .altinstr_replacement,"ax" -663: pushl $do_simd_coprocessor_error -664: -.previous -#else - pushl_cfi $do_simd_coprocessor_error -#endif - jmp error_code - CFI_ENDPROC -END(simd_coprocessor_error) - -ENTRY(device_not_available) - RING0_INT_FRAME - pushl_cfi $-1 # mark this as an int - pushl_cfi $do_device_not_available - jmp error_code - CFI_ENDPROC -END(device_not_available) - -#ifdef CONFIG_PARAVIRT -ENTRY(native_iret) - iret -.section __ex_table,"a" - .align 4 - .long native_iret, iret_exc -.previous -END(native_iret) - -ENTRY(native_irq_enable_sysexit) - sti - sysexit -END(native_irq_enable_sysexit) -#endif - -ENTRY(overflow) - RING0_INT_FRAME - pushl_cfi $0 - pushl_cfi $do_overflow - jmp error_code - CFI_ENDPROC -END(overflow) - -ENTRY(bounds) - RING0_INT_FRAME - pushl_cfi $0 - pushl_cfi $do_bounds - jmp error_code - CFI_ENDPROC -END(bounds) - -ENTRY(invalid_op) - RING0_INT_FRAME - pushl_cfi $0 - pushl_cfi $do_invalid_op - jmp error_code - CFI_ENDPROC -END(invalid_op) - -ENTRY(coprocessor_segment_overrun) - RING0_INT_FRAME - pushl_cfi $0 - pushl_cfi $do_coprocessor_segment_overrun - jmp error_code - CFI_ENDPROC -END(coprocessor_segment_overrun) - -ENTRY(invalid_TSS) - RING0_EC_FRAME - pushl_cfi $do_invalid_TSS - jmp error_code - CFI_ENDPROC -END(invalid_TSS) - -ENTRY(segment_not_present) - RING0_EC_FRAME - pushl_cfi $do_segment_not_present - jmp error_code - CFI_ENDPROC -END(segment_not_present) - -ENTRY(stack_segment) - RING0_EC_FRAME - pushl_cfi $do_stack_segment - jmp error_code - CFI_ENDPROC -END(stack_segment) - -ENTRY(alignment_check) - RING0_EC_FRAME - pushl_cfi $do_alignment_check - jmp error_code - CFI_ENDPROC -END(alignment_check) - -ENTRY(divide_error) - RING0_INT_FRAME - pushl_cfi $0 # no error code - pushl_cfi $do_divide_error - jmp error_code - CFI_ENDPROC -END(divide_error) - -#ifdef CONFIG_X86_MCE -ENTRY(machine_check) - RING0_INT_FRAME - pushl_cfi $0 - pushl_cfi machine_check_vector - jmp error_code - CFI_ENDPROC -END(machine_check) -#endif - -ENTRY(spurious_interrupt_bug) - RING0_INT_FRAME - pushl_cfi $0 - pushl_cfi $do_spurious_interrupt_bug - jmp error_code - CFI_ENDPROC -END(spurious_interrupt_bug) -/* - * End of kprobes section - */ - .popsection - -ENTRY(kernel_thread_helper) - pushl $0 # fake return address for unwinder - CFI_STARTPROC - movl %edi,%eax - call *%esi - call do_exit - ud2 # padding for call trace - CFI_ENDPROC -ENDPROC(kernel_thread_helper) - -#ifdef CONFIG_XEN -/* Xen doesn't set %esp to be precisely what the normal sysenter - entrypoint expects, so fix it up before using the normal path. */ -ENTRY(xen_sysenter_target) - RING0_INT_FRAME - addl $5*4, %esp /* remove xen-provided frame */ - CFI_ADJUST_CFA_OFFSET -5*4 - jmp sysenter_past_esp - CFI_ENDPROC - -ENTRY(xen_hypervisor_callback) - CFI_STARTPROC - pushl_cfi $0 - SAVE_ALL - TRACE_IRQS_OFF - - /* Check to see if we got the event in the critical - region in xen_iret_direct, after we've reenabled - events and checked for pending events. 
This simulates - iret instruction's behaviour where it delivers a - pending interrupt when enabling interrupts. */ - movl PT_EIP(%esp),%eax - cmpl $xen_iret_start_crit,%eax - jb 1f - cmpl $xen_iret_end_crit,%eax - jae 1f - - jmp xen_iret_crit_fixup - -ENTRY(xen_do_upcall) -1: mov %esp, %eax - call xen_evtchn_do_upcall - jmp ret_from_intr - CFI_ENDPROC -ENDPROC(xen_hypervisor_callback) - -# Hypervisor uses this for application faults while it executes. -# We get here for two reasons: -# 1. Fault while reloading DS, ES, FS or GS -# 2. Fault while executing IRET -# Category 1 we fix up by reattempting the load, and zeroing the segment -# register if the load fails. -# Category 2 we fix up by jumping to do_iret_error. We cannot use the -# normal Linux return path in this case because if we use the IRET hypercall -# to pop the stack frame we end up in an infinite loop of failsafe callbacks. -# We distinguish between categories by maintaining a status value in EAX. -ENTRY(xen_failsafe_callback) - CFI_STARTPROC - pushl_cfi %eax - movl $1,%eax -1: mov 4(%esp),%ds -2: mov 8(%esp),%es -3: mov 12(%esp),%fs -4: mov 16(%esp),%gs - testl %eax,%eax - popl_cfi %eax - lea 16(%esp),%esp - CFI_ADJUST_CFA_OFFSET -16 - jz 5f - addl $16,%esp - jmp iret_exc # EAX != 0 => Category 2 (Bad IRET) -5: pushl_cfi $0 # EAX == 0 => Category 1 (Bad segment) - SAVE_ALL - jmp ret_from_exception - CFI_ENDPROC - -.section .fixup,"ax" -6: xorl %eax,%eax - movl %eax,4(%esp) - jmp 1b -7: xorl %eax,%eax - movl %eax,8(%esp) - jmp 2b -8: xorl %eax,%eax - movl %eax,12(%esp) - jmp 3b -9: xorl %eax,%eax - movl %eax,16(%esp) - jmp 4b -.previous -.section __ex_table,"a" - .align 4 - .long 1b,6b - .long 2b,7b - .long 3b,8b - .long 4b,9b -.previous -ENDPROC(xen_failsafe_callback) - -BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK, - xen_evtchn_do_upcall) - -#endif /* CONFIG_XEN */ - -#ifdef CONFIG_FUNCTION_TRACER -#ifdef CONFIG_DYNAMIC_FTRACE - -ENTRY(mcount) - ret -END(mcount) - -ENTRY(ftrace_caller) - cmpl $0, function_trace_stop - jne ftrace_stub - - pushl %eax - pushl %ecx - pushl %edx - movl 0xc(%esp), %eax - movl 0x4(%ebp), %edx - subl $MCOUNT_INSN_SIZE, %eax - -.globl ftrace_call -ftrace_call: - call ftrace_stub - - popl %edx - popl %ecx - popl %eax -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -.globl ftrace_graph_call -ftrace_graph_call: - jmp ftrace_stub -#endif - -.globl ftrace_stub -ftrace_stub: - ret -END(ftrace_caller) - -#else /* ! 
CONFIG_DYNAMIC_FTRACE */ - -ENTRY(mcount) - cmpl $0, function_trace_stop - jne ftrace_stub - - cmpl $ftrace_stub, ftrace_trace_function - jnz trace -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - cmpl $ftrace_stub, ftrace_graph_return - jnz ftrace_graph_caller - - cmpl $ftrace_graph_entry_stub, ftrace_graph_entry - jnz ftrace_graph_caller -#endif -.globl ftrace_stub -ftrace_stub: - ret - - /* taken from glibc */ -trace: - pushl %eax - pushl %ecx - pushl %edx - movl 0xc(%esp), %eax - movl 0x4(%ebp), %edx - subl $MCOUNT_INSN_SIZE, %eax - - call *ftrace_trace_function - - popl %edx - popl %ecx - popl %eax - jmp ftrace_stub -END(mcount) -#endif /* CONFIG_DYNAMIC_FTRACE */ -#endif /* CONFIG_FUNCTION_TRACER */ - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -ENTRY(ftrace_graph_caller) - cmpl $0, function_trace_stop - jne ftrace_stub - - pushl %eax - pushl %ecx - pushl %edx - movl 0xc(%esp), %edx - lea 0x4(%ebp), %eax - movl (%ebp), %ecx - subl $MCOUNT_INSN_SIZE, %edx - call prepare_ftrace_return - popl %edx - popl %ecx - popl %eax - ret -END(ftrace_graph_caller) - -.globl return_to_handler -return_to_handler: - pushl %eax - pushl %edx - movl %ebp, %eax - call ftrace_return_to_handler - movl %eax, %ecx - popl %edx - popl %eax - jmp *%ecx -#endif - -/* - * Some functions should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" - -ENTRY(page_fault) - RING0_EC_FRAME - pushl_cfi $do_page_fault - ALIGN -error_code: - /* the function address is in %gs's slot on the stack */ - pushl_cfi %fs - /*CFI_REL_OFFSET fs, 0*/ - pushl_cfi %es - /*CFI_REL_OFFSET es, 0*/ - pushl_cfi %ds - /*CFI_REL_OFFSET ds, 0*/ - pushl_cfi %eax - CFI_REL_OFFSET eax, 0 - pushl_cfi %ebp - CFI_REL_OFFSET ebp, 0 - pushl_cfi %edi - CFI_REL_OFFSET edi, 0 - pushl_cfi %esi - CFI_REL_OFFSET esi, 0 - pushl_cfi %edx - CFI_REL_OFFSET edx, 0 - pushl_cfi %ecx - CFI_REL_OFFSET ecx, 0 - pushl_cfi %ebx - CFI_REL_OFFSET ebx, 0 - cld - movl $(__KERNEL_PERCPU), %ecx - movl %ecx, %fs - UNWIND_ESPFIX_STACK - GS_TO_REG %ecx - movl PT_GS(%esp), %edi # get the function address - movl PT_ORIG_EAX(%esp), %edx # get the error code - movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart - REG_TO_PTGS %ecx - SET_KERNEL_GS %ecx - movl $(__USER_DS), %ecx - movl %ecx, %ds - movl %ecx, %es - TRACE_IRQS_OFF - movl %esp,%eax # pt_regs pointer - call *%edi - jmp ret_from_exception - CFI_ENDPROC -END(page_fault) - -/* - * Debug traps and NMI can happen at the one SYSENTER instruction - * that sets up the real kernel stack. Check here, since we can't - * allow the wrong stack to be used. - * - * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have - * already pushed 3 words if it hits on the sysenter instruction: - * eflags, cs and eip. - * - * We just load the right stack, and push the three (known) values - * by hand onto the new stack - while updating the return eip past - * the instruction that would have done it for sysenter. 
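Without dynamic ftrace, the mcount stub above compares ftrace_trace_function against ftrace_stub and only takes the slow path when a real tracer has been installed. The same default-to-stub pattern in C; the names mirror the assembly but the code is only a sketch:

    #include <stdio.h>

    static void ftrace_stub(unsigned long ip, unsigned long parent_ip) { }

    static void my_tracer(unsigned long ip, unsigned long parent_ip)
    {
        printf("traced call: 0x%lx <- 0x%lx\n", ip, parent_ip);
    }

    /* Defaults to the stub, precisely so the hot path is a cheap compare. */
    static void (*ftrace_trace_function)(unsigned long, unsigned long) = ftrace_stub;

    static void mcount_sketch(unsigned long ip, unsigned long parent_ip)
    {
        if (ftrace_trace_function != ftrace_stub)    /* cmpl $ftrace_stub, ... */
            ftrace_trace_function(ip, parent_ip);
    }

    int main(void)
    {
        mcount_sketch(0x1000, 0x2000);        /* stub installed: nothing happens */
        ftrace_trace_function = my_tracer;    /* register a tracer */
        mcount_sketch(0x1000, 0x2000);        /* now traced */
        return 0;
    }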
- */ -.macro FIX_STACK offset ok label - cmpw $__KERNEL_CS, 4(%esp) - jne \ok -\label: - movl TSS_sysenter_sp0 + \offset(%esp), %esp - CFI_DEF_CFA esp, 0 - CFI_UNDEFINED eip - pushfl_cfi - pushl_cfi $__KERNEL_CS - pushl_cfi $sysenter_past_esp - CFI_REL_OFFSET eip, 0 -.endm - -ENTRY(debug) - RING0_INT_FRAME - cmpl $ia32_sysenter_target,(%esp) - jne debug_stack_correct - FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn -debug_stack_correct: - pushl_cfi $-1 # mark this as an int - SAVE_ALL - TRACE_IRQS_OFF - xorl %edx,%edx # error code 0 - movl %esp,%eax # pt_regs pointer - call do_debug - jmp ret_from_exception - CFI_ENDPROC -END(debug) - -/* - * NMI is doubly nasty. It can happen _while_ we're handling - * a debug fault, and the debug fault hasn't yet been able to - * clear up the stack. So we first check whether we got an - * NMI on the sysenter entry path, but after that we need to - * check whether we got an NMI on the debug path where the debug - * fault happened on the sysenter path. - */ -ENTRY(nmi) - RING0_INT_FRAME - pushl_cfi %eax - movl %ss, %eax - cmpw $__ESPFIX_SS, %ax - popl_cfi %eax - je nmi_espfix_stack - cmpl $ia32_sysenter_target,(%esp) - je nmi_stack_fixup - pushl_cfi %eax - movl %esp,%eax - /* Do not access memory above the end of our stack page, - * it might not exist. - */ - andl $(THREAD_SIZE-1),%eax - cmpl $(THREAD_SIZE-20),%eax - popl_cfi %eax - jae nmi_stack_correct - cmpl $ia32_sysenter_target,12(%esp) - je nmi_debug_stack_check -nmi_stack_correct: - /* We have a RING0_INT_FRAME here */ - pushl_cfi %eax - SAVE_ALL - xorl %edx,%edx # zero error code - movl %esp,%eax # pt_regs pointer - call do_nmi - jmp restore_all_notrace - CFI_ENDPROC - -nmi_stack_fixup: - RING0_INT_FRAME - FIX_STACK 12, nmi_stack_correct, 1 - jmp nmi_stack_correct - -nmi_debug_stack_check: - /* We have a RING0_INT_FRAME here */ - cmpw $__KERNEL_CS,16(%esp) - jne nmi_stack_correct - cmpl $debug,(%esp) - jb nmi_stack_correct - cmpl $debug_esp_fix_insn,(%esp) - ja nmi_stack_correct - FIX_STACK 24, nmi_stack_correct, 1 - jmp nmi_stack_correct - -nmi_espfix_stack: - /* We have a RING0_INT_FRAME here. 
- * - * create the pointer to lss back - */ - pushl_cfi %ss - pushl_cfi %esp - addl $4, (%esp) - /* copy the iret frame of 12 bytes */ - .rept 3 - pushl_cfi 16(%esp) - .endr - pushl_cfi %eax - SAVE_ALL - FIXUP_ESPFIX_STACK # %eax == %esp - xorl %edx,%edx # zero error code - call do_nmi - RESTORE_REGS - lss 12+4(%esp), %esp # back to espfix stack - CFI_ADJUST_CFA_OFFSET -24 - jmp irq_return - CFI_ENDPROC -END(nmi) - -ENTRY(int3) - RING0_INT_FRAME - pushl_cfi $-1 # mark this as an int - SAVE_ALL - TRACE_IRQS_OFF - xorl %edx,%edx # zero error code - movl %esp,%eax # pt_regs pointer - call do_int3 - jmp ret_from_exception - CFI_ENDPROC -END(int3) - -ENTRY(general_protection) - RING0_EC_FRAME - pushl_cfi $do_general_protection - jmp error_code - CFI_ENDPROC -END(general_protection) - -#ifdef CONFIG_KVM_GUEST -ENTRY(async_page_fault) - RING0_EC_FRAME - pushl_cfi $do_async_page_fault - jmp error_code - CFI_ENDPROC -END(async_page_fault) -#endif - -/* - * End of kprobes section - */ - .popsection diff --git a/ANDROID_3.4.5/arch/x86/kernel/entry_64.S b/ANDROID_3.4.5/arch/x86/kernel/entry_64.S deleted file mode 100644 index cdc79b5c..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/entry_64.S +++ /dev/null @@ -1,1757 +0,0 @@ -/* - * linux/arch/x86_64/entry.S - * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs - * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> - */ - -/* - * entry.S contains the system-call and fault low-level handling routines. - * - * Some of this is documented in Documentation/x86/entry_64.txt - * - * NOTE: This code handles signal-recognition, which happens every time - * after an interrupt and after each system call. - * - * Normal syscalls and interrupts don't save a full stack frame, this is - * only done for syscall tracing, signals or fork/exec et.al. - * - * A note on terminology: - * - top of stack: Architecture defined interrupt frame from SS to RIP - * at the top of the kernel process stack. - * - partial stack frame: partially saved registers up to R11. - * - full stack frame: Like partial stack frame, but all register saved. - * - * Some macro usage: - * - CFI macros are used to generate dwarf2 unwind information for better - * backtraces. They don't change any code. - * - SAVE_ALL/RESTORE_ALL - Save/restore all registers - * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify. - * There are unfortunately lots of special cases where some registers - * not touched. The macro is a big mess that should be cleaned up. - * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS. - * Gives a full stack frame. - * - ENTRY/END Define functions in the symbol table. - * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack - * frame that is otherwise undefined after a SYSCALL - * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging. - * - errorentry/paranoidentry/zeroentry - Define exception entry points. - */ - -#include <linux/linkage.h> -#include <asm/segment.h> -#include <asm/cache.h> -#include <asm/errno.h> -#include <asm/dwarf2.h> -#include <asm/calling.h> -#include <asm/asm-offsets.h> -#include <asm/msr.h> -#include <asm/unistd.h> -#include <asm/thread_info.h> -#include <asm/hw_irq.h> -#include <asm/page_types.h> -#include <asm/irqflags.h> -#include <asm/paravirt.h> -#include <asm/ftrace.h> -#include <asm/percpu.h> -#include <linux/err.h> - -/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. 
*/ -#include <linux/elf-em.h> -#define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) -#define __AUDIT_ARCH_64BIT 0x80000000 -#define __AUDIT_ARCH_LE 0x40000000 - - .code64 - .section .entry.text, "ax" - -#ifdef CONFIG_FUNCTION_TRACER -#ifdef CONFIG_DYNAMIC_FTRACE -ENTRY(mcount) - retq -END(mcount) - -ENTRY(ftrace_caller) - cmpl $0, function_trace_stop - jne ftrace_stub - - MCOUNT_SAVE_FRAME - - movq 0x38(%rsp), %rdi - movq 8(%rbp), %rsi - subq $MCOUNT_INSN_SIZE, %rdi - -GLOBAL(ftrace_call) - call ftrace_stub - - MCOUNT_RESTORE_FRAME - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -GLOBAL(ftrace_graph_call) - jmp ftrace_stub -#endif - -GLOBAL(ftrace_stub) - retq -END(ftrace_caller) - -#else /* ! CONFIG_DYNAMIC_FTRACE */ -ENTRY(mcount) - cmpl $0, function_trace_stop - jne ftrace_stub - - cmpq $ftrace_stub, ftrace_trace_function - jnz trace - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - cmpq $ftrace_stub, ftrace_graph_return - jnz ftrace_graph_caller - - cmpq $ftrace_graph_entry_stub, ftrace_graph_entry - jnz ftrace_graph_caller -#endif - -GLOBAL(ftrace_stub) - retq - -trace: - MCOUNT_SAVE_FRAME - - movq 0x38(%rsp), %rdi - movq 8(%rbp), %rsi - subq $MCOUNT_INSN_SIZE, %rdi - - call *ftrace_trace_function - - MCOUNT_RESTORE_FRAME - - jmp ftrace_stub -END(mcount) -#endif /* CONFIG_DYNAMIC_FTRACE */ -#endif /* CONFIG_FUNCTION_TRACER */ - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -ENTRY(ftrace_graph_caller) - cmpl $0, function_trace_stop - jne ftrace_stub - - MCOUNT_SAVE_FRAME - - leaq 8(%rbp), %rdi - movq 0x38(%rsp), %rsi - movq (%rbp), %rdx - subq $MCOUNT_INSN_SIZE, %rsi - - call prepare_ftrace_return - - MCOUNT_RESTORE_FRAME - - retq -END(ftrace_graph_caller) - -GLOBAL(return_to_handler) - subq $24, %rsp - - /* Save the return values */ - movq %rax, (%rsp) - movq %rdx, 8(%rsp) - movq %rbp, %rdi - - call ftrace_return_to_handler - - movq %rax, %rdi - movq 8(%rsp), %rdx - movq (%rsp), %rax - addq $24, %rsp - jmp *%rdi -#endif - - -#ifndef CONFIG_PREEMPT -#define retint_kernel retint_restore_args -#endif - -#ifdef CONFIG_PARAVIRT -ENTRY(native_usergs_sysret64) - swapgs - sysretq -ENDPROC(native_usergs_sysret64) -#endif /* CONFIG_PARAVIRT */ - - -.macro TRACE_IRQS_IRETQ offset=ARGOFFSET -#ifdef CONFIG_TRACE_IRQFLAGS - bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */ - jnc 1f - TRACE_IRQS_ON -1: -#endif -.endm - -/* - * C code is not supposed to know about undefined top of stack. Every time - * a C function with an pt_regs argument is called from the SYSCALL based - * fast path FIXUP_TOP_OF_STACK is needed. - * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs - * manipulation. 
- */ - - /* %rsp:at FRAMEEND */ - .macro FIXUP_TOP_OF_STACK tmp offset=0 - movq PER_CPU_VAR(old_rsp),\tmp - movq \tmp,RSP+\offset(%rsp) - movq $__USER_DS,SS+\offset(%rsp) - movq $__USER_CS,CS+\offset(%rsp) - movq $-1,RCX+\offset(%rsp) - movq R11+\offset(%rsp),\tmp /* get eflags */ - movq \tmp,EFLAGS+\offset(%rsp) - .endm - - .macro RESTORE_TOP_OF_STACK tmp offset=0 - movq RSP+\offset(%rsp),\tmp - movq \tmp,PER_CPU_VAR(old_rsp) - movq EFLAGS+\offset(%rsp),\tmp - movq \tmp,R11+\offset(%rsp) - .endm - - .macro FAKE_STACK_FRAME child_rip - /* push in order ss, rsp, eflags, cs, rip */ - xorl %eax, %eax - pushq_cfi $__KERNEL_DS /* ss */ - /*CFI_REL_OFFSET ss,0*/ - pushq_cfi %rax /* rsp */ - CFI_REL_OFFSET rsp,0 - pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_BIT1) /* eflags - interrupts on */ - /*CFI_REL_OFFSET rflags,0*/ - pushq_cfi $__KERNEL_CS /* cs */ - /*CFI_REL_OFFSET cs,0*/ - pushq_cfi \child_rip /* rip */ - CFI_REL_OFFSET rip,0 - pushq_cfi %rax /* orig rax */ - .endm - - .macro UNFAKE_STACK_FRAME - addq $8*6, %rsp - CFI_ADJUST_CFA_OFFSET -(6*8) - .endm - -/* - * initial frame state for interrupts (and exceptions without error code) - */ - .macro EMPTY_FRAME start=1 offset=0 - .if \start - CFI_STARTPROC simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,8+\offset - .else - CFI_DEF_CFA_OFFSET 8+\offset - .endif - .endm - -/* - * initial frame state for interrupts (and exceptions without error code) - */ - .macro INTR_FRAME start=1 offset=0 - EMPTY_FRAME \start, SS+8+\offset-RIP - /*CFI_REL_OFFSET ss, SS+\offset-RIP*/ - CFI_REL_OFFSET rsp, RSP+\offset-RIP - /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/ - /*CFI_REL_OFFSET cs, CS+\offset-RIP*/ - CFI_REL_OFFSET rip, RIP+\offset-RIP - .endm - -/* - * initial frame state for exceptions with error code (and interrupts - * with vector already pushed) - */ - .macro XCPT_FRAME start=1 offset=0 - INTR_FRAME \start, RIP+\offset-ORIG_RAX - /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/ - .endm - -/* - * frame that enables calling into C. - */ - .macro PARTIAL_FRAME start=1 offset=0 - XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET - CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET - CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET - CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET - CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET - CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET - CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET - CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET - CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET - CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET - .endm - -/* - * frame that enables passing a complete pt_regs to a C function. - */ - .macro DEFAULT_FRAME start=1 offset=0 - PARTIAL_FRAME \start, R11+\offset-R15 - CFI_REL_OFFSET rbx, RBX+\offset - CFI_REL_OFFSET rbp, RBP+\offset - CFI_REL_OFFSET r12, R12+\offset - CFI_REL_OFFSET r13, R13+\offset - CFI_REL_OFFSET r14, R14+\offset - CFI_REL_OFFSET r15, R15+\offset - .endm - -/* save partial stack frame */ - .macro SAVE_ARGS_IRQ - cld - /* start from rbp in pt_regs and jump over */ - movq_cfi rdi, RDI-RBP - movq_cfi rsi, RSI-RBP - movq_cfi rdx, RDX-RBP - movq_cfi rcx, RCX-RBP - movq_cfi rax, RAX-RBP - movq_cfi r8, R8-RBP - movq_cfi r9, R9-RBP - movq_cfi r10, R10-RBP - movq_cfi r11, R11-RBP - - /* Save rbp so that we can unwind from get_irq_regs() */ - movq_cfi rbp, 0 - - /* Save previous stack value */ - movq %rsp, %rsi - - leaq -RBP(%rsp),%rdi /* arg1 for handler */ - testl $3, CS-RBP(%rsi) - je 1f - SWAPGS - /* - * irq_count is used to check if a CPU is already on an interrupt stack - * or not. 
While this is essentially redundant with preempt_count it is - * a little cheaper to use a separate counter in the PDA (short of - * moving irq_enter into assembly, which would be too much work) - */ -1: incl PER_CPU_VAR(irq_count) - cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp - CFI_DEF_CFA_REGISTER rsi - - /* Store previous stack value */ - pushq %rsi - CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \ - 0x77 /* DW_OP_breg7 */, 0, \ - 0x06 /* DW_OP_deref */, \ - 0x08 /* DW_OP_const1u */, SS+8-RBP, \ - 0x22 /* DW_OP_plus */ - /* We entered an interrupt context - irqs are off: */ - TRACE_IRQS_OFF - .endm - -ENTRY(save_rest) - PARTIAL_FRAME 1 REST_SKIP+8 - movq 5*8+16(%rsp), %r11 /* save return address */ - movq_cfi rbx, RBX+16 - movq_cfi rbp, RBP+16 - movq_cfi r12, R12+16 - movq_cfi r13, R13+16 - movq_cfi r14, R14+16 - movq_cfi r15, R15+16 - movq %r11, 8(%rsp) /* return address */ - FIXUP_TOP_OF_STACK %r11, 16 - ret - CFI_ENDPROC -END(save_rest) - -/* save complete stack frame */ - .pushsection .kprobes.text, "ax" -ENTRY(save_paranoid) - XCPT_FRAME 1 RDI+8 - cld - movq_cfi rdi, RDI+8 - movq_cfi rsi, RSI+8 - movq_cfi rdx, RDX+8 - movq_cfi rcx, RCX+8 - movq_cfi rax, RAX+8 - movq_cfi r8, R8+8 - movq_cfi r9, R9+8 - movq_cfi r10, R10+8 - movq_cfi r11, R11+8 - movq_cfi rbx, RBX+8 - movq_cfi rbp, RBP+8 - movq_cfi r12, R12+8 - movq_cfi r13, R13+8 - movq_cfi r14, R14+8 - movq_cfi r15, R15+8 - movl $1,%ebx - movl $MSR_GS_BASE,%ecx - rdmsr - testl %edx,%edx - js 1f /* negative -> in kernel */ - SWAPGS - xorl %ebx,%ebx -1: ret - CFI_ENDPROC -END(save_paranoid) - .popsection - -/* - * A newly forked process directly context switches into this address. - * - * rdi: prev task we switched from - */ -ENTRY(ret_from_fork) - DEFAULT_FRAME - - LOCK ; btr $TIF_FORK,TI_flags(%r8) - - pushq_cfi kernel_eflags(%rip) - popfq_cfi # reset kernel eflags - - call schedule_tail # rdi: 'prev' task parameter - - GET_THREAD_INFO(%rcx) - - RESTORE_REST - - testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? - jz retint_restore_args - - testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET - jnz int_ret_from_sys_call - - RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET - jmp ret_from_sys_call # go to the SYSRET fastpath - - CFI_ENDPROC -END(ret_from_fork) - -/* - * System call entry. Up to 6 arguments in registers are supported. - * - * SYSCALL does not save anything on the stack and does not change the - * stack pointer. - */ - -/* - * Register setup: - * rax system call number - * rdi arg0 - * rcx return address for syscall/sysret, C arg3 - * rsi arg1 - * rdx arg2 - * r10 arg3 (--> moved to rcx for C) - * r8 arg4 - * r9 arg5 - * r11 eflags for syscall/sysret, temporary for C - * r12-r15,rbp,rbx saved by C code, not touched. - * - * Interrupts are off on entry. - * Only called from user space. - * - * XXX if we had a free scratch register we could save the RSP into the stack frame - * and report it properly in ps. Unfortunately we haven't. - * - * When user can change the frames always force IRET. That is because - * it deals with uncanonical addresses better. SYSRET has trouble - * with them due to bugs in both AMD and Intel CPUs. - */ - -ENTRY(system_call) - CFI_STARTPROC simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET - CFI_REGISTER rip,rcx - /*CFI_REGISTER rflags,r11*/ - SWAPGS_UNSAFE_STACK - /* - * A hypervisor implementation might want to use a label - * after the swapgs, so that it can do the swapgs - * for the guest and jump here on syscall. 
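The register protocol documented above (rax carries the syscall number; rdi, rsi, rdx, r10, r8, r9 carry the arguments; rcx and r11 are clobbered by syscall/sysret) is what user space follows when entering here. A minimal three-argument wrapper under that convention, using write(2) as the example; gcc/clang inline asm, and syscall number 1 is write on x86-64:

    static long raw_syscall3(long nr, long a0, long a1, long a2)
    {
        long ret;

        /* rax = nr, rdi/rsi/rdx = args; rcx and r11 are clobbered by SYSCALL */
        __asm__ volatile ("syscall"
                          : "=a" (ret)
                          : "a" (nr), "D" (a0), "S" (a1), "d" (a2)
                          : "rcx", "r11", "memory");
        return ret;
    }

    int main(void)
    {
        const char msg[] = "hello from a raw syscall\n";

        /* __NR_write == 1 on x86-64; fd 1 is stdout */
        raw_syscall3(1, 1, (long)msg, sizeof(msg) - 1);
        return 0;
    }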
- */ -GLOBAL(system_call_after_swapgs) - - movq %rsp,PER_CPU_VAR(old_rsp) - movq PER_CPU_VAR(kernel_stack),%rsp - /* - * No need to follow this irqs off/on section - it's straight - * and short: - */ - ENABLE_INTERRUPTS(CLBR_NONE) - SAVE_ARGS 8,0 - movq %rax,ORIG_RAX-ARGOFFSET(%rsp) - movq %rcx,RIP-ARGOFFSET(%rsp) - CFI_REL_OFFSET rip,RIP-ARGOFFSET - testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) - jnz tracesys -system_call_fastpath: -#if __SYSCALL_MASK == ~0 - cmpq $__NR_syscall_max,%rax -#else - andl $__SYSCALL_MASK,%eax - cmpl $__NR_syscall_max,%eax -#endif - ja badsys - movq %r10,%rcx - call *sys_call_table(,%rax,8) # XXX: rip relative - movq %rax,RAX-ARGOFFSET(%rsp) -/* - * Syscall return path ending with SYSRET (fast path) - * Has incomplete stack frame and undefined top of stack. - */ -ret_from_sys_call: - movl $_TIF_ALLWORK_MASK,%edi - /* edi: flagmask */ -sysret_check: - LOCKDEP_SYS_EXIT - DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF - movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx - andl %edi,%edx - jnz sysret_careful - CFI_REMEMBER_STATE - /* - * sysretq will re-enable interrupts: - */ - TRACE_IRQS_ON - movq RIP-ARGOFFSET(%rsp),%rcx - CFI_REGISTER rip,rcx - RESTORE_ARGS 1,-ARG_SKIP,0 - /*CFI_REGISTER rflags,r11*/ - movq PER_CPU_VAR(old_rsp), %rsp - USERGS_SYSRET64 - - CFI_RESTORE_STATE - /* Handle reschedules */ - /* edx: work, edi: workmask */ -sysret_careful: - bt $TIF_NEED_RESCHED,%edx - jnc sysret_signal - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) - pushq_cfi %rdi - call schedule - popq_cfi %rdi - jmp sysret_check - - /* Handle a signal */ -sysret_signal: - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) -#ifdef CONFIG_AUDITSYSCALL - bt $TIF_SYSCALL_AUDIT,%edx - jc sysret_audit -#endif - /* - * We have a signal, or exit tracing or single-step. - * These all wind up with the iret return path anyway, - * so just join that path right now. - */ - FIXUP_TOP_OF_STACK %r11, -ARGOFFSET - jmp int_check_syscall_exit_work - -badsys: - movq $-ENOSYS,RAX-ARGOFFSET(%rsp) - jmp ret_from_sys_call - -#ifdef CONFIG_AUDITSYSCALL - /* - * Fast path for syscall audit without full syscall trace. - * We just call __audit_syscall_entry() directly, and then - * jump back to the normal fast path. - */ -auditsys: - movq %r10,%r9 /* 6th arg: 4th syscall arg */ - movq %rdx,%r8 /* 5th arg: 3rd syscall arg */ - movq %rsi,%rcx /* 4th arg: 2nd syscall arg */ - movq %rdi,%rdx /* 3rd arg: 1st syscall arg */ - movq %rax,%rsi /* 2nd arg: syscall number */ - movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */ - call __audit_syscall_entry - LOAD_ARGS 0 /* reload call-clobbered registers */ - jmp system_call_fastpath - - /* - * Return fast path for syscall audit. Call __audit_syscall_exit() - * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT - * masked off. - */ -sysret_audit: - movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */ - cmpq $-MAX_ERRNO,%rsi /* is it < -MAX_ERRNO? 
*/ - setbe %al /* 1 if so, 0 if not */ - movzbl %al,%edi /* zero-extend that into %edi */ - call __audit_syscall_exit - movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi - jmp sysret_check -#endif /* CONFIG_AUDITSYSCALL */ - - /* Do syscall tracing */ -tracesys: -#ifdef CONFIG_AUDITSYSCALL - testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) - jz auditsys -#endif - SAVE_REST - movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ - FIXUP_TOP_OF_STACK %rdi - movq %rsp,%rdi - call syscall_trace_enter - /* - * Reload arg registers from stack in case ptrace changed them. - * We don't reload %rax because syscall_trace_enter() returned - * the value it wants us to use in the table lookup. - */ - LOAD_ARGS ARGOFFSET, 1 - RESTORE_REST -#if __SYSCALL_MASK == ~0 - cmpq $__NR_syscall_max,%rax -#else - andl $__SYSCALL_MASK,%eax - cmpl $__NR_syscall_max,%eax -#endif - ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ - movq %r10,%rcx /* fixup for C */ - call *sys_call_table(,%rax,8) - movq %rax,RAX-ARGOFFSET(%rsp) - /* Use IRET because user could have changed frame */ - -/* - * Syscall return path ending with IRET. - * Has correct top of stack, but partial stack frame. - */ -GLOBAL(int_ret_from_sys_call) - DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF - movl $_TIF_ALLWORK_MASK,%edi - /* edi: mask to check */ -GLOBAL(int_with_check) - LOCKDEP_SYS_EXIT_IRQ - GET_THREAD_INFO(%rcx) - movl TI_flags(%rcx),%edx - andl %edi,%edx - jnz int_careful - andl $~TS_COMPAT,TI_status(%rcx) - jmp retint_swapgs - - /* Either reschedule or signal or syscall exit tracking needed. */ - /* First do a reschedule test. */ - /* edx: work, edi: workmask */ -int_careful: - bt $TIF_NEED_RESCHED,%edx - jnc int_very_careful - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) - pushq_cfi %rdi - call schedule - popq_cfi %rdi - DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF - jmp int_with_check - - /* handle signals and tracing -- both require a full stack frame */ -int_very_careful: - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) -int_check_syscall_exit_work: - SAVE_REST - /* Check for syscall exit trace */ - testl $_TIF_WORK_SYSCALL_EXIT,%edx - jz int_signal - pushq_cfi %rdi - leaq 8(%rsp),%rdi # &ptregs -> arg1 - call syscall_trace_leave - popq_cfi %rdi - andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi - jmp int_restore_rest - -int_signal: - testl $_TIF_DO_NOTIFY_MASK,%edx - jz 1f - movq %rsp,%rdi # &ptregs -> arg1 - xorl %esi,%esi # oldset -> arg2 - call do_notify_resume -1: movl $_TIF_WORK_MASK,%edi -int_restore_rest: - RESTORE_REST - DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF - jmp int_with_check - CFI_ENDPROC -END(system_call) - -/* - * Certain special system calls that need to save a complete full stack frame. 
- */ - .macro PTREGSCALL label,func,arg -ENTRY(\label) - PARTIAL_FRAME 1 8 /* offset 8: return address */ - subq $REST_SKIP, %rsp - CFI_ADJUST_CFA_OFFSET REST_SKIP - call save_rest - DEFAULT_FRAME 0 8 /* offset 8: return address */ - leaq 8(%rsp), \arg /* pt_regs pointer */ - call \func - jmp ptregscall_common - CFI_ENDPROC -END(\label) - .endm - - PTREGSCALL stub_clone, sys_clone, %r8 - PTREGSCALL stub_fork, sys_fork, %rdi - PTREGSCALL stub_vfork, sys_vfork, %rdi - PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx - PTREGSCALL stub_iopl, sys_iopl, %rsi - -ENTRY(ptregscall_common) - DEFAULT_FRAME 1 8 /* offset 8: return address */ - RESTORE_TOP_OF_STACK %r11, 8 - movq_cfi_restore R15+8, r15 - movq_cfi_restore R14+8, r14 - movq_cfi_restore R13+8, r13 - movq_cfi_restore R12+8, r12 - movq_cfi_restore RBP+8, rbp - movq_cfi_restore RBX+8, rbx - ret $REST_SKIP /* pop extended registers */ - CFI_ENDPROC -END(ptregscall_common) - -ENTRY(stub_execve) - CFI_STARTPROC - addq $8, %rsp - PARTIAL_FRAME 0 - SAVE_REST - FIXUP_TOP_OF_STACK %r11 - movq %rsp, %rcx - call sys_execve - RESTORE_TOP_OF_STACK %r11 - movq %rax,RAX(%rsp) - RESTORE_REST - jmp int_ret_from_sys_call - CFI_ENDPROC -END(stub_execve) - -/* - * sigreturn is special because it needs to restore all registers on return. - * This cannot be done with SYSRET, so use the IRET return path instead. - */ -ENTRY(stub_rt_sigreturn) - CFI_STARTPROC - addq $8, %rsp - PARTIAL_FRAME 0 - SAVE_REST - movq %rsp,%rdi - FIXUP_TOP_OF_STACK %r11 - call sys_rt_sigreturn - movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer - RESTORE_REST - jmp int_ret_from_sys_call - CFI_ENDPROC -END(stub_rt_sigreturn) - -#ifdef CONFIG_X86_X32_ABI - PTREGSCALL stub_x32_sigaltstack, sys32_sigaltstack, %rdx - -ENTRY(stub_x32_rt_sigreturn) - CFI_STARTPROC - addq $8, %rsp - PARTIAL_FRAME 0 - SAVE_REST - movq %rsp,%rdi - FIXUP_TOP_OF_STACK %r11 - call sys32_x32_rt_sigreturn - movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer - RESTORE_REST - jmp int_ret_from_sys_call - CFI_ENDPROC -END(stub_x32_rt_sigreturn) - -ENTRY(stub_x32_execve) - CFI_STARTPROC - addq $8, %rsp - PARTIAL_FRAME 0 - SAVE_REST - FIXUP_TOP_OF_STACK %r11 - movq %rsp, %rcx - call sys32_execve - RESTORE_TOP_OF_STACK %r11 - movq %rax,RAX(%rsp) - RESTORE_REST - jmp int_ret_from_sys_call - CFI_ENDPROC -END(stub_x32_execve) - -#endif - -/* - * Build the entry stubs and pointer table with some assembler magic. - * We pack 7 stubs into a single 32-byte chunk, which will fit in a - * single cache line on all modern x86 implementations. - */ - .section .init.rodata,"a" -ENTRY(interrupt) - .section .entry.text - .p2align 5 - .p2align CONFIG_X86_L1_CACHE_SHIFT -ENTRY(irq_entries_start) - INTR_FRAME -vector=FIRST_EXTERNAL_VECTOR -.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7 - .balign 32 - .rept 7 - .if vector < NR_VECTORS - .if vector <> FIRST_EXTERNAL_VECTOR - CFI_ADJUST_CFA_OFFSET -8 - .endif -1: pushq_cfi $(~vector+0x80) /* Note: always in signed byte range */ - .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 - jmp 2f - .endif - .previous - .quad 1b - .section .entry.text -vector=vector+1 - .endif - .endr -2: jmp common_interrupt -.endr - CFI_ENDPROC -END(irq_entries_start) - -.previous -END(interrupt) -.previous - -/* - * Interrupt entry/exit. - * - * Interrupt entry points save only callee clobbered registers in fast path. - * - * Entry runs with interrupts off. 
- */ - -/* 0(%rsp): ~(interrupt number) */ - .macro interrupt func - /* reserve pt_regs for scratch regs and rbp */ - subq $ORIG_RAX-RBP, %rsp - CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP - SAVE_ARGS_IRQ - call \func - .endm - -/* - * Interrupt entry/exit should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" - /* - * The interrupt stubs push (~vector+0x80) onto the stack and - * then jump to common_interrupt. - */ - .p2align CONFIG_X86_L1_CACHE_SHIFT -common_interrupt: - XCPT_FRAME - addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ - interrupt do_IRQ - /* 0(%rsp): old_rsp-ARGOFFSET */ -ret_from_intr: - DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF - decl PER_CPU_VAR(irq_count) - - /* Restore saved previous stack */ - popq %rsi - CFI_DEF_CFA rsi,SS+8-RBP /* reg/off reset after def_cfa_expr */ - leaq ARGOFFSET-RBP(%rsi), %rsp - CFI_DEF_CFA_REGISTER rsp - CFI_ADJUST_CFA_OFFSET RBP-ARGOFFSET - -exit_intr: - GET_THREAD_INFO(%rcx) - testl $3,CS-ARGOFFSET(%rsp) - je retint_kernel - - /* Interrupt came from user space */ - /* - * Has a correct top of stack, but a partial stack frame - * %rcx: thread info. Interrupts off. - */ -retint_with_reschedule: - movl $_TIF_WORK_MASK,%edi -retint_check: - LOCKDEP_SYS_EXIT_IRQ - movl TI_flags(%rcx),%edx - andl %edi,%edx - CFI_REMEMBER_STATE - jnz retint_careful - -retint_swapgs: /* return to user-space */ - /* - * The iretq could re-enable interrupts: - */ - DISABLE_INTERRUPTS(CLBR_ANY) - TRACE_IRQS_IRETQ - SWAPGS - jmp restore_args - -retint_restore_args: /* return to kernel space */ - DISABLE_INTERRUPTS(CLBR_ANY) - /* - * The iretq could re-enable interrupts: - */ - TRACE_IRQS_IRETQ -restore_args: - RESTORE_ARGS 1,8,1 - -irq_return: - INTERRUPT_RETURN - - .section __ex_table, "a" - .quad irq_return, bad_iret - .previous - -#ifdef CONFIG_PARAVIRT -ENTRY(native_iret) - iretq - - .section __ex_table,"a" - .quad native_iret, bad_iret - .previous -#endif - - .section .fixup,"ax" -bad_iret: - /* - * The iret traps when the %cs or %ss being restored is bogus. - * We've lost the original trap vector and error code. - * #GPF is the most likely one to get for an invalid selector. - * So pretend we completed the iret and took the #GPF in user mode. - * - * We are now running with the kernel GS after exception recovery. - * But error_entry expects us to have user GS to match the user %cs, - * so swap back. - */ - pushq $0 - - SWAPGS - jmp general_protection - - .previous - - /* edi: workmask, edx: work */ -retint_careful: - CFI_RESTORE_STATE - bt $TIF_NEED_RESCHED,%edx - jnc retint_signal - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) - pushq_cfi %rdi - call schedule - popq_cfi %rdi - GET_THREAD_INFO(%rcx) - DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF - jmp retint_check - -retint_signal: - testl $_TIF_DO_NOTIFY_MASK,%edx - jz retint_swapgs - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) - SAVE_REST - movq $-1,ORIG_RAX(%rsp) - xorl %esi,%esi # oldset - movq %rsp,%rdi # &pt_regs - call do_notify_resume - RESTORE_REST - DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF - GET_THREAD_INFO(%rcx) - jmp retint_with_reschedule - -#ifdef CONFIG_PREEMPT - /* Returning to kernel space. Check if we need preemption */ - /* rcx: threadinfo. interrupts off. */ -ENTRY(retint_kernel) - cmpl $0,TI_preempt_count(%rcx) - jnz retint_restore_args - bt $TIF_NEED_RESCHED,TI_flags(%rcx) - jnc retint_restore_args - bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? 
*/ - jnc retint_restore_args - call preempt_schedule_irq - jmp exit_intr -#endif - - CFI_ENDPROC -END(common_interrupt) -/* - * End of kprobes section - */ - .popsection - -/* - * APIC interrupts. - */ -.macro apicinterrupt num sym do_sym -ENTRY(\sym) - INTR_FRAME - pushq_cfi $~(\num) -.Lcommon_\sym: - interrupt \do_sym - jmp ret_from_intr - CFI_ENDPROC -END(\sym) -.endm - -#ifdef CONFIG_SMP -apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ - irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt -apicinterrupt REBOOT_VECTOR \ - reboot_interrupt smp_reboot_interrupt -#endif - -#ifdef CONFIG_X86_UV -apicinterrupt UV_BAU_MESSAGE \ - uv_bau_message_intr1 uv_bau_message_interrupt -#endif -apicinterrupt LOCAL_TIMER_VECTOR \ - apic_timer_interrupt smp_apic_timer_interrupt -apicinterrupt X86_PLATFORM_IPI_VECTOR \ - x86_platform_ipi smp_x86_platform_ipi - -#ifdef CONFIG_SMP - ALIGN - INTR_FRAME -.irp idx,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \ - 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 -.if NUM_INVALIDATE_TLB_VECTORS > \idx -ENTRY(invalidate_interrupt\idx) - pushq_cfi $~(INVALIDATE_TLB_VECTOR_START+\idx) - jmp .Lcommon_invalidate_interrupt0 - CFI_ADJUST_CFA_OFFSET -8 -END(invalidate_interrupt\idx) -.endif -.endr - CFI_ENDPROC -apicinterrupt INVALIDATE_TLB_VECTOR_START, \ - invalidate_interrupt0, smp_invalidate_interrupt -#endif - -apicinterrupt THRESHOLD_APIC_VECTOR \ - threshold_interrupt smp_threshold_interrupt -apicinterrupt THERMAL_APIC_VECTOR \ - thermal_interrupt smp_thermal_interrupt - -#ifdef CONFIG_SMP -apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \ - call_function_single_interrupt smp_call_function_single_interrupt -apicinterrupt CALL_FUNCTION_VECTOR \ - call_function_interrupt smp_call_function_interrupt -apicinterrupt RESCHEDULE_VECTOR \ - reschedule_interrupt smp_reschedule_interrupt -#endif - -apicinterrupt ERROR_APIC_VECTOR \ - error_interrupt smp_error_interrupt -apicinterrupt SPURIOUS_APIC_VECTOR \ - spurious_interrupt smp_spurious_interrupt - -#ifdef CONFIG_IRQ_WORK -apicinterrupt IRQ_WORK_VECTOR \ - irq_work_interrupt smp_irq_work_interrupt -#endif - -/* - * Exception entry points. 
- */ -.macro zeroentry sym do_sym -ENTRY(\sym) - INTR_FRAME - PARAVIRT_ADJUST_EXCEPTION_FRAME - pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ - subq $ORIG_RAX-R15, %rsp - CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 - call error_entry - DEFAULT_FRAME 0 - movq %rsp,%rdi /* pt_regs pointer */ - xorl %esi,%esi /* no error code */ - call \do_sym - jmp error_exit /* %ebx: no swapgs flag */ - CFI_ENDPROC -END(\sym) -.endm - -.macro paranoidzeroentry sym do_sym -ENTRY(\sym) - INTR_FRAME - PARAVIRT_ADJUST_EXCEPTION_FRAME - pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ - subq $ORIG_RAX-R15, %rsp - CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 - call save_paranoid - TRACE_IRQS_OFF - movq %rsp,%rdi /* pt_regs pointer */ - xorl %esi,%esi /* no error code */ - call \do_sym - jmp paranoid_exit /* %ebx: no swapgs flag */ - CFI_ENDPROC -END(\sym) -.endm - -#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8) -.macro paranoidzeroentry_ist sym do_sym ist -ENTRY(\sym) - INTR_FRAME - PARAVIRT_ADJUST_EXCEPTION_FRAME - pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ - subq $ORIG_RAX-R15, %rsp - CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 - call save_paranoid - TRACE_IRQS_OFF - movq %rsp,%rdi /* pt_regs pointer */ - xorl %esi,%esi /* no error code */ - subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) - call \do_sym - addq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) - jmp paranoid_exit /* %ebx: no swapgs flag */ - CFI_ENDPROC -END(\sym) -.endm - -.macro errorentry sym do_sym -ENTRY(\sym) - XCPT_FRAME - PARAVIRT_ADJUST_EXCEPTION_FRAME - subq $ORIG_RAX-R15, %rsp - CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 - call error_entry - DEFAULT_FRAME 0 - movq %rsp,%rdi /* pt_regs pointer */ - movq ORIG_RAX(%rsp),%rsi /* get error code */ - movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ - call \do_sym - jmp error_exit /* %ebx: no swapgs flag */ - CFI_ENDPROC -END(\sym) -.endm - - /* error code is on the stack already */ -.macro paranoiderrorentry sym do_sym -ENTRY(\sym) - XCPT_FRAME - PARAVIRT_ADJUST_EXCEPTION_FRAME - subq $ORIG_RAX-R15, %rsp - CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 - call save_paranoid - DEFAULT_FRAME 0 - TRACE_IRQS_OFF - movq %rsp,%rdi /* pt_regs pointer */ - movq ORIG_RAX(%rsp),%rsi /* get error code */ - movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ - call \do_sym - jmp paranoid_exit /* %ebx: no swapgs flag */ - CFI_ENDPROC -END(\sym) -.endm - -zeroentry divide_error do_divide_error -zeroentry overflow do_overflow -zeroentry bounds do_bounds -zeroentry invalid_op do_invalid_op -zeroentry device_not_available do_device_not_available -paranoiderrorentry double_fault do_double_fault -zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun -errorentry invalid_TSS do_invalid_TSS -errorentry segment_not_present do_segment_not_present -zeroentry spurious_interrupt_bug do_spurious_interrupt_bug -zeroentry coprocessor_error do_coprocessor_error -errorentry alignment_check do_alignment_check -zeroentry simd_coprocessor_error do_simd_coprocessor_error - - - /* Reload gs selector with exception handling */ - /* edi: new selector */ -ENTRY(native_load_gs_index) - CFI_STARTPROC - pushfq_cfi - DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) - SWAPGS -gs_change: - movl %edi,%gs -2: mfence /* workaround */ - SWAPGS - popfq_cfi - ret - CFI_ENDPROC -END(native_load_gs_index) - - .section __ex_table,"a" - .align 8 - .quad gs_change,bad_gs - .previous - .section .fixup,"ax" - /* running with kernelgs */ -bad_gs: - SWAPGS /* switch back to user gs */ - xorl %eax,%eax - movl %eax,%gs - jmp 2b - .previous - 
-ENTRY(kernel_thread_helper) - pushq $0 # fake return address - CFI_STARTPROC - /* - * Here we are in the child and the registers are set as they were - * at kernel_thread() invocation in the parent. - */ - call *%rsi - # exit - mov %eax, %edi - call do_exit - ud2 # padding for call trace - CFI_ENDPROC -END(kernel_thread_helper) - -/* - * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. - * - * C extern interface: - * extern long execve(const char *name, char **argv, char **envp) - * - * asm input arguments: - * rdi: name, rsi: argv, rdx: envp - * - * We want to fallback into: - * extern long sys_execve(const char *name, char **argv,char **envp, struct pt_regs *regs) - * - * do_sys_execve asm fallback arguments: - * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack - */ -ENTRY(kernel_execve) - CFI_STARTPROC - FAKE_STACK_FRAME $0 - SAVE_ALL - movq %rsp,%rcx - call sys_execve - movq %rax, RAX(%rsp) - RESTORE_REST - testq %rax,%rax - je int_ret_from_sys_call - RESTORE_ARGS - UNFAKE_STACK_FRAME - ret - CFI_ENDPROC -END(kernel_execve) - -/* Call softirq on interrupt stack. Interrupts are off. */ -ENTRY(call_softirq) - CFI_STARTPROC - pushq_cfi %rbp - CFI_REL_OFFSET rbp,0 - mov %rsp,%rbp - CFI_DEF_CFA_REGISTER rbp - incl PER_CPU_VAR(irq_count) - cmove PER_CPU_VAR(irq_stack_ptr),%rsp - push %rbp # backlink for old unwinder - call __do_softirq - leaveq - CFI_RESTORE rbp - CFI_DEF_CFA_REGISTER rsp - CFI_ADJUST_CFA_OFFSET -8 - decl PER_CPU_VAR(irq_count) - ret - CFI_ENDPROC -END(call_softirq) - -#ifdef CONFIG_XEN -zeroentry xen_hypervisor_callback xen_do_hypervisor_callback - -/* - * A note on the "critical region" in our callback handler. - * We want to avoid stacking callback handlers due to events occurring - * during handling of the last event. To do this, we keep events disabled - * until we've done all processing. HOWEVER, we must enable events before - * popping the stack frame (can't be done atomically) and so it would still - * be possible to get enough handler activations to overflow the stack. - * Although unlikely, bugs of that kind are hard to track down, so we'd - * like to avoid the possibility. - * So, on entry to the handler we detect whether we interrupted an - * existing activation in its critical region -- if so, we pop the current - * activation and restart the handler using the previous one. - */ -ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) - CFI_STARTPROC -/* - * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will - * see the correct pointer to the pt_regs - */ - movq %rdi, %rsp # we don't return, adjust the stack frame - CFI_ENDPROC - DEFAULT_FRAME -11: incl PER_CPU_VAR(irq_count) - movq %rsp,%rbp - CFI_DEF_CFA_REGISTER rbp - cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp - pushq %rbp # backlink for old unwinder - call xen_evtchn_do_upcall - popq %rsp - CFI_DEF_CFA_REGISTER rsp - decl PER_CPU_VAR(irq_count) - jmp error_exit - CFI_ENDPROC -END(xen_do_hypervisor_callback) - -/* - * Hypervisor uses this for application faults while it executes. - * We get here for two reasons: - * 1. Fault while reloading DS, ES, FS or GS - * 2. Fault while executing IRET - * Category 1 we do not need to fix up as Xen has already reloaded all segment - * registers that could be reloaded and zeroed the others. - * Category 2 we fix up by killing the current process. 
We cannot use the - * normal Linux return path in this case because if we use the IRET hypercall - * to pop the stack frame we end up in an infinite loop of failsafe callbacks. - * We distinguish between categories by comparing each saved segment register - * with its current contents: any discrepancy means we in category 1. - */ -ENTRY(xen_failsafe_callback) - INTR_FRAME 1 (6*8) - /*CFI_REL_OFFSET gs,GS*/ - /*CFI_REL_OFFSET fs,FS*/ - /*CFI_REL_OFFSET es,ES*/ - /*CFI_REL_OFFSET ds,DS*/ - CFI_REL_OFFSET r11,8 - CFI_REL_OFFSET rcx,0 - movw %ds,%cx - cmpw %cx,0x10(%rsp) - CFI_REMEMBER_STATE - jne 1f - movw %es,%cx - cmpw %cx,0x18(%rsp) - jne 1f - movw %fs,%cx - cmpw %cx,0x20(%rsp) - jne 1f - movw %gs,%cx - cmpw %cx,0x28(%rsp) - jne 1f - /* All segments match their saved values => Category 2 (Bad IRET). */ - movq (%rsp),%rcx - CFI_RESTORE rcx - movq 8(%rsp),%r11 - CFI_RESTORE r11 - addq $0x30,%rsp - CFI_ADJUST_CFA_OFFSET -0x30 - pushq_cfi $0 /* RIP */ - pushq_cfi %r11 - pushq_cfi %rcx - jmp general_protection - CFI_RESTORE_STATE -1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ - movq (%rsp),%rcx - CFI_RESTORE rcx - movq 8(%rsp),%r11 - CFI_RESTORE r11 - addq $0x30,%rsp - CFI_ADJUST_CFA_OFFSET -0x30 - pushq_cfi $0 - SAVE_ALL - jmp error_exit - CFI_ENDPROC -END(xen_failsafe_callback) - -apicinterrupt XEN_HVM_EVTCHN_CALLBACK \ - xen_hvm_callback_vector xen_evtchn_do_upcall - -#endif /* CONFIG_XEN */ - -/* - * Some functions should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" - -paranoidzeroentry_ist debug do_debug DEBUG_STACK -paranoidzeroentry_ist int3 do_int3 DEBUG_STACK -paranoiderrorentry stack_segment do_stack_segment -#ifdef CONFIG_XEN -zeroentry xen_debug do_debug -zeroentry xen_int3 do_int3 -errorentry xen_stack_segment do_stack_segment -#endif -errorentry general_protection do_general_protection -errorentry page_fault do_page_fault -#ifdef CONFIG_KVM_GUEST -errorentry async_page_fault do_async_page_fault -#endif -#ifdef CONFIG_X86_MCE -paranoidzeroentry machine_check *machine_check_vector(%rip) -#endif - - /* - * "Paranoid" exit path from exception stack. - * Paranoid because this is used by NMIs and cannot take - * any kernel state for granted. - * We don't do kernel preemption checks here, because only - * NMI should be common and it does not enable IRQs and - * cannot get reschedule ticks. - * - * "trace" is 0 for the NMI handler only, because irq-tracing - * is fundamentally NMI-unsafe. (we cannot change the soft and - * hard flags at once, atomically) - */ - - /* ebx: no swapgs flag */ -ENTRY(paranoid_exit) - DEFAULT_FRAME - DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF - testl %ebx,%ebx /* swapgs needed? 
*/ - jnz paranoid_restore - testl $3,CS(%rsp) - jnz paranoid_userspace -paranoid_swapgs: - TRACE_IRQS_IRETQ 0 - SWAPGS_UNSAFE_STACK - RESTORE_ALL 8 - jmp irq_return -paranoid_restore: - TRACE_IRQS_IRETQ 0 - RESTORE_ALL 8 - jmp irq_return -paranoid_userspace: - GET_THREAD_INFO(%rcx) - movl TI_flags(%rcx),%ebx - andl $_TIF_WORK_MASK,%ebx - jz paranoid_swapgs - movq %rsp,%rdi /* &pt_regs */ - call sync_regs - movq %rax,%rsp /* switch stack for scheduling */ - testl $_TIF_NEED_RESCHED,%ebx - jnz paranoid_schedule - movl %ebx,%edx /* arg3: thread flags */ - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) - xorl %esi,%esi /* arg2: oldset */ - movq %rsp,%rdi /* arg1: &pt_regs */ - call do_notify_resume - DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF - jmp paranoid_userspace -paranoid_schedule: - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_ANY) - call schedule - DISABLE_INTERRUPTS(CLBR_ANY) - TRACE_IRQS_OFF - jmp paranoid_userspace - CFI_ENDPROC -END(paranoid_exit) - -/* - * Exception entry point. This expects an error code/orig_rax on the stack. - * returns in "no swapgs flag" in %ebx. - */ -ENTRY(error_entry) - XCPT_FRAME - CFI_ADJUST_CFA_OFFSET 15*8 - /* oldrax contains error code */ - cld - movq_cfi rdi, RDI+8 - movq_cfi rsi, RSI+8 - movq_cfi rdx, RDX+8 - movq_cfi rcx, RCX+8 - movq_cfi rax, RAX+8 - movq_cfi r8, R8+8 - movq_cfi r9, R9+8 - movq_cfi r10, R10+8 - movq_cfi r11, R11+8 - movq_cfi rbx, RBX+8 - movq_cfi rbp, RBP+8 - movq_cfi r12, R12+8 - movq_cfi r13, R13+8 - movq_cfi r14, R14+8 - movq_cfi r15, R15+8 - xorl %ebx,%ebx - testl $3,CS+8(%rsp) - je error_kernelspace -error_swapgs: - SWAPGS -error_sti: - TRACE_IRQS_OFF - ret - -/* - * There are two places in the kernel that can potentially fault with - * usergs. Handle them here. The exception handlers after iret run with - * kernel gs again, so don't set the user space flag. B stepping K8s - * sometimes report an truncated RIP for IRET exceptions returning to - * compat mode. Check for these here too. - */ -error_kernelspace: - incl %ebx - leaq irq_return(%rip),%rcx - cmpq %rcx,RIP+8(%rsp) - je error_swapgs - movl %ecx,%eax /* zero extend */ - cmpq %rax,RIP+8(%rsp) - je bstep_iret - cmpq $gs_change,RIP+8(%rsp) - je error_swapgs - jmp error_sti - -bstep_iret: - /* Fix truncated RIP */ - movq %rcx,RIP+8(%rsp) - jmp error_swapgs - CFI_ENDPROC -END(error_entry) - - -/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ -ENTRY(error_exit) - DEFAULT_FRAME - movl %ebx,%eax - RESTORE_REST - DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF - GET_THREAD_INFO(%rcx) - testl %eax,%eax - jne retint_kernel - LOCKDEP_SYS_EXIT_IRQ - movl TI_flags(%rcx),%edx - movl $_TIF_WORK_MASK,%edi - andl %edi,%edx - jnz retint_careful - jmp retint_swapgs - CFI_ENDPROC -END(error_exit) - -/* - * Test if a given stack is an NMI stack or not. - */ - .macro test_in_nmi reg stack nmi_ret normal_ret - cmpq %\reg, \stack - ja \normal_ret - subq $EXCEPTION_STKSZ, %\reg - cmpq %\reg, \stack - jb \normal_ret - jmp \nmi_ret - .endm - - /* runs on exception stack */ -ENTRY(nmi) - INTR_FRAME - PARAVIRT_ADJUST_EXCEPTION_FRAME - /* - * We allow breakpoints in NMIs. If a breakpoint occurs, then - * the iretq it performs will take us out of NMI context. - * This means that we can have nested NMIs where the next - * NMI is using the top of the stack of the previous NMI. We - * can't let it execute because the nested NMI will corrupt the - * stack of the previous NMI. NMI handlers are not re-entrant - * anyway. 
- * - * To handle this case we do the following: - * Check the a special location on the stack that contains - * a variable that is set when NMIs are executing. - * The interrupted task's stack is also checked to see if it - * is an NMI stack. - * If the variable is not set and the stack is not the NMI - * stack then: - * o Set the special variable on the stack - * o Copy the interrupt frame into a "saved" location on the stack - * o Copy the interrupt frame into a "copy" location on the stack - * o Continue processing the NMI - * If the variable is set or the previous stack is the NMI stack: - * o Modify the "copy" location to jump to the repeate_nmi - * o return back to the first NMI - * - * Now on exit of the first NMI, we first clear the stack variable - * The NMI stack will tell any nested NMIs at that point that it is - * nested. Then we pop the stack normally with iret, and if there was - * a nested NMI that updated the copy interrupt stack frame, a - * jump will be made to the repeat_nmi code that will handle the second - * NMI. - */ - - /* Use %rdx as out temp variable throughout */ - pushq_cfi %rdx - CFI_REL_OFFSET rdx, 0 - - /* - * If %cs was not the kernel segment, then the NMI triggered in user - * space, which means it is definitely not nested. - */ - cmpl $__KERNEL_CS, 16(%rsp) - jne first_nmi - - /* - * Check the special variable on the stack to see if NMIs are - * executing. - */ - cmpl $1, -8(%rsp) - je nested_nmi - - /* - * Now test if the previous stack was an NMI stack. - * We need the double check. We check the NMI stack to satisfy the - * race when the first NMI clears the variable before returning. - * We check the variable because the first NMI could be in a - * breakpoint routine using a breakpoint stack. - */ - lea 6*8(%rsp), %rdx - test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi - CFI_REMEMBER_STATE - -nested_nmi: - /* - * Do nothing if we interrupted the fixup in repeat_nmi. - * It's about to repeat the NMI handler, so we are fine - * with ignoring this one. - */ - movq $repeat_nmi, %rdx - cmpq 8(%rsp), %rdx - ja 1f - movq $end_repeat_nmi, %rdx - cmpq 8(%rsp), %rdx - ja nested_nmi_out - -1: - /* Set up the interrupted NMIs stack to jump to repeat_nmi */ - leaq -6*8(%rsp), %rdx - movq %rdx, %rsp - CFI_ADJUST_CFA_OFFSET 6*8 - pushq_cfi $__KERNEL_DS - pushq_cfi %rdx - pushfq_cfi - pushq_cfi $__KERNEL_CS - pushq_cfi $repeat_nmi - - /* Put stack back */ - addq $(11*8), %rsp - CFI_ADJUST_CFA_OFFSET -11*8 - -nested_nmi_out: - popq_cfi %rdx - CFI_RESTORE rdx - - /* No need to check faults here */ - INTERRUPT_RETURN - - CFI_RESTORE_STATE -first_nmi: - /* - * Because nested NMIs will use the pushed location that we - * stored in rdx, we must keep that space available. 
- * Here's what our stack frame will look like: - * +-------------------------+ - * | original SS | - * | original Return RSP | - * | original RFLAGS | - * | original CS | - * | original RIP | - * +-------------------------+ - * | temp storage for rdx | - * +-------------------------+ - * | NMI executing variable | - * +-------------------------+ - * | Saved SS | - * | Saved Return RSP | - * | Saved RFLAGS | - * | Saved CS | - * | Saved RIP | - * +-------------------------+ - * | copied SS | - * | copied Return RSP | - * | copied RFLAGS | - * | copied CS | - * | copied RIP | - * +-------------------------+ - * | pt_regs | - * +-------------------------+ - * - * The saved stack frame is used to fix up the copied stack frame - * that a nested NMI may change to make the interrupted NMI iret jump - * to the repeat_nmi. The original stack frame and the temp storage - * is also used by nested NMIs and can not be trusted on exit. - */ - /* Do not pop rdx, nested NMIs will corrupt that part of the stack */ - movq (%rsp), %rdx - CFI_RESTORE rdx - - /* Set the NMI executing variable on the stack. */ - pushq_cfi $1 - - /* Copy the stack frame to the Saved frame */ - .rept 5 - pushq_cfi 6*8(%rsp) - .endr - CFI_DEF_CFA_OFFSET SS+8-RIP - - /* Everything up to here is safe from nested NMIs */ - - /* - * If there was a nested NMI, the first NMI's iret will return - * here. But NMIs are still enabled and we can take another - * nested NMI. The nested NMI checks the interrupted RIP to see - * if it is between repeat_nmi and end_repeat_nmi, and if so - * it will just return, as we are about to repeat an NMI anyway. - * This makes it safe to copy to the stack frame that a nested - * NMI will update. - */ -repeat_nmi: - /* - * Update the stack variable to say we are still in NMI (the update - * is benign for the non-repeat case, where 1 was pushed just above - * to this very stack slot). - */ - movq $1, 5*8(%rsp) - - /* Make another copy, this one may be modified by nested NMIs */ - .rept 5 - pushq_cfi 4*8(%rsp) - .endr - CFI_DEF_CFA_OFFSET SS+8-RIP -end_repeat_nmi: - - /* - * Everything below this point can be preempted by a nested - * NMI if the first NMI took an exception and reset our iret stack - * so that we repeat another NMI. - */ - pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ - subq $ORIG_RAX-R15, %rsp - CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 - /* - * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit - * as we should not be calling schedule in NMI context. - * Even with normal interrupts enabled. An NMI should not be - * setting NEED_RESCHED or anything that normal interrupts and - * exceptions might do. - */ - call save_paranoid - DEFAULT_FRAME 0 - /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ - movq %rsp,%rdi - movq $-1,%rsi - call do_nmi - testl %ebx,%ebx /* swapgs needed? */ - jnz nmi_restore -nmi_swapgs: - SWAPGS_UNSAFE_STACK -nmi_restore: - RESTORE_ALL 8 - /* Clear the NMI executing stack variable */ - movq $0, 10*8(%rsp) - jmp irq_return - CFI_ENDPROC -END(nmi) - -ENTRY(ignore_sysret) - CFI_STARTPROC - mov $-ENOSYS,%eax - sysret - CFI_ENDPROC -END(ignore_sysret) - -/* - * End of kprobes section - */ - .popsection diff --git a/ANDROID_3.4.5/arch/x86/kernel/ftrace.c b/ANDROID_3.4.5/arch/x86/kernel/ftrace.c deleted file mode 100644 index c9a281f2..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/ftrace.c +++ /dev/null @@ -1,455 +0,0 @@ -/* - * Code for replacing ftrace calls with jumps. 
- * - * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com> - * - * Thanks goes to Ingo Molnar, for suggesting the idea. - * Mathieu Desnoyers, for suggesting postponing the modifications. - * Arjan van de Ven, for keeping me straight, and explaining to me - * the dangers of modifying code on the run. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/spinlock.h> -#include <linux/hardirq.h> -#include <linux/uaccess.h> -#include <linux/ftrace.h> -#include <linux/percpu.h> -#include <linux/sched.h> -#include <linux/init.h> -#include <linux/list.h> -#include <linux/module.h> - -#include <trace/syscall.h> - -#include <asm/cacheflush.h> -#include <asm/ftrace.h> -#include <asm/nops.h> -#include <asm/nmi.h> - - -#ifdef CONFIG_DYNAMIC_FTRACE - -/* - * modifying_code is set to notify NMIs that they need to use - * memory barriers when entering or exiting. But we don't want - * to burden NMIs with unnecessary memory barriers when code - * modification is not being done (which is most of the time). - * - * A mutex is already held when ftrace_arch_code_modify_prepare - * and post_process are called. No locks need to be taken here. - * - * Stop machine will make sure currently running NMIs are done - * and new NMIs will see the updated variable before we need - * to worry about NMIs doing memory barriers. - */ -static int modifying_code __read_mostly; -static DEFINE_PER_CPU(int, save_modifying_code); - -int ftrace_arch_code_modify_prepare(void) -{ - set_kernel_text_rw(); - set_all_modules_text_rw(); - modifying_code = 1; - return 0; -} - -int ftrace_arch_code_modify_post_process(void) -{ - modifying_code = 0; - set_all_modules_text_ro(); - set_kernel_text_ro(); - return 0; -} - -union ftrace_code_union { - char code[MCOUNT_INSN_SIZE]; - struct { - char e8; - int offset; - } __attribute__((packed)); -}; - -static int ftrace_calc_offset(long ip, long addr) -{ - return (int)(addr - ip); -} - -static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) -{ - static union ftrace_code_union calc; - - calc.e8 = 0xe8; - calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr); - - /* - * No locking needed, this must be called via kstop_machine - * which in essence is like running on a uniprocessor machine. - */ - return calc.code; -} - -/* - * Modifying code must take extra care. On an SMP machine, if - * the code being modified is also being executed on another CPU - * that CPU will have undefined results and possibly take a GPF. - * We use kstop_machine to stop other CPUS from exectuing code. - * But this does not stop NMIs from happening. We still need - * to protect against that. We separate out the modification of - * the code to take care of this. - * - * Two buffers are added: An IP buffer and a "code" buffer. - * - * 1) Put the instruction pointer into the IP buffer - * and the new code into the "code" buffer. - * 2) Wait for any running NMIs to finish and set a flag that says - * we are modifying code, it is done in an atomic operation. - * 3) Write the code - * 4) clear the flag. - * 5) Wait for any running NMIs to finish. - * - * If an NMI is executed, the first thing it does is to call - * "ftrace_nmi_enter". This will check if the flag is set to write - * and if it is, it will write what is in the IP and "code" buffers. - * - * The trick is, it does not matter if everyone is writing the same - * content to the code location. 
Also, if a CPU is executing code - * it is OK to write to that code location if the contents being written - * are the same as what exists. - */ - -#define MOD_CODE_WRITE_FLAG (1 << 31) /* set when NMI should do the write */ -static atomic_t nmi_running = ATOMIC_INIT(0); -static int mod_code_status; /* holds return value of text write */ -static void *mod_code_ip; /* holds the IP to write to */ -static const void *mod_code_newcode; /* holds the text to write to the IP */ - -static unsigned nmi_wait_count; -static atomic_t nmi_update_count = ATOMIC_INIT(0); - -int ftrace_arch_read_dyn_info(char *buf, int size) -{ - int r; - - r = snprintf(buf, size, "%u %u", - nmi_wait_count, - atomic_read(&nmi_update_count)); - return r; -} - -static void clear_mod_flag(void) -{ - int old = atomic_read(&nmi_running); - - for (;;) { - int new = old & ~MOD_CODE_WRITE_FLAG; - - if (old == new) - break; - - old = atomic_cmpxchg(&nmi_running, old, new); - } -} - -static void ftrace_mod_code(void) -{ - /* - * Yes, more than one CPU process can be writing to mod_code_status. - * (and the code itself) - * But if one were to fail, then they all should, and if one were - * to succeed, then they all should. - */ - mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode, - MCOUNT_INSN_SIZE); - - /* if we fail, then kill any new writers */ - if (mod_code_status) - clear_mod_flag(); -} - -void ftrace_nmi_enter(void) -{ - __this_cpu_write(save_modifying_code, modifying_code); - - if (!__this_cpu_read(save_modifying_code)) - return; - - if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) { - smp_rmb(); - ftrace_mod_code(); - atomic_inc(&nmi_update_count); - } - /* Must have previous changes seen before executions */ - smp_mb(); -} - -void ftrace_nmi_exit(void) -{ - if (!__this_cpu_read(save_modifying_code)) - return; - - /* Finish all executions before clearing nmi_running */ - smp_mb(); - atomic_dec(&nmi_running); -} - -static void wait_for_nmi_and_set_mod_flag(void) -{ - if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG)) - return; - - do { - cpu_relax(); - } while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG)); - - nmi_wait_count++; -} - -static void wait_for_nmi(void) -{ - if (!atomic_read(&nmi_running)) - return; - - do { - cpu_relax(); - } while (atomic_read(&nmi_running)); - - nmi_wait_count++; -} - -static inline int -within(unsigned long addr, unsigned long start, unsigned long end) -{ - return addr >= start && addr < end; -} - -static int -do_ftrace_mod_code(unsigned long ip, const void *new_code) -{ - /* - * On x86_64, kernel text mappings are mapped read-only with - * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead - * of the kernel text mapping to modify the kernel text. - * - * For 32bit kernels, these mappings are same and we can use - * kernel identity mapping to modify code. 
- */ - if (within(ip, (unsigned long)_text, (unsigned long)_etext)) - ip = (unsigned long)__va(__pa(ip)); - - mod_code_ip = (void *)ip; - mod_code_newcode = new_code; - - /* The buffers need to be visible before we let NMIs write them */ - smp_mb(); - - wait_for_nmi_and_set_mod_flag(); - - /* Make sure all running NMIs have finished before we write the code */ - smp_mb(); - - ftrace_mod_code(); - - /* Make sure the write happens before clearing the bit */ - smp_mb(); - - clear_mod_flag(); - wait_for_nmi(); - - return mod_code_status; -} - -static const unsigned char *ftrace_nop_replace(void) -{ - return ideal_nops[NOP_ATOMIC5]; -} - -static int -ftrace_modify_code(unsigned long ip, unsigned const char *old_code, - unsigned const char *new_code) -{ - unsigned char replaced[MCOUNT_INSN_SIZE]; - - /* - * Note: Due to modules and __init, code can - * disappear and change, we need to protect against faulting - * as well as code changing. We do this by using the - * probe_kernel_* functions. - * - * No real locking needed, this code is run through - * kstop_machine, or before SMP starts. - */ - - /* read the text we want to modify */ - if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) - return -EFAULT; - - /* Make sure it is what we expect it to be */ - if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) - return -EINVAL; - - /* replace the text with the new text */ - if (do_ftrace_mod_code(ip, new_code)) - return -EPERM; - - sync_core(); - - return 0; -} - -int ftrace_make_nop(struct module *mod, - struct dyn_ftrace *rec, unsigned long addr) -{ - unsigned const char *new, *old; - unsigned long ip = rec->ip; - - old = ftrace_call_replace(ip, addr); - new = ftrace_nop_replace(); - - return ftrace_modify_code(rec->ip, old, new); -} - -int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) -{ - unsigned const char *new, *old; - unsigned long ip = rec->ip; - - old = ftrace_nop_replace(); - new = ftrace_call_replace(ip, addr); - - return ftrace_modify_code(rec->ip, old, new); -} - -int ftrace_update_ftrace_func(ftrace_func_t func) -{ - unsigned long ip = (unsigned long)(&ftrace_call); - unsigned char old[MCOUNT_INSN_SIZE], *new; - int ret; - - memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE); - new = ftrace_call_replace(ip, (unsigned long)func); - ret = ftrace_modify_code(ip, old, new); - - return ret; -} - -int __init ftrace_dyn_arch_init(void *data) -{ - /* The return code is retured via data */ - *(unsigned long *)data = 0; - - return 0; -} -#endif - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - -#ifdef CONFIG_DYNAMIC_FTRACE -extern void ftrace_graph_call(void); - -static int ftrace_mod_jmp(unsigned long ip, - int old_offset, int new_offset) -{ - unsigned char code[MCOUNT_INSN_SIZE]; - - if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE)) - return -EFAULT; - - if (code[0] != 0xe9 || old_offset != *(int *)(&code[1])) - return -EINVAL; - - *(int *)(&code[1]) = new_offset; - - if (do_ftrace_mod_code(ip, &code)) - return -EPERM; - - return 0; -} - -int ftrace_enable_ftrace_graph_caller(void) -{ - unsigned long ip = (unsigned long)(&ftrace_graph_call); - int old_offset, new_offset; - - old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE); - new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE); - - return ftrace_mod_jmp(ip, old_offset, new_offset); -} - -int ftrace_disable_ftrace_graph_caller(void) -{ - unsigned long ip = (unsigned long)(&ftrace_graph_call); - int old_offset, new_offset; - - old_offset = (unsigned long)(&ftrace_graph_caller) - 
(ip + MCOUNT_INSN_SIZE); - new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE); - - return ftrace_mod_jmp(ip, old_offset, new_offset); -} - -#endif /* !CONFIG_DYNAMIC_FTRACE */ - -/* - * Hook the return address and push it in the stack of return addrs - * in current thread info. - */ -void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, - unsigned long frame_pointer) -{ - unsigned long old; - int faulted; - struct ftrace_graph_ent trace; - unsigned long return_hooker = (unsigned long) - &return_to_handler; - - if (unlikely(atomic_read(¤t->tracing_graph_pause))) - return; - - /* - * Protect against fault, even if it shouldn't - * happen. This tool is too much intrusive to - * ignore such a protection. - */ - asm volatile( - "1: " _ASM_MOV " (%[parent]), %[old]\n" - "2: " _ASM_MOV " %[return_hooker], (%[parent])\n" - " movl $0, %[faulted]\n" - "3:\n" - - ".section .fixup, \"ax\"\n" - "4: movl $1, %[faulted]\n" - " jmp 3b\n" - ".previous\n" - - _ASM_EXTABLE(1b, 4b) - _ASM_EXTABLE(2b, 4b) - - : [old] "=&r" (old), [faulted] "=r" (faulted) - : [parent] "r" (parent), [return_hooker] "r" (return_hooker) - : "memory" - ); - - if (unlikely(faulted)) { - ftrace_graph_stop(); - WARN_ON(1); - return; - } - - trace.func = self_addr; - trace.depth = current->curr_ret_stack + 1; - - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) { - *parent = old; - return; - } - - if (ftrace_push_return_trace(old, self_addr, &trace.depth, - frame_pointer) == -EBUSY) { - *parent = old; - return; - } -} -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/ANDROID_3.4.5/arch/x86/kernel/head.c b/ANDROID_3.4.5/arch/x86/kernel/head.c deleted file mode 100644 index 48d9d4ea..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/head.c +++ /dev/null @@ -1,56 +0,0 @@ -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/memblock.h> - -#include <asm/setup.h> -#include <asm/bios_ebda.h> - -#define BIOS_LOWMEM_KILOBYTES 0x413 - -/* - * The BIOS places the EBDA/XBDA at the top of conventional - * memory, and usually decreases the reported amount of - * conventional memory (int 0x12) too. This also contains a - * workaround for Dell systems that neglect to reserve EBDA. - * The same workaround also avoids a problem with the AMD768MPX - * chipset: reserve a page before VGA to prevent PCI prefetch - * into it (errata #56). Usually the page is reserved anyways, - * unless you have no PS/2 mouse plugged in. - */ -void __init reserve_ebda_region(void) -{ - unsigned int lowmem, ebda_addr; - - /* To determine the position of the EBDA and the */ - /* end of conventional memory, we need to look at */ - /* the BIOS data area. In a paravirtual environment */ - /* that area is absent. We'll just have to assume */ - /* that the paravirt case can handle memory setup */ - /* correctly, without our help. */ - if (paravirt_enabled()) - return; - - /* end of low (conventional) memory */ - lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); - lowmem <<= 10; - - /* start of EBDA area */ - ebda_addr = get_bios_ebda(); - - /* Fixup: bios puts an EBDA in the top 64K segment */ - /* of conventional memory, but does not adjust lowmem. */ - if ((lowmem - ebda_addr) <= 0x10000) - lowmem = ebda_addr; - - /* Fixup: bios does not report an EBDA at all. */ - /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */ - if ((ebda_addr == 0) && (lowmem >= 0x9f000)) - lowmem = 0x9f000; - - /* Paranoia: should never happen, but... 
*/ - if ((lowmem == 0) || (lowmem >= 0x100000)) - lowmem = 0x9f000; - - /* reserve all memory between lowmem and the 1MB mark */ - memblock_reserve(lowmem, 0x100000 - lowmem); -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/head32.c b/ANDROID_3.4.5/arch/x86/kernel/head32.c deleted file mode 100644 index 51ff1861..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/head32.c +++ /dev/null @@ -1,68 +0,0 @@ -/* - * linux/arch/i386/kernel/head32.c -- prepare to run common code - * - * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE - * Copyright (C) 2007 Eric Biederman <ebiederm@xmission.com> - */ - -#include <linux/init.h> -#include <linux/start_kernel.h> -#include <linux/mm.h> -#include <linux/memblock.h> - -#include <asm/setup.h> -#include <asm/sections.h> -#include <asm/e820.h> -#include <asm/page.h> -#include <asm/trampoline.h> -#include <asm/apic.h> -#include <asm/io_apic.h> -#include <asm/bios_ebda.h> -#include <asm/tlbflush.h> - -static void __init i386_default_early_setup(void) -{ - /* Initialize 32bit specific setup functions */ - x86_init.resources.reserve_resources = i386_reserve_resources; - x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc; - - reserve_ebda_region(); -} - -void __init i386_start_kernel(void) -{ - memblock_reserve(__pa_symbol(&_text), - __pa_symbol(&__bss_stop) - __pa_symbol(&_text)); - -#ifdef CONFIG_BLK_DEV_INITRD - /* Reserve INITRD */ - if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { - /* Assume only end is not page aligned */ - u64 ramdisk_image = boot_params.hdr.ramdisk_image; - u64 ramdisk_size = boot_params.hdr.ramdisk_size; - u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); - memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image); - } -#endif - - /* Call the subarch specific early setup function */ - switch (boot_params.hdr.hardware_subarch) { - case X86_SUBARCH_MRST: - x86_mrst_early_setup(); - break; - case X86_SUBARCH_CE4100: - x86_ce4100_early_setup(); - break; - default: - i386_default_early_setup(); - break; - } - - /* - * At this point everything still needed from the boot loader - * or BIOS or kernel text should be early reserved or marked not - * RAM in e820. All other memory is free game. - */ - - start_kernel(); -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/head64.c b/ANDROID_3.4.5/arch/x86/kernel/head64.c deleted file mode 100644 index 3a3b779f..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/head64.c +++ /dev/null @@ -1,124 +0,0 @@ -/* - * prepare to run common code - * - * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE - */ - -#include <linux/init.h> -#include <linux/linkage.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/percpu.h> -#include <linux/start_kernel.h> -#include <linux/io.h> -#include <linux/memblock.h> - -#include <asm/processor.h> -#include <asm/proto.h> -#include <asm/smp.h> -#include <asm/setup.h> -#include <asm/desc.h> -#include <asm/pgtable.h> -#include <asm/tlbflush.h> -#include <asm/sections.h> -#include <asm/kdebug.h> -#include <asm/e820.h> -#include <asm/trampoline.h> -#include <asm/bios_ebda.h> - -static void __init zap_identity_mappings(void) -{ - pgd_t *pgd = pgd_offset_k(0UL); - pgd_clear(pgd); - __flush_tlb_all(); -} - -/* Don't add a printk in there. printk relies on the PDA which is not initialized - yet. 
*/ -static void __init clear_bss(void) -{ - memset(__bss_start, 0, - (unsigned long) __bss_stop - (unsigned long) __bss_start); -} - -static void __init copy_bootdata(char *real_mode_data) -{ - char * command_line; - - memcpy(&boot_params, real_mode_data, sizeof boot_params); - if (boot_params.hdr.cmd_line_ptr) { - command_line = __va(boot_params.hdr.cmd_line_ptr); - memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); - } -} - -void __init x86_64_start_kernel(char * real_mode_data) -{ - int i; - - /* - * Build-time sanity checks on the kernel image and module - * area mappings. (these are purely build-time and produce no code) - */ - BUILD_BUG_ON(MODULES_VADDR < KERNEL_IMAGE_START); - BUILD_BUG_ON(MODULES_VADDR-KERNEL_IMAGE_START < KERNEL_IMAGE_SIZE); - BUILD_BUG_ON(MODULES_LEN + KERNEL_IMAGE_SIZE > 2*PUD_SIZE); - BUILD_BUG_ON((KERNEL_IMAGE_START & ~PMD_MASK) != 0); - BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0); - BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); - BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == - (__START_KERNEL & PGDIR_MASK))); - BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); - - /* clear bss before set_intr_gate with early_idt_handler */ - clear_bss(); - - /* Make NULL pointers segfault */ - zap_identity_mappings(); - - max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT; - - for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) { -#ifdef CONFIG_EARLY_PRINTK - set_intr_gate(i, &early_idt_handlers[i]); -#else - set_intr_gate(i, early_idt_handler); -#endif - } - load_idt((const struct desc_ptr *)&idt_descr); - - if (console_loglevel == 10) - early_printk("Kernel alive\n"); - - x86_64_start_reservations(real_mode_data); -} - -void __init x86_64_start_reservations(char *real_mode_data) -{ - copy_bootdata(__va(real_mode_data)); - - memblock_reserve(__pa_symbol(&_text), - __pa_symbol(&__bss_stop) - __pa_symbol(&_text)); - -#ifdef CONFIG_BLK_DEV_INITRD - /* Reserve INITRD */ - if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { - /* Assume only end is not page aligned */ - unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; - unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; - unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); - memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image); - } -#endif - - reserve_ebda_region(); - - /* - * At this point everything still needed from the boot loader - * or BIOS or kernel text should be early reserved or marked not - * RAM in e820. All other memory is free game. - */ - - start_kernel(); -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/head_32.S b/ANDROID_3.4.5/arch/x86/kernel/head_32.S deleted file mode 100644 index ce0be7cd..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/head_32.S +++ /dev/null @@ -1,731 +0,0 @@ -/* - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * Enhanced CPU detection and feature setting code by Mike Jagdis - * and Martin Mares, November 1997. - */ - -.text -#include <linux/threads.h> -#include <linux/init.h> -#include <linux/linkage.h> -#include <asm/segment.h> -#include <asm/page_types.h> -#include <asm/pgtable_types.h> -#include <asm/cache.h> -#include <asm/thread_info.h> -#include <asm/asm-offsets.h> -#include <asm/setup.h> -#include <asm/processor-flags.h> -#include <asm/msr-index.h> -#include <asm/cpufeature.h> -#include <asm/percpu.h> - -/* Physical address */ -#define pa(X) ((X) - __PAGE_OFFSET) - -/* - * References to members of the new_cpu_data structure. 
- */ - -#define X86 new_cpu_data+CPUINFO_x86 -#define X86_VENDOR new_cpu_data+CPUINFO_x86_vendor -#define X86_MODEL new_cpu_data+CPUINFO_x86_model -#define X86_MASK new_cpu_data+CPUINFO_x86_mask -#define X86_HARD_MATH new_cpu_data+CPUINFO_hard_math -#define X86_CPUID new_cpu_data+CPUINFO_cpuid_level -#define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability -#define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id - -/* - * This is how much memory in addition to the memory covered up to - * and including _end we need mapped initially. - * We need: - * (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE) - * (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE) - * - * Modulo rounding, each megabyte assigned here requires a kilobyte of - * memory, which is currently unreclaimed. - * - * This should be a multiple of a page. - * - * KERNEL_IMAGE_SIZE should be greater than pa(_end) - * and small than max_low_pfn, otherwise will waste some page table entries - */ - -#if PTRS_PER_PMD > 1 -#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD) -#else -#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) -#endif - -/* Number of possible pages in the lowmem region */ -LOWMEM_PAGES = (((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) - -/* Enough space to fit pagetables for the low memory linear map */ -MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT - -/* - * Worst-case size of the kernel mapping we need to make: - * a relocatable kernel can live anywhere in lowmem, so we need to be able - * to map all of lowmem. - */ -KERNEL_PAGES = LOWMEM_PAGES - -INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE -RESERVE_BRK(pagetables, INIT_MAP_SIZE) - -/* - * 32-bit kernel entrypoint; only used by the boot CPU. On entry, - * %esi points to the real-mode code as a 32-bit pointer. - * CS and DS must be 4 GB flat segments, but we don't depend on - * any particular GDT layout, because we load our own as soon as we - * can. - */ -__HEAD -ENTRY(startup_32) - movl pa(stack_start),%ecx - - /* test KEEP_SEGMENTS flag to see if the bootloader is asking - us to not reload segments */ - testb $(1<<6), BP_loadflags(%esi) - jnz 2f - -/* - * Set segments to known values. - */ - lgdt pa(boot_gdt_descr) - movl $(__BOOT_DS),%eax - movl %eax,%ds - movl %eax,%es - movl %eax,%fs - movl %eax,%gs - movl %eax,%ss -2: - leal -__PAGE_OFFSET(%ecx),%esp - -/* - * Clear BSS first so that there are no surprises... - */ - cld - xorl %eax,%eax - movl $pa(__bss_start),%edi - movl $pa(__bss_stop),%ecx - subl %edi,%ecx - shrl $2,%ecx - rep ; stosl -/* - * Copy bootup parameters out of the way. - * Note: %esi still has the pointer to the real-mode data. - * With the kexec as boot loader, parameter segment might be loaded beyond - * kernel image and might not even be addressable by early boot page tables. - * (kexec on panic case). Hence copy out the parameters before initializing - * page tables. - */ - movl $pa(boot_params),%edi - movl $(PARAM_SIZE/4),%ecx - cld - rep - movsl - movl pa(boot_params) + NEW_CL_POINTER,%esi - andl %esi,%esi - jz 1f # No command line - movl $pa(boot_command_line),%edi - movl $(COMMAND_LINE_SIZE/4),%ecx - rep - movsl -1: - -#ifdef CONFIG_OLPC - /* save OFW's pgdir table for later use when calling into OFW */ - movl %cr3, %eax - movl %eax, pa(olpc_ofw_pgd) -#endif - -/* - * Initialize page tables. This creates a PDE and a set of page - * tables, which are located immediately beyond __brk_base. 
The variable - * _brk_end is set up to point to the first "safe" location. - * Mappings are created both at virtual address 0 (identity mapping) - * and PAGE_OFFSET for up to _end. - */ -#ifdef CONFIG_X86_PAE - - /* - * In PAE mode initial_page_table is statically defined to contain - * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3 - * entries). The identity mapping is handled by pointing two PGD entries - * to the first kernel PMD. - * - * Note the upper half of each PMD or PTE are always zero at this stage. - */ - -#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */ - - xorl %ebx,%ebx /* %ebx is kept at zero */ - - movl $pa(__brk_base), %edi - movl $pa(initial_pg_pmd), %edx - movl $PTE_IDENT_ATTR, %eax -10: - leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */ - movl %ecx,(%edx) /* Store PMD entry */ - /* Upper half already zero */ - addl $8,%edx - movl $512,%ecx -11: - stosl - xchgl %eax,%ebx - stosl - xchgl %eax,%ebx - addl $0x1000,%eax - loop 11b - - /* - * End condition: we must map up to the end + MAPPING_BEYOND_END. - */ - movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp - cmpl %ebp,%eax - jb 10b -1: - addl $__PAGE_OFFSET, %edi - movl %edi, pa(_brk_end) - shrl $12, %eax - movl %eax, pa(max_pfn_mapped) - - /* Do early initialization of the fixmap area */ - movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax - movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8) -#else /* Not PAE */ - -page_pde_offset = (__PAGE_OFFSET >> 20); - - movl $pa(__brk_base), %edi - movl $pa(initial_page_table), %edx - movl $PTE_IDENT_ATTR, %eax -10: - leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */ - movl %ecx,(%edx) /* Store identity PDE entry */ - movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ - addl $4,%edx - movl $1024, %ecx -11: - stosl - addl $0x1000,%eax - loop 11b - /* - * End condition: we must map up to the end + MAPPING_BEYOND_END. - */ - movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp - cmpl %ebp,%eax - jb 10b - addl $__PAGE_OFFSET, %edi - movl %edi, pa(_brk_end) - shrl $12, %eax - movl %eax, pa(max_pfn_mapped) - - /* Do early initialization of the fixmap area */ - movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax - movl %eax,pa(initial_page_table+0xffc) -#endif - -#ifdef CONFIG_PARAVIRT - /* This is can only trip for a broken bootloader... */ - cmpw $0x207, pa(boot_params + BP_version) - jb default_entry - - /* Paravirt-compatible boot parameters. Look to see what architecture - we're booting under. */ - movl pa(boot_params + BP_hardware_subarch), %eax - cmpl $num_subarch_entries, %eax - jae bad_subarch - - movl pa(subarch_entries)(,%eax,4), %eax - subl $__PAGE_OFFSET, %eax - jmp *%eax - -bad_subarch: -WEAK(lguest_entry) -WEAK(xen_entry) - /* Unknown implementation; there's really - nothing we can do at this point. */ - ud2a - - __INITDATA - -subarch_entries: - .long default_entry /* normal x86/PC */ - .long lguest_entry /* lguest hypervisor */ - .long xen_entry /* Xen hypervisor */ - .long default_entry /* Moorestown MID */ -num_subarch_entries = (. - subarch_entries) / 4 -.previous -#else - jmp default_entry -#endif /* CONFIG_PARAVIRT */ - -/* - * Non-boot CPU entry point; entered from trampoline.S - * We can't lgdt here, because lgdt itself uses a data segment, but - * we know the trampoline has already loaded the boot_gdt for us. 
- * - * If cpu hotplug is not supported then this code can go in init section - * which will be freed later - */ - -__CPUINIT - -#ifdef CONFIG_SMP -ENTRY(startup_32_smp) - cld - movl $(__BOOT_DS),%eax - movl %eax,%ds - movl %eax,%es - movl %eax,%fs - movl %eax,%gs - movl pa(stack_start),%ecx - movl %eax,%ss - leal -__PAGE_OFFSET(%ecx),%esp -#endif /* CONFIG_SMP */ -default_entry: - -/* - * New page tables may be in 4Mbyte page mode and may - * be using the global pages. - * - * NOTE! If we are on a 486 we may have no cr4 at all! - * So we do not try to touch it unless we really have - * some bits in it to set. This won't work if the BSP - * implements cr4 but this AP does not -- very unlikely - * but be warned! The same applies to the pse feature - * if not equally supported. --macro - * - * NOTE! We have to correct for the fact that we're - * not yet offset PAGE_OFFSET.. - */ -#define cr4_bits pa(mmu_cr4_features) - movl cr4_bits,%edx - andl %edx,%edx - jz 6f - movl %cr4,%eax # Turn on paging options (PSE,PAE,..) - orl %edx,%eax - movl %eax,%cr4 - - testb $X86_CR4_PAE, %al # check if PAE is enabled - jz 6f - - /* Check if extended functions are implemented */ - movl $0x80000000, %eax - cpuid - /* Value must be in the range 0x80000001 to 0x8000ffff */ - subl $0x80000001, %eax - cmpl $(0x8000ffff-0x80000001), %eax - ja 6f - - /* Clear bogus XD_DISABLE bits */ - call verify_cpu - - mov $0x80000001, %eax - cpuid - /* Execute Disable bit supported? */ - btl $(X86_FEATURE_NX & 31), %edx - jnc 6f - - /* Setup EFER (Extended Feature Enable Register) */ - movl $MSR_EFER, %ecx - rdmsr - - btsl $_EFER_NX, %eax - /* Make changes effective */ - wrmsr - -6: - -/* - * Enable paging - */ - movl $pa(initial_page_table), %eax - movl %eax,%cr3 /* set the page table pointer.. */ - movl %cr0,%eax - orl $X86_CR0_PG,%eax - movl %eax,%cr0 /* ..and set paging (PG) bit */ - ljmp $__BOOT_CS,$1f /* Clear prefetch and normalize %eip */ -1: - /* Shift the stack pointer to a virtual address */ - addl $__PAGE_OFFSET, %esp - -/* - * Initialize eflags. Some BIOS's leave bits like NT set. This would - * confuse the debugger if this code is traced. - * XXX - best to initialize before switching to protected mode. - */ - pushl $0 - popfl - -#ifdef CONFIG_SMP - cmpb $0, ready - jnz checkCPUtype -#endif /* CONFIG_SMP */ - -/* - * start system 32-bit setup. We need to re-do some of the things done - * in 16-bit mode for the "real" operations. - */ - call setup_idt - -checkCPUtype: - - movl $-1,X86_CPUID # -1 for no CPUID initially - -/* check if it is 486 or 386. */ -/* - * XXX - this does a lot of unnecessary setup. Alignment checks don't - * apply at our cpl of 0 and the stack ought to be aligned already, and - * we don't need to preserve eflags. 
- */ - - movb $3,X86 # at least 386 - pushfl # push EFLAGS - popl %eax # get EFLAGS - movl %eax,%ecx # save original EFLAGS - xorl $0x240000,%eax # flip AC and ID bits in EFLAGS - pushl %eax # copy to EFLAGS - popfl # set EFLAGS - pushfl # get new EFLAGS - popl %eax # put it in eax - xorl %ecx,%eax # change in flags - pushl %ecx # restore original EFLAGS - popfl - testl $0x40000,%eax # check if AC bit changed - je is386 - - movb $4,X86 # at least 486 - testl $0x200000,%eax # check if ID bit changed - je is486 - - /* get vendor info */ - xorl %eax,%eax # call CPUID with 0 -> return vendor ID - cpuid - movl %eax,X86_CPUID # save CPUID level - movl %ebx,X86_VENDOR_ID # lo 4 chars - movl %edx,X86_VENDOR_ID+4 # next 4 chars - movl %ecx,X86_VENDOR_ID+8 # last 4 chars - - orl %eax,%eax # do we have processor info as well? - je is486 - - movl $1,%eax # Use the CPUID instruction to get CPU type - cpuid - movb %al,%cl # save reg for future use - andb $0x0f,%ah # mask processor family - movb %ah,X86 - andb $0xf0,%al # mask model - shrb $4,%al - movb %al,X86_MODEL - andb $0x0f,%cl # mask mask revision - movb %cl,X86_MASK - movl %edx,X86_CAPABILITY - -is486: movl $0x50022,%ecx # set AM, WP, NE and MP - jmp 2f - -is386: movl $2,%ecx # set MP -2: movl %cr0,%eax - andl $0x80000011,%eax # Save PG,PE,ET - orl %ecx,%eax - movl %eax,%cr0 - - call check_x87 - lgdt early_gdt_descr - lidt idt_descr - ljmp $(__KERNEL_CS),$1f -1: movl $(__KERNEL_DS),%eax # reload all the segment registers - movl %eax,%ss # after changing gdt. - - movl $(__USER_DS),%eax # DS/ES contains default USER segment - movl %eax,%ds - movl %eax,%es - - movl $(__KERNEL_PERCPU), %eax - movl %eax,%fs # set this cpu's percpu - -#ifdef CONFIG_CC_STACKPROTECTOR - /* - * The linker can't handle this by relocation. Manually set - * base address in stack canary segment descriptor. - */ - cmpb $0,ready - jne 1f - movl $gdt_page,%eax - movl $stack_canary,%ecx - movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) - shrl $16, %ecx - movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) - movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax) -1: -#endif - movl $(__KERNEL_STACK_CANARY),%eax - movl %eax,%gs - - xorl %eax,%eax # Clear LDT - lldt %ax - - cld # gcc2 wants the direction flag cleared at all times - pushl $0 # fake return address for unwinder - movb $1, ready - jmp *(initial_code) - -/* - * We depend on ET to be correct. This checks for 287/387. - */ -check_x87: - movb $0,X86_HARD_MATH - clts - fninit - fstsw %ax - cmpb $0,%al - je 1f - movl %cr0,%eax /* no coprocessor: have to set bits */ - xorl $4,%eax /* set EM */ - movl %eax,%cr0 - ret - ALIGN -1: movb $1,X86_HARD_MATH - .byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */ - ret - -/* - * setup_idt - * - * sets up a idt with 256 entries pointing to - * ignore_int, interrupt gates. It doesn't actually load - * idt - that can be done only after paging has been enabled - * and the kernel moved to PAGE_OFFSET. Interrupts - * are enabled elsewhere, when we can be relatively - * sure everything is ok. - * - * Warning: %esi is live across this function. 
- */ -setup_idt: - lea ignore_int,%edx - movl $(__KERNEL_CS << 16),%eax - movw %dx,%ax /* selector = 0x0010 = cs */ - movw $0x8E00,%dx /* interrupt gate - dpl=0, present */ - - lea idt_table,%edi - mov $256,%ecx -rp_sidt: - movl %eax,(%edi) - movl %edx,4(%edi) - addl $8,%edi - dec %ecx - jne rp_sidt - -.macro set_early_handler handler,trapno - lea \handler,%edx - movl $(__KERNEL_CS << 16),%eax - movw %dx,%ax - movw $0x8E00,%dx /* interrupt gate - dpl=0, present */ - lea idt_table,%edi - movl %eax,8*\trapno(%edi) - movl %edx,8*\trapno+4(%edi) -.endm - - set_early_handler handler=early_divide_err,trapno=0 - set_early_handler handler=early_illegal_opcode,trapno=6 - set_early_handler handler=early_protection_fault,trapno=13 - set_early_handler handler=early_page_fault,trapno=14 - - ret - -early_divide_err: - xor %edx,%edx - pushl $0 /* fake errcode */ - jmp early_fault - -early_illegal_opcode: - movl $6,%edx - pushl $0 /* fake errcode */ - jmp early_fault - -early_protection_fault: - movl $13,%edx - jmp early_fault - -early_page_fault: - movl $14,%edx - jmp early_fault - -early_fault: - cld -#ifdef CONFIG_PRINTK - pusha - movl $(__KERNEL_DS),%eax - movl %eax,%ds - movl %eax,%es - cmpl $2,early_recursion_flag - je hlt_loop - incl early_recursion_flag - movl %cr2,%eax - pushl %eax - pushl %edx /* trapno */ - pushl $fault_msg - call printk -#endif - call dump_stack -hlt_loop: - hlt - jmp hlt_loop - -/* This is the default interrupt "handler" :-) */ - ALIGN -ignore_int: - cld -#ifdef CONFIG_PRINTK - pushl %eax - pushl %ecx - pushl %edx - pushl %es - pushl %ds - movl $(__KERNEL_DS),%eax - movl %eax,%ds - movl %eax,%es - cmpl $2,early_recursion_flag - je hlt_loop - incl early_recursion_flag - pushl 16(%esp) - pushl 24(%esp) - pushl 32(%esp) - pushl 40(%esp) - pushl $int_msg - call printk - - call dump_stack - - addl $(5*4),%esp - popl %ds - popl %es - popl %edx - popl %ecx - popl %eax -#endif - iret - -#include "verify_cpu.S" - - __REFDATA -.align 4 -ENTRY(initial_code) - .long i386_start_kernel - -/* - * BSS section - */ -__PAGE_ALIGNED_BSS - .align PAGE_SIZE -#ifdef CONFIG_X86_PAE -initial_pg_pmd: - .fill 1024*KPMDS,4,0 -#else -ENTRY(initial_page_table) - .fill 1024,4,0 -#endif -initial_pg_fixmap: - .fill 1024,4,0 -ENTRY(empty_zero_page) - .fill 4096,1,0 -ENTRY(swapper_pg_dir) - .fill 1024,4,0 - -/* - * This starts the data section. - */ -#ifdef CONFIG_X86_PAE -__PAGE_ALIGNED_DATA - /* Page-aligned for the benefit of paravirt? 
*/ - .align PAGE_SIZE -ENTRY(initial_page_table) - .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */ -# if KPMDS == 3 - .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 - .long pa(initial_pg_pmd+PGD_IDENT_ATTR+0x1000),0 - .long pa(initial_pg_pmd+PGD_IDENT_ATTR+0x2000),0 -# elif KPMDS == 2 - .long 0,0 - .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 - .long pa(initial_pg_pmd+PGD_IDENT_ATTR+0x1000),0 -# elif KPMDS == 1 - .long 0,0 - .long 0,0 - .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 -# else -# error "Kernel PMDs should be 1, 2 or 3" -# endif - .align PAGE_SIZE /* needs to be page-sized too */ -#endif - -.data -.balign 4 -ENTRY(stack_start) - .long init_thread_union+THREAD_SIZE - -early_recursion_flag: - .long 0 - -ready: .byte 0 - -int_msg: - .asciz "Unknown interrupt or fault at: %p %p %p\n" - -fault_msg: -/* fault info: */ - .ascii "BUG: Int %d: CR2 %p\n" -/* pusha regs: */ - .ascii " EDI %p ESI %p EBP %p ESP %p\n" - .ascii " EBX %p EDX %p ECX %p EAX %p\n" -/* fault frame: */ - .ascii " err %p EIP %p CS %p flg %p\n" - .ascii "Stack: %p %p %p %p %p %p %p %p\n" - .ascii " %p %p %p %p %p %p %p %p\n" - .asciz " %p %p %p %p %p %p %p %p\n" - -#include "../../x86/xen/xen-head.S" - -/* - * The IDT and GDT 'descriptors' are a strange 48-bit object - * only used by the lidt and lgdt instructions. They are not - * like usual segment descriptors - they consist of a 16-bit - * segment size, and 32-bit linear address value: - */ - -.globl boot_gdt_descr -.globl idt_descr - - ALIGN -# early boot GDT descriptor (must use 1:1 address mapping) - .word 0 # 32 bit align gdt_desc.address -boot_gdt_descr: - .word __BOOT_DS+7 - .long boot_gdt - __PAGE_OFFSET - - .word 0 # 32-bit align idt_desc.address -idt_descr: - .word IDT_ENTRIES*8-1 # idt contains 256 entries - .long idt_table - -# boot GDT descriptor (later on used by CPU#0): - .word 0 # 32 bit align gdt_desc.address -ENTRY(early_gdt_descr) - .word GDT_ENTRIES*8-1 - .long gdt_page /* Overwritten for secondary CPUs */ - -/* - * The boot_gdt must mirror the equivalent in setup.S and is - * used only for booting. - */ - .align L1_CACHE_BYTES -ENTRY(boot_gdt) - .fill GDT_ENTRY_BOOT_CS,8,0 - .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ - .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ diff --git a/ANDROID_3.4.5/arch/x86/kernel/head_64.S b/ANDROID_3.4.5/arch/x86/kernel/head_64.S deleted file mode 100644 index 40f4eb37..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/head_64.S +++ /dev/null @@ -1,427 +0,0 @@ -/* - * linux/arch/x86_64/kernel/head.S -- start in 32bit and switch to 64bit - * - * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE - * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> - * Copyright (C) 2000 Karsten Keil <kkeil@suse.de> - * Copyright (C) 2001,2002 Andi Kleen <ak@suse.de> - * Copyright (C) 2005 Eric Biederman <ebiederm@xmission.com> - */ - - -#include <linux/linkage.h> -#include <linux/threads.h> -#include <linux/init.h> -#include <asm/segment.h> -#include <asm/pgtable.h> -#include <asm/page.h> -#include <asm/msr.h> -#include <asm/cache.h> -#include <asm/processor-flags.h> -#include <asm/percpu.h> - -#ifdef CONFIG_PARAVIRT -#include <asm/asm-offsets.h> -#include <asm/paravirt.h> -#else -#define GET_CR2_INTO_RCX movq %cr2, %rcx -#endif - -/* we are not able to switch in one step to the final KERNEL ADDRESS SPACE - * because we need identity-mapped pages. 
- * - */ - -#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) - -L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET) -L3_PAGE_OFFSET = pud_index(__PAGE_OFFSET) -L4_START_KERNEL = pgd_index(__START_KERNEL_map) -L3_START_KERNEL = pud_index(__START_KERNEL_map) - - .text - __HEAD - .code64 - .globl startup_64 -startup_64: - - /* - * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1, - * and someone has loaded an identity mapped page table - * for us. These identity mapped page tables map all of the - * kernel pages and possibly all of memory. - * - * %esi holds a physical pointer to real_mode_data. - * - * We come here either directly from a 64bit bootloader, or from - * arch/x86_64/boot/compressed/head.S. - * - * We only come here initially at boot nothing else comes here. - * - * Since we may be loaded at an address different from what we were - * compiled to run at we first fixup the physical addresses in our page - * tables and then reload them. - */ - - /* Compute the delta between the address I am compiled to run at and the - * address I am actually running at. - */ - leaq _text(%rip), %rbp - subq $_text - __START_KERNEL_map, %rbp - - /* Is the address not 2M aligned? */ - movq %rbp, %rax - andl $~PMD_PAGE_MASK, %eax - testl %eax, %eax - jnz bad_address - - /* Is the address too large? */ - leaq _text(%rip), %rdx - movq $PGDIR_SIZE, %rax - cmpq %rax, %rdx - jae bad_address - - /* Fixup the physical addresses in the page table - */ - addq %rbp, init_level4_pgt + 0(%rip) - addq %rbp, init_level4_pgt + (L4_PAGE_OFFSET*8)(%rip) - addq %rbp, init_level4_pgt + (L4_START_KERNEL*8)(%rip) - - addq %rbp, level3_ident_pgt + 0(%rip) - - addq %rbp, level3_kernel_pgt + (510*8)(%rip) - addq %rbp, level3_kernel_pgt + (511*8)(%rip) - - addq %rbp, level2_fixmap_pgt + (506*8)(%rip) - - /* Add an Identity mapping if I am above 1G */ - leaq _text(%rip), %rdi - andq $PMD_PAGE_MASK, %rdi - - movq %rdi, %rax - shrq $PUD_SHIFT, %rax - andq $(PTRS_PER_PUD - 1), %rax - jz ident_complete - - leaq (level2_spare_pgt - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx - leaq level3_ident_pgt(%rip), %rbx - movq %rdx, 0(%rbx, %rax, 8) - - movq %rdi, %rax - shrq $PMD_SHIFT, %rax - andq $(PTRS_PER_PMD - 1), %rax - leaq __PAGE_KERNEL_IDENT_LARGE_EXEC(%rdi), %rdx - leaq level2_spare_pgt(%rip), %rbx - movq %rdx, 0(%rbx, %rax, 8) -ident_complete: - - /* - * Fixup the kernel text+data virtual addresses. Note that - * we might write invalid pmds, when the kernel is relocated - * cleanup_highmap() fixes this up along with the mappings - * beyond _end. - */ - - leaq level2_kernel_pgt(%rip), %rdi - leaq 4096(%rdi), %r8 - /* See if it is a valid page table entry */ -1: testq $1, 0(%rdi) - jz 2f - addq %rbp, 0(%rdi) - /* Go to the next page */ -2: addq $8, %rdi - cmp %r8, %rdi - jne 1b - - /* Fixup phys_base */ - addq %rbp, phys_base(%rip) - - /* Fixup trampoline */ - addq %rbp, trampoline_level4_pgt + 0(%rip) - addq %rbp, trampoline_level4_pgt + (511*8)(%rip) - - /* Due to ENTRY(), sometimes the empty space gets filled with - * zeros. Better take a jmp than relying on empty space being - * filled with 0x90 (nop) - */ - jmp secondary_startup_64 -ENTRY(secondary_startup_64) - /* - * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1, - * and someone has loaded a mapped page table. - * - * %esi holds a physical pointer to real_mode_data. - * - * We come here either from startup_64 (using physical addresses) - * or from trampoline.S (using virtual addresses). 
- * - * Using virtual addresses from trampoline.S removes the need - * to have any identity mapped pages in the kernel page table - * after the boot processor executes this code. - */ - - /* Enable PAE mode and PGE */ - movl $(X86_CR4_PAE | X86_CR4_PGE), %eax - movq %rax, %cr4 - - /* Setup early boot stage 4 level pagetables. */ - movq $(init_level4_pgt - __START_KERNEL_map), %rax - addq phys_base(%rip), %rax - movq %rax, %cr3 - - /* Ensure I am executing from virtual addresses */ - movq $1f, %rax - jmp *%rax -1: - - /* Check if nx is implemented */ - movl $0x80000001, %eax - cpuid - movl %edx,%edi - - /* Setup EFER (Extended Feature Enable Register) */ - movl $MSR_EFER, %ecx - rdmsr - btsl $_EFER_SCE, %eax /* Enable System Call */ - btl $20,%edi /* No Execute supported? */ - jnc 1f - btsl $_EFER_NX, %eax -1: wrmsr /* Make changes effective */ - - /* Setup cr0 */ -#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \ - X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \ - X86_CR0_PG) - movl $CR0_STATE, %eax - /* Make changes effective */ - movq %rax, %cr0 - - /* Setup a boot time stack */ - movq stack_start(%rip),%rsp - - /* zero EFLAGS after setting rsp */ - pushq $0 - popfq - - /* - * We must switch to a new descriptor in kernel space for the GDT - * because soon the kernel won't have access anymore to the userspace - * addresses where we're currently running on. We have to do that here - * because in 32bit we couldn't load a 64bit linear address. - */ - lgdt early_gdt_descr(%rip) - - /* set up data segments */ - xorl %eax,%eax - movl %eax,%ds - movl %eax,%ss - movl %eax,%es - - /* - * We don't really need to load %fs or %gs, but load them anyway - * to kill any stale realmode selectors. This allows execution - * under VT hardware. - */ - movl %eax,%fs - movl %eax,%gs - - /* Set up %gs. - * - * The base of %gs always points to the bottom of the irqstack - * union. If the stack protector canary is enabled, it is - * located at %gs:40. Note that, on SMP, the boot cpu uses - * init data section till per cpu areas are set up. - */ - movl $MSR_GS_BASE,%ecx - movl initial_gs(%rip),%eax - movl initial_gs+4(%rip),%edx - wrmsr - - /* esi is pointer to real mode structure with interesting info. - pass it to C */ - movl %esi, %edi - - /* Finally jump to run C code and to be on real kernel address - * Since we are running on identity-mapped space we have to jump - * to the full 64bit address, this is only possible as indirect - * jump. In addition we need to ensure %cs is set so we make this - * a far return. - */ - movq initial_code(%rip),%rax - pushq $0 # fake return address to stop unwinder - pushq $__KERNEL_CS # set correct cs - pushq %rax # target address in negative space - lretq - - /* SMP bootup changes these two */ - __REFDATA - .align 8 - ENTRY(initial_code) - .quad x86_64_start_kernel - ENTRY(initial_gs) - .quad INIT_PER_CPU_VAR(irq_stack_union) - - ENTRY(stack_start) - .quad init_thread_union+THREAD_SIZE-8 - .word 0 - __FINITDATA - -bad_address: - jmp bad_address - - .section ".init.text","ax" -#ifdef CONFIG_EARLY_PRINTK - .globl early_idt_handlers -early_idt_handlers: - i = 0 - .rept NUM_EXCEPTION_VECTORS - movl $i, %esi - jmp early_idt_handler - i = i + 1 - .endr -#endif - -ENTRY(early_idt_handler) -#ifdef CONFIG_EARLY_PRINTK - cmpl $2,early_recursion_flag(%rip) - jz 1f - incl early_recursion_flag(%rip) - GET_CR2_INTO_RCX - movq %rcx,%r9 - xorl %r8d,%r8d # zero for error code - movl %esi,%ecx # get vector number - # Test %ecx against mask of vectors that push error code. 
- cmpl $31,%ecx - ja 0f - movl $1,%eax - salq %cl,%rax - testl $0x27d00,%eax - je 0f - popq %r8 # get error code -0: movq 0(%rsp),%rcx # get ip - movq 8(%rsp),%rdx # get cs - xorl %eax,%eax - leaq early_idt_msg(%rip),%rdi - call early_printk - cmpl $2,early_recursion_flag(%rip) - jz 1f - call dump_stack -#ifdef CONFIG_KALLSYMS - leaq early_idt_ripmsg(%rip),%rdi - movq 0(%rsp),%rsi # get rip again - call __print_symbol -#endif -#endif /* EARLY_PRINTK */ -1: hlt - jmp 1b - -#ifdef CONFIG_EARLY_PRINTK -early_recursion_flag: - .long 0 - -early_idt_msg: - .asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n" -early_idt_ripmsg: - .asciz "RIP %s\n" -#endif /* CONFIG_EARLY_PRINTK */ - .previous - -#define NEXT_PAGE(name) \ - .balign PAGE_SIZE; \ -ENTRY(name) - -/* Automate the creation of 1 to 1 mapping pmd entries */ -#define PMDS(START, PERM, COUNT) \ - i = 0 ; \ - .rept (COUNT) ; \ - .quad (START) + (i << PMD_SHIFT) + (PERM) ; \ - i = i + 1 ; \ - .endr - - .data - /* - * This default setting generates an ident mapping at address 0x100000 - * and a mapping for the kernel that precisely maps virtual address - * 0xffffffff80000000 to physical address 0x000000. (always using - * 2Mbyte large pages provided by PAE mode) - */ -NEXT_PAGE(init_level4_pgt) - .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE - .org init_level4_pgt + L4_PAGE_OFFSET*8, 0 - .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE - .org init_level4_pgt + L4_START_KERNEL*8, 0 - /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ - .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE - -NEXT_PAGE(level3_ident_pgt) - .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE - .fill 511,8,0 - -NEXT_PAGE(level3_kernel_pgt) - .fill L3_START_KERNEL,8,0 - /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */ - .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE - .quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE - -NEXT_PAGE(level2_fixmap_pgt) - .fill 506,8,0 - .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE - /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */ - .fill 5,8,0 - -NEXT_PAGE(level1_fixmap_pgt) - .fill 512,8,0 - -NEXT_PAGE(level2_ident_pgt) - /* Since I easily can, map the first 1G. - * Don't set NX because code runs from these pages. - */ - PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) - -NEXT_PAGE(level2_kernel_pgt) - /* - * 512 MB kernel mapping. We spend a full page on this pagetable - * anyway. - * - * The kernel code+data+bss must not be bigger than that. - * - * (NOTE: at +512MB starts the module area, see MODULES_VADDR. - * If you want to increase this then increase MODULES_VADDR - * too.) 
- */ - PMDS(0, __PAGE_KERNEL_LARGE_EXEC, - KERNEL_IMAGE_SIZE/PMD_SIZE) - -NEXT_PAGE(level2_spare_pgt) - .fill 512, 8, 0 - -#undef PMDS -#undef NEXT_PAGE - - .data - .align 16 - .globl early_gdt_descr -early_gdt_descr: - .word GDT_ENTRIES*8-1 -early_gdt_descr_base: - .quad INIT_PER_CPU_VAR(gdt_page) - -ENTRY(phys_base) - /* This must match the first entry in level2_kernel_pgt */ - .quad 0x0000000000000000 - -#include "../../x86/xen/xen-head.S" - - .section .bss, "aw", @nobits - .align L1_CACHE_BYTES -ENTRY(idt_table) - .skip IDT_ENTRIES * 16 - - .align L1_CACHE_BYTES -ENTRY(nmi_idt_table) - .skip IDT_ENTRIES * 16 - - __PAGE_ALIGNED_BSS - .align PAGE_SIZE -ENTRY(empty_zero_page) - .skip PAGE_SIZE diff --git a/ANDROID_3.4.5/arch/x86/kernel/hpet.c b/ANDROID_3.4.5/arch/x86/kernel/hpet.c deleted file mode 100644 index ad0de0c2..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/hpet.c +++ /dev/null @@ -1,1209 +0,0 @@ -#include <linux/clocksource.h> -#include <linux/clockchips.h> -#include <linux/interrupt.h> -#include <linux/export.h> -#include <linux/delay.h> -#include <linux/errno.h> -#include <linux/i8253.h> -#include <linux/slab.h> -#include <linux/hpet.h> -#include <linux/init.h> -#include <linux/cpu.h> -#include <linux/pm.h> -#include <linux/io.h> - -#include <asm/fixmap.h> -#include <asm/hpet.h> -#include <asm/time.h> - -#define HPET_MASK CLOCKSOURCE_MASK(32) - -/* FSEC = 10^-15 - NSEC = 10^-9 */ -#define FSEC_PER_NSEC 1000000L - -#define HPET_DEV_USED_BIT 2 -#define HPET_DEV_USED (1 << HPET_DEV_USED_BIT) -#define HPET_DEV_VALID 0x8 -#define HPET_DEV_FSB_CAP 0x1000 -#define HPET_DEV_PERI_CAP 0x2000 - -#define HPET_MIN_CYCLES 128 -#define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1)) - -/* - * HPET address is set in acpi/boot.c, when an ACPI entry exists - */ -unsigned long hpet_address; -u8 hpet_blockid; /* OS timer block num */ -u8 hpet_msi_disable; - -#ifdef CONFIG_PCI_MSI -static unsigned long hpet_num_timers; -#endif -static void __iomem *hpet_virt_address; - -struct hpet_dev { - struct clock_event_device evt; - unsigned int num; - int cpu; - unsigned int irq; - unsigned int flags; - char name[10]; -}; - -inline struct hpet_dev *EVT_TO_HPET_DEV(struct clock_event_device *evtdev) -{ - return container_of(evtdev, struct hpet_dev, evt); -} - -inline unsigned int hpet_readl(unsigned int a) -{ - return readl(hpet_virt_address + a); -} - -static inline void hpet_writel(unsigned int d, unsigned int a) -{ - writel(d, hpet_virt_address + a); -} - -#ifdef CONFIG_X86_64 -#include <asm/pgtable.h> -#endif - -static inline void hpet_set_mapping(void) -{ - hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); -#ifdef CONFIG_X86_64 - __set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VVAR_NOCACHE); -#endif -} - -static inline void hpet_clear_mapping(void) -{ - iounmap(hpet_virt_address); - hpet_virt_address = NULL; -} - -/* - * HPET command line enable / disable - */ -static int boot_hpet_disable; -int hpet_force_user; -static int hpet_verbose; - -static int __init hpet_setup(char *str) -{ - if (str) { - if (!strncmp("disable", str, 7)) - boot_hpet_disable = 1; - if (!strncmp("force", str, 5)) - hpet_force_user = 1; - if (!strncmp("verbose", str, 7)) - hpet_verbose = 1; - } - return 1; -} -__setup("hpet=", hpet_setup); - -static int __init disable_hpet(char *str) -{ - boot_hpet_disable = 1; - return 1; -} -__setup("nohpet", disable_hpet); - -static inline int is_hpet_capable(void) -{ - return !boot_hpet_disable && hpet_address; -} - -/* - * HPET timer interrupt enable / 
disable - */ -static int hpet_legacy_int_enabled; - -/** - * is_hpet_enabled - check whether the hpet timer interrupt is enabled - */ -int is_hpet_enabled(void) -{ - return is_hpet_capable() && hpet_legacy_int_enabled; -} -EXPORT_SYMBOL_GPL(is_hpet_enabled); - -static void _hpet_print_config(const char *function, int line) -{ - u32 i, timers, l, h; - printk(KERN_INFO "hpet: %s(%d):\n", function, line); - l = hpet_readl(HPET_ID); - h = hpet_readl(HPET_PERIOD); - timers = ((l & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; - printk(KERN_INFO "hpet: ID: 0x%x, PERIOD: 0x%x\n", l, h); - l = hpet_readl(HPET_CFG); - h = hpet_readl(HPET_STATUS); - printk(KERN_INFO "hpet: CFG: 0x%x, STATUS: 0x%x\n", l, h); - l = hpet_readl(HPET_COUNTER); - h = hpet_readl(HPET_COUNTER+4); - printk(KERN_INFO "hpet: COUNTER_l: 0x%x, COUNTER_h: 0x%x\n", l, h); - - for (i = 0; i < timers; i++) { - l = hpet_readl(HPET_Tn_CFG(i)); - h = hpet_readl(HPET_Tn_CFG(i)+4); - printk(KERN_INFO "hpet: T%d: CFG_l: 0x%x, CFG_h: 0x%x\n", - i, l, h); - l = hpet_readl(HPET_Tn_CMP(i)); - h = hpet_readl(HPET_Tn_CMP(i)+4); - printk(KERN_INFO "hpet: T%d: CMP_l: 0x%x, CMP_h: 0x%x\n", - i, l, h); - l = hpet_readl(HPET_Tn_ROUTE(i)); - h = hpet_readl(HPET_Tn_ROUTE(i)+4); - printk(KERN_INFO "hpet: T%d ROUTE_l: 0x%x, ROUTE_h: 0x%x\n", - i, l, h); - } -} - -#define hpet_print_config() \ -do { \ - if (hpet_verbose) \ - _hpet_print_config(__FUNCTION__, __LINE__); \ -} while (0) - -/* - * When the hpet driver (/dev/hpet) is enabled, we need to reserve - * timer 0 and timer 1 in case of RTC emulation. - */ -#ifdef CONFIG_HPET - -static void hpet_reserve_msi_timers(struct hpet_data *hd); - -static void hpet_reserve_platform_timers(unsigned int id) -{ - struct hpet __iomem *hpet = hpet_virt_address; - struct hpet_timer __iomem *timer = &hpet->hpet_timers[2]; - unsigned int nrtimers, i; - struct hpet_data hd; - - nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; - - memset(&hd, 0, sizeof(hd)); - hd.hd_phys_address = hpet_address; - hd.hd_address = hpet; - hd.hd_nirqs = nrtimers; - hpet_reserve_timer(&hd, 0); - -#ifdef CONFIG_HPET_EMULATE_RTC - hpet_reserve_timer(&hd, 1); -#endif - - /* - * NOTE that hd_irq[] reflects IOAPIC input pins (LEGACY_8254 - * is wrong for i8259!) not the output IRQ. Many BIOS writers - * don't bother configuring *any* comparator interrupts. 
- */ - hd.hd_irq[0] = HPET_LEGACY_8254; - hd.hd_irq[1] = HPET_LEGACY_RTC; - - for (i = 2; i < nrtimers; timer++, i++) { - hd.hd_irq[i] = (readl(&timer->hpet_config) & - Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT; - } - - hpet_reserve_msi_timers(&hd); - - hpet_alloc(&hd); - -} -#else -static void hpet_reserve_platform_timers(unsigned int id) { } -#endif - -/* - * Common hpet info - */ -static unsigned long hpet_freq; - -static void hpet_legacy_set_mode(enum clock_event_mode mode, - struct clock_event_device *evt); -static int hpet_legacy_next_event(unsigned long delta, - struct clock_event_device *evt); - -/* - * The hpet clock event device - */ -static struct clock_event_device hpet_clockevent = { - .name = "hpet", - .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, - .set_mode = hpet_legacy_set_mode, - .set_next_event = hpet_legacy_next_event, - .irq = 0, - .rating = 50, -}; - -static void hpet_stop_counter(void) -{ - unsigned long cfg = hpet_readl(HPET_CFG); - cfg &= ~HPET_CFG_ENABLE; - hpet_writel(cfg, HPET_CFG); -} - -static void hpet_reset_counter(void) -{ - hpet_writel(0, HPET_COUNTER); - hpet_writel(0, HPET_COUNTER + 4); -} - -static void hpet_start_counter(void) -{ - unsigned int cfg = hpet_readl(HPET_CFG); - cfg |= HPET_CFG_ENABLE; - hpet_writel(cfg, HPET_CFG); -} - -static void hpet_restart_counter(void) -{ - hpet_stop_counter(); - hpet_reset_counter(); - hpet_start_counter(); -} - -static void hpet_resume_device(void) -{ - force_hpet_resume(); -} - -static void hpet_resume_counter(struct clocksource *cs) -{ - hpet_resume_device(); - hpet_restart_counter(); -} - -static void hpet_enable_legacy_int(void) -{ - unsigned int cfg = hpet_readl(HPET_CFG); - - cfg |= HPET_CFG_LEGACY; - hpet_writel(cfg, HPET_CFG); - hpet_legacy_int_enabled = 1; -} - -static void hpet_legacy_clockevent_register(void) -{ - /* Start HPET legacy interrupts */ - hpet_enable_legacy_int(); - - /* - * Start hpet with the boot cpu mask and make it - * global after the IO_APIC has been initialized. - */ - hpet_clockevent.cpumask = cpumask_of(smp_processor_id()); - clockevents_config_and_register(&hpet_clockevent, hpet_freq, - HPET_MIN_PROG_DELTA, 0x7FFFFFFF); - global_clock_event = &hpet_clockevent; - printk(KERN_DEBUG "hpet clockevent registered\n"); -} - -static int hpet_setup_msi_irq(unsigned int irq); - -static void hpet_set_mode(enum clock_event_mode mode, - struct clock_event_device *evt, int timer) -{ - unsigned int cfg, cmp, now; - uint64_t delta; - - switch (mode) { - case CLOCK_EVT_MODE_PERIODIC: - hpet_stop_counter(); - delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult; - delta >>= evt->shift; - now = hpet_readl(HPET_COUNTER); - cmp = now + (unsigned int) delta; - cfg = hpet_readl(HPET_Tn_CFG(timer)); - /* Make sure we use edge triggered interrupts */ - cfg &= ~HPET_TN_LEVEL; - cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | - HPET_TN_SETVAL | HPET_TN_32BIT; - hpet_writel(cfg, HPET_Tn_CFG(timer)); - hpet_writel(cmp, HPET_Tn_CMP(timer)); - udelay(1); - /* - * HPET on AMD 81xx needs a second write (with HPET_TN_SETVAL - * cleared) to T0_CMP to set the period. The HPET_TN_SETVAL - * bit is automatically cleared after the first write. 
- * (See AMD-8111 HyperTransport I/O Hub Data Sheet, - * Publication # 24674) - */ - hpet_writel((unsigned int) delta, HPET_Tn_CMP(timer)); - hpet_start_counter(); - hpet_print_config(); - break; - - case CLOCK_EVT_MODE_ONESHOT: - cfg = hpet_readl(HPET_Tn_CFG(timer)); - cfg &= ~HPET_TN_PERIODIC; - cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; - hpet_writel(cfg, HPET_Tn_CFG(timer)); - break; - - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - cfg = hpet_readl(HPET_Tn_CFG(timer)); - cfg &= ~HPET_TN_ENABLE; - hpet_writel(cfg, HPET_Tn_CFG(timer)); - break; - - case CLOCK_EVT_MODE_RESUME: - if (timer == 0) { - hpet_enable_legacy_int(); - } else { - struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); - hpet_setup_msi_irq(hdev->irq); - disable_irq(hdev->irq); - irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); - enable_irq(hdev->irq); - } - hpet_print_config(); - break; - } -} - -static int hpet_next_event(unsigned long delta, - struct clock_event_device *evt, int timer) -{ - u32 cnt; - s32 res; - - cnt = hpet_readl(HPET_COUNTER); - cnt += (u32) delta; - hpet_writel(cnt, HPET_Tn_CMP(timer)); - - /* - * HPETs are a complete disaster. The compare register is - * based on a equal comparison and neither provides a less - * than or equal functionality (which would require to take - * the wraparound into account) nor a simple count down event - * mode. Further the write to the comparator register is - * delayed internally up to two HPET clock cycles in certain - * chipsets (ATI, ICH9,10). Some newer AMD chipsets have even - * longer delays. We worked around that by reading back the - * compare register, but that required another workaround for - * ICH9,10 chips where the first readout after write can - * return the old stale value. We already had a minimum - * programming delta of 5us enforced, but a NMI or SMI hitting - * between the counter readout and the comparator write can - * move us behind that point easily. Now instead of reading - * the compare register back several times, we make the ETIME - * decision based on the following: Return ETIME if the - * counter value after the write is less than HPET_MIN_CYCLES - * away from the event or if the counter is already ahead of - * the event. The minimum programming delta for the generic - * clockevents code is set to 1.5 * HPET_MIN_CYCLES. - */ - res = (s32)(cnt - hpet_readl(HPET_COUNTER)); - - return res < HPET_MIN_CYCLES ? 
-ETIME : 0; -} - -static void hpet_legacy_set_mode(enum clock_event_mode mode, - struct clock_event_device *evt) -{ - hpet_set_mode(mode, evt, 0); -} - -static int hpet_legacy_next_event(unsigned long delta, - struct clock_event_device *evt) -{ - return hpet_next_event(delta, evt, 0); -} - -/* - * HPET MSI Support - */ -#ifdef CONFIG_PCI_MSI - -static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev); -static struct hpet_dev *hpet_devs; - -void hpet_msi_unmask(struct irq_data *data) -{ - struct hpet_dev *hdev = data->handler_data; - unsigned int cfg; - - /* unmask it */ - cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); - cfg |= HPET_TN_FSB; - hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); -} - -void hpet_msi_mask(struct irq_data *data) -{ - struct hpet_dev *hdev = data->handler_data; - unsigned int cfg; - - /* mask it */ - cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); - cfg &= ~HPET_TN_FSB; - hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); -} - -void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg) -{ - hpet_writel(msg->data, HPET_Tn_ROUTE(hdev->num)); - hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4); -} - -void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg) -{ - msg->data = hpet_readl(HPET_Tn_ROUTE(hdev->num)); - msg->address_lo = hpet_readl(HPET_Tn_ROUTE(hdev->num) + 4); - msg->address_hi = 0; -} - -static void hpet_msi_set_mode(enum clock_event_mode mode, - struct clock_event_device *evt) -{ - struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); - hpet_set_mode(mode, evt, hdev->num); -} - -static int hpet_msi_next_event(unsigned long delta, - struct clock_event_device *evt) -{ - struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); - return hpet_next_event(delta, evt, hdev->num); -} - -static int hpet_setup_msi_irq(unsigned int irq) -{ - if (arch_setup_hpet_msi(irq, hpet_blockid)) { - destroy_irq(irq); - return -EINVAL; - } - return 0; -} - -static int hpet_assign_irq(struct hpet_dev *dev) -{ - unsigned int irq; - - irq = create_irq_nr(0, -1); - if (!irq) - return -EINVAL; - - irq_set_handler_data(irq, dev); - - if (hpet_setup_msi_irq(irq)) - return -EINVAL; - - dev->irq = irq; - return 0; -} - -static irqreturn_t hpet_interrupt_handler(int irq, void *data) -{ - struct hpet_dev *dev = (struct hpet_dev *)data; - struct clock_event_device *hevt = &dev->evt; - - if (!hevt->event_handler) { - printk(KERN_INFO "Spurious HPET timer interrupt on HPET timer %d\n", - dev->num); - return IRQ_HANDLED; - } - - hevt->event_handler(hevt); - return IRQ_HANDLED; -} - -static int hpet_setup_irq(struct hpet_dev *dev) -{ - - if (request_irq(dev->irq, hpet_interrupt_handler, - IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING, - dev->name, dev)) - return -1; - - disable_irq(dev->irq); - irq_set_affinity(dev->irq, cpumask_of(dev->cpu)); - enable_irq(dev->irq); - - printk(KERN_DEBUG "hpet: %s irq %d for MSI\n", - dev->name, dev->irq); - - return 0; -} - -/* This should be called in specific @cpu */ -static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) -{ - struct clock_event_device *evt = &hdev->evt; - - WARN_ON(cpu != smp_processor_id()); - if (!(hdev->flags & HPET_DEV_VALID)) - return; - - if (hpet_setup_msi_irq(hdev->irq)) - return; - - hdev->cpu = cpu; - per_cpu(cpu_hpet_dev, cpu) = hdev; - evt->name = hdev->name; - hpet_setup_irq(hdev); - evt->irq = hdev->irq; - - evt->rating = 110; - evt->features = CLOCK_EVT_FEAT_ONESHOT; - if (hdev->flags & HPET_DEV_PERI_CAP) - evt->features |= CLOCK_EVT_FEAT_PERIODIC; - - evt->set_mode = hpet_msi_set_mode; - evt->set_next_event = hpet_msi_next_event; - 
evt->cpumask = cpumask_of(hdev->cpu); - - clockevents_config_and_register(evt, hpet_freq, HPET_MIN_PROG_DELTA, - 0x7FFFFFFF); -} - -#ifdef CONFIG_HPET -/* Reserve at least one timer for userspace (/dev/hpet) */ -#define RESERVE_TIMERS 1 -#else -#define RESERVE_TIMERS 0 -#endif - -static void hpet_msi_capability_lookup(unsigned int start_timer) -{ - unsigned int id; - unsigned int num_timers; - unsigned int num_timers_used = 0; - int i; - - if (hpet_msi_disable) - return; - - if (boot_cpu_has(X86_FEATURE_ARAT)) - return; - id = hpet_readl(HPET_ID); - - num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT); - num_timers++; /* Value read out starts from 0 */ - hpet_print_config(); - - hpet_devs = kzalloc(sizeof(struct hpet_dev) * num_timers, GFP_KERNEL); - if (!hpet_devs) - return; - - hpet_num_timers = num_timers; - - for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) { - struct hpet_dev *hdev = &hpet_devs[num_timers_used]; - unsigned int cfg = hpet_readl(HPET_Tn_CFG(i)); - - /* Only consider HPET timer with MSI support */ - if (!(cfg & HPET_TN_FSB_CAP)) - continue; - - hdev->flags = 0; - if (cfg & HPET_TN_PERIODIC_CAP) - hdev->flags |= HPET_DEV_PERI_CAP; - hdev->num = i; - - sprintf(hdev->name, "hpet%d", i); - if (hpet_assign_irq(hdev)) - continue; - - hdev->flags |= HPET_DEV_FSB_CAP; - hdev->flags |= HPET_DEV_VALID; - num_timers_used++; - if (num_timers_used == num_possible_cpus()) - break; - } - - printk(KERN_INFO "HPET: %d timers in total, %d timers will be used for per-cpu timer\n", - num_timers, num_timers_used); -} - -#ifdef CONFIG_HPET -static void hpet_reserve_msi_timers(struct hpet_data *hd) -{ - int i; - - if (!hpet_devs) - return; - - for (i = 0; i < hpet_num_timers; i++) { - struct hpet_dev *hdev = &hpet_devs[i]; - - if (!(hdev->flags & HPET_DEV_VALID)) - continue; - - hd->hd_irq[hdev->num] = hdev->irq; - hpet_reserve_timer(hd, hdev->num); - } -} -#endif - -static struct hpet_dev *hpet_get_unused_timer(void) -{ - int i; - - if (!hpet_devs) - return NULL; - - for (i = 0; i < hpet_num_timers; i++) { - struct hpet_dev *hdev = &hpet_devs[i]; - - if (!(hdev->flags & HPET_DEV_VALID)) - continue; - if (test_and_set_bit(HPET_DEV_USED_BIT, - (unsigned long *)&hdev->flags)) - continue; - return hdev; - } - return NULL; -} - -struct hpet_work_struct { - struct delayed_work work; - struct completion complete; -}; - -static void hpet_work(struct work_struct *w) -{ - struct hpet_dev *hdev; - int cpu = smp_processor_id(); - struct hpet_work_struct *hpet_work; - - hpet_work = container_of(w, struct hpet_work_struct, work.work); - - hdev = hpet_get_unused_timer(); - if (hdev) - init_one_hpet_msi_clockevent(hdev, cpu); - - complete(&hpet_work->complete); -} - -static int hpet_cpuhp_notify(struct notifier_block *n, - unsigned long action, void *hcpu) -{ - unsigned long cpu = (unsigned long)hcpu; - struct hpet_work_struct work; - struct hpet_dev *hdev = per_cpu(cpu_hpet_dev, cpu); - - switch (action & 0xf) { - case CPU_ONLINE: - INIT_DELAYED_WORK_ONSTACK(&work.work, hpet_work); - init_completion(&work.complete); - /* FIXME: add schedule_work_on() */ - schedule_delayed_work_on(cpu, &work.work, 0); - wait_for_completion(&work.complete); - destroy_timer_on_stack(&work.work.timer); - break; - case CPU_DEAD: - if (hdev) { - free_irq(hdev->irq, hdev); - hdev->flags &= ~HPET_DEV_USED; - per_cpu(cpu_hpet_dev, cpu) = NULL; - } - break; - } - return NOTIFY_OK; -} -#else - -static int hpet_setup_msi_irq(unsigned int irq) -{ - return 0; -} -static void hpet_msi_capability_lookup(unsigned int 
start_timer) -{ - return; -} - -#ifdef CONFIG_HPET -static void hpet_reserve_msi_timers(struct hpet_data *hd) -{ - return; -} -#endif - -static int hpet_cpuhp_notify(struct notifier_block *n, - unsigned long action, void *hcpu) -{ - return NOTIFY_OK; -} - -#endif - -/* - * Clock source related code - */ -static cycle_t read_hpet(struct clocksource *cs) -{ - return (cycle_t)hpet_readl(HPET_COUNTER); -} - -static struct clocksource clocksource_hpet = { - .name = "hpet", - .rating = 250, - .read = read_hpet, - .mask = HPET_MASK, - .flags = CLOCK_SOURCE_IS_CONTINUOUS, - .resume = hpet_resume_counter, -#ifdef CONFIG_X86_64 - .archdata = { .vclock_mode = VCLOCK_HPET }, -#endif -}; - -static int hpet_clocksource_register(void) -{ - u64 start, now; - cycle_t t1; - - /* Start the counter */ - hpet_restart_counter(); - - /* Verify whether hpet counter works */ - t1 = hpet_readl(HPET_COUNTER); - rdtscll(start); - - /* - * We don't know the TSC frequency yet, but waiting for - * 200000 TSC cycles is safe: - * 4 GHz == 50us - * 1 GHz == 200us - */ - do { - rep_nop(); - rdtscll(now); - } while ((now - start) < 200000UL); - - if (t1 == hpet_readl(HPET_COUNTER)) { - printk(KERN_WARNING - "HPET counter not counting. HPET disabled\n"); - return -ENODEV; - } - - clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq); - return 0; -} - -/** - * hpet_enable - Try to setup the HPET timer. Returns 1 on success. - */ -int __init hpet_enable(void) -{ - unsigned long hpet_period; - unsigned int id; - u64 freq; - int i; - - if (!is_hpet_capable()) - return 0; - - hpet_set_mapping(); - - /* - * Read the period and check for a sane value: - */ - hpet_period = hpet_readl(HPET_PERIOD); - - /* - * AMD SB700 based systems with spread spectrum enabled use a - * SMM based HPET emulation to provide proper frequency - * setting. The SMM code is initialized with the first HPET - * register access and takes some time to complete. During - * this time the config register reads 0xffffffff. We check - * for max. 1000 loops whether the config register reads a non - * 0xffffffff value to make sure that HPET is up and running - * before we go further. A counting loop is safe, as the HPET - * access takes thousands of CPU cycles. On non SB700 based - * machines this check is only done once and has no side - * effects. - */ - for (i = 0; hpet_readl(HPET_CFG) == 0xFFFFFFFF; i++) { - if (i == 1000) { - printk(KERN_WARNING - "HPET config register value = 0xFFFFFFFF. " - "Disabling HPET\n"); - goto out_nohpet; - } - } - - if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD) - goto out_nohpet; - - /* - * The period is a femto seconds value. Convert it to a - * frequency. - */ - freq = FSEC_PER_SEC; - do_div(freq, hpet_period); - hpet_freq = freq; - - /* - * Read the HPET ID register to retrieve the IRQ routing - * information and the number of channels - */ - id = hpet_readl(HPET_ID); - hpet_print_config(); - -#ifdef CONFIG_HPET_EMULATE_RTC - /* - * The legacy routing mode needs at least two channels, tick timer - * and the rtc emulation channel. - */ - if (!(id & HPET_ID_NUMBER)) - goto out_nohpet; -#endif - - if (hpet_clocksource_register()) - goto out_nohpet; - - if (id & HPET_ID_LEGSUP) { - hpet_legacy_clockevent_register(); - return 1; - } - return 0; - -out_nohpet: - hpet_clear_mapping(); - hpet_address = 0; - return 0; -} - -/* - * Needs to be late, as the reserve_timer code calls kalloc ! - * - * Not a problem on i386 as hpet_enable is called from late_time_init, - * but on x86_64 it is necessary ! 
- */ -static __init int hpet_late_init(void) -{ - int cpu; - - if (boot_hpet_disable) - return -ENODEV; - - if (!hpet_address) { - if (!force_hpet_address) - return -ENODEV; - - hpet_address = force_hpet_address; - hpet_enable(); - } - - if (!hpet_virt_address) - return -ENODEV; - - if (hpet_readl(HPET_ID) & HPET_ID_LEGSUP) - hpet_msi_capability_lookup(2); - else - hpet_msi_capability_lookup(0); - - hpet_reserve_platform_timers(hpet_readl(HPET_ID)); - hpet_print_config(); - - if (hpet_msi_disable) - return 0; - - if (boot_cpu_has(X86_FEATURE_ARAT)) - return 0; - - for_each_online_cpu(cpu) { - hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu); - } - - /* This notifier should be called after workqueue is ready */ - hotcpu_notifier(hpet_cpuhp_notify, -20); - - return 0; -} -fs_initcall(hpet_late_init); - -void hpet_disable(void) -{ - if (is_hpet_capable() && hpet_virt_address) { - unsigned int cfg = hpet_readl(HPET_CFG); - - if (hpet_legacy_int_enabled) { - cfg &= ~HPET_CFG_LEGACY; - hpet_legacy_int_enabled = 0; - } - cfg &= ~HPET_CFG_ENABLE; - hpet_writel(cfg, HPET_CFG); - } -} - -#ifdef CONFIG_HPET_EMULATE_RTC - -/* HPET in LegacyReplacement Mode eats up RTC interrupt line. When, HPET - * is enabled, we support RTC interrupt functionality in software. - * RTC has 3 kinds of interrupts: - * 1) Update Interrupt - generate an interrupt, every sec, when RTC clock - * is updated - * 2) Alarm Interrupt - generate an interrupt at a specific time of day - * 3) Periodic Interrupt - generate periodic interrupt, with frequencies - * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2) - * (1) and (2) above are implemented using polling at a frequency of - * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt - * overhead. (DEFAULT_RTC_INT_FREQ) - * For (3), we use interrupts at 64Hz or user specified periodic - * frequency, whichever is higher. - */ -#include <linux/mc146818rtc.h> -#include <linux/rtc.h> -#include <asm/rtc.h> - -#define DEFAULT_RTC_INT_FREQ 64 -#define DEFAULT_RTC_SHIFT 6 -#define RTC_NUM_INTS 1 - -static unsigned long hpet_rtc_flags; -static int hpet_prev_update_sec; -static struct rtc_time hpet_alarm_time; -static unsigned long hpet_pie_count; -static u32 hpet_t1_cmp; -static u32 hpet_default_delta; -static u32 hpet_pie_delta; -static unsigned long hpet_pie_limit; - -static rtc_irq_handler irq_handler; - -/* - * Check that the hpet counter c1 is ahead of the c2 - */ -static inline int hpet_cnt_ahead(u32 c1, u32 c2) -{ - return (s32)(c2 - c1) < 0; -} - -/* - * Registers a IRQ handler. - */ -int hpet_register_irq_handler(rtc_irq_handler handler) -{ - if (!is_hpet_enabled()) - return -ENODEV; - if (irq_handler) - return -EBUSY; - - irq_handler = handler; - - return 0; -} -EXPORT_SYMBOL_GPL(hpet_register_irq_handler); - -/* - * Deregisters the IRQ handler registered with hpet_register_irq_handler() - * and does cleanup. - */ -void hpet_unregister_irq_handler(rtc_irq_handler handler) -{ - if (!is_hpet_enabled()) - return; - - irq_handler = NULL; - hpet_rtc_flags = 0; -} -EXPORT_SYMBOL_GPL(hpet_unregister_irq_handler); - -/* - * Timer 1 for RTC emulation. We use one shot mode, as periodic mode - * is not supported by all HPET implementations for timer 1. - * - * hpet_rtc_timer_init() is called when the rtc is initialized. 
- */ -int hpet_rtc_timer_init(void) -{ - unsigned int cfg, cnt, delta; - unsigned long flags; - - if (!is_hpet_enabled()) - return 0; - - if (!hpet_default_delta) { - uint64_t clc; - - clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC; - clc >>= hpet_clockevent.shift + DEFAULT_RTC_SHIFT; - hpet_default_delta = clc; - } - - if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit) - delta = hpet_default_delta; - else - delta = hpet_pie_delta; - - local_irq_save(flags); - - cnt = delta + hpet_readl(HPET_COUNTER); - hpet_writel(cnt, HPET_T1_CMP); - hpet_t1_cmp = cnt; - - cfg = hpet_readl(HPET_T1_CFG); - cfg &= ~HPET_TN_PERIODIC; - cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; - hpet_writel(cfg, HPET_T1_CFG); - - local_irq_restore(flags); - - return 1; -} -EXPORT_SYMBOL_GPL(hpet_rtc_timer_init); - -static void hpet_disable_rtc_channel(void) -{ - unsigned long cfg; - cfg = hpet_readl(HPET_T1_CFG); - cfg &= ~HPET_TN_ENABLE; - hpet_writel(cfg, HPET_T1_CFG); -} - -/* - * The functions below are called from rtc driver. - * Return 0 if HPET is not being used. - * Otherwise do the necessary changes and return 1. - */ -int hpet_mask_rtc_irq_bit(unsigned long bit_mask) -{ - if (!is_hpet_enabled()) - return 0; - - hpet_rtc_flags &= ~bit_mask; - if (unlikely(!hpet_rtc_flags)) - hpet_disable_rtc_channel(); - - return 1; -} -EXPORT_SYMBOL_GPL(hpet_mask_rtc_irq_bit); - -int hpet_set_rtc_irq_bit(unsigned long bit_mask) -{ - unsigned long oldbits = hpet_rtc_flags; - - if (!is_hpet_enabled()) - return 0; - - hpet_rtc_flags |= bit_mask; - - if ((bit_mask & RTC_UIE) && !(oldbits & RTC_UIE)) - hpet_prev_update_sec = -1; - - if (!oldbits) - hpet_rtc_timer_init(); - - return 1; -} -EXPORT_SYMBOL_GPL(hpet_set_rtc_irq_bit); - -int hpet_set_alarm_time(unsigned char hrs, unsigned char min, - unsigned char sec) -{ - if (!is_hpet_enabled()) - return 0; - - hpet_alarm_time.tm_hour = hrs; - hpet_alarm_time.tm_min = min; - hpet_alarm_time.tm_sec = sec; - - return 1; -} -EXPORT_SYMBOL_GPL(hpet_set_alarm_time); - -int hpet_set_periodic_freq(unsigned long freq) -{ - uint64_t clc; - - if (!is_hpet_enabled()) - return 0; - - if (freq <= DEFAULT_RTC_INT_FREQ) - hpet_pie_limit = DEFAULT_RTC_INT_FREQ / freq; - else { - clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC; - do_div(clc, freq); - clc >>= hpet_clockevent.shift; - hpet_pie_delta = clc; - hpet_pie_limit = 0; - } - return 1; -} -EXPORT_SYMBOL_GPL(hpet_set_periodic_freq); - -int hpet_rtc_dropped_irq(void) -{ - return is_hpet_enabled(); -} -EXPORT_SYMBOL_GPL(hpet_rtc_dropped_irq); - -static void hpet_rtc_timer_reinit(void) -{ - unsigned int delta; - int lost_ints = -1; - - if (unlikely(!hpet_rtc_flags)) - hpet_disable_rtc_channel(); - - if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit) - delta = hpet_default_delta; - else - delta = hpet_pie_delta; - - /* - * Increment the comparator value until we are ahead of the - * current count. 
- */ - do { - hpet_t1_cmp += delta; - hpet_writel(hpet_t1_cmp, HPET_T1_CMP); - lost_ints++; - } while (!hpet_cnt_ahead(hpet_t1_cmp, hpet_readl(HPET_COUNTER))); - - if (lost_ints) { - if (hpet_rtc_flags & RTC_PIE) - hpet_pie_count += lost_ints; - if (printk_ratelimit()) - printk(KERN_WARNING "hpet1: lost %d rtc interrupts\n", - lost_ints); - } -} - -irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) -{ - struct rtc_time curr_time; - unsigned long rtc_int_flag = 0; - - hpet_rtc_timer_reinit(); - memset(&curr_time, 0, sizeof(struct rtc_time)); - - if (hpet_rtc_flags & (RTC_UIE | RTC_AIE)) - get_rtc_time(&curr_time); - - if (hpet_rtc_flags & RTC_UIE && - curr_time.tm_sec != hpet_prev_update_sec) { - if (hpet_prev_update_sec >= 0) - rtc_int_flag = RTC_UF; - hpet_prev_update_sec = curr_time.tm_sec; - } - - if (hpet_rtc_flags & RTC_PIE && - ++hpet_pie_count >= hpet_pie_limit) { - rtc_int_flag |= RTC_PF; - hpet_pie_count = 0; - } - - if (hpet_rtc_flags & RTC_AIE && - (curr_time.tm_sec == hpet_alarm_time.tm_sec) && - (curr_time.tm_min == hpet_alarm_time.tm_min) && - (curr_time.tm_hour == hpet_alarm_time.tm_hour)) - rtc_int_flag |= RTC_AF; - - if (rtc_int_flag) { - rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8)); - if (irq_handler) - irq_handler(rtc_int_flag, dev_id); - } - return IRQ_HANDLED; -} -EXPORT_SYMBOL_GPL(hpet_rtc_interrupt); -#endif diff --git a/ANDROID_3.4.5/arch/x86/kernel/hw_breakpoint.c b/ANDROID_3.4.5/arch/x86/kernel/hw_breakpoint.c deleted file mode 100644 index 02f07634..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/hw_breakpoint.c +++ /dev/null @@ -1,524 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright (C) 2007 Alan Stern - * Copyright (C) 2009 IBM Corporation - * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com> - * - * Authors: Alan Stern <stern@rowland.harvard.edu> - * K.Prasad <prasad@linux.vnet.ibm.com> - * Frederic Weisbecker <fweisbec@gmail.com> - */ - -/* - * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, - * using the CPU's debug registers. 
- */ - -#include <linux/perf_event.h> -#include <linux/hw_breakpoint.h> -#include <linux/irqflags.h> -#include <linux/notifier.h> -#include <linux/kallsyms.h> -#include <linux/kprobes.h> -#include <linux/percpu.h> -#include <linux/kdebug.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/init.h> -#include <linux/smp.h> - -#include <asm/hw_breakpoint.h> -#include <asm/processor.h> -#include <asm/debugreg.h> - -/* Per cpu debug control register value */ -DEFINE_PER_CPU(unsigned long, cpu_dr7); -EXPORT_PER_CPU_SYMBOL(cpu_dr7); - -/* Per cpu debug address registers values */ -static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]); - -/* - * Stores the breakpoints currently in use on each breakpoint address - * register for each cpus - */ -static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]); - - -static inline unsigned long -__encode_dr7(int drnum, unsigned int len, unsigned int type) -{ - unsigned long bp_info; - - bp_info = (len | type) & 0xf; - bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE); - bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)); - - return bp_info; -} - -/* - * Encode the length, type, Exact, and Enable bits for a particular breakpoint - * as stored in debug register 7. - */ -unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) -{ - return __encode_dr7(drnum, len, type) | DR_GLOBAL_SLOWDOWN; -} - -/* - * Decode the length and type bits for a particular breakpoint as - * stored in debug register 7. Return the "enabled" status. - */ -int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type) -{ - int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE); - - *len = (bp_info & 0xc) | 0x40; - *type = (bp_info & 0x3) | 0x80; - - return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3; -} - -/* - * Install a perf counter breakpoint. - * - * We seek a free debug address register and use it for this - * breakpoint. Eventually we enable it in the debug control register. - * - * Atomic: we hold the counter->ctx->lock and we only handle variables - * and registers local to this cpu. - */ -int arch_install_hw_breakpoint(struct perf_event *bp) -{ - struct arch_hw_breakpoint *info = counter_arch_bp(bp); - unsigned long *dr7; - int i; - - for (i = 0; i < HBP_NUM; i++) { - struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); - - if (!*slot) { - *slot = bp; - break; - } - } - - if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) - return -EBUSY; - - set_debugreg(info->address, i); - __this_cpu_write(cpu_debugreg[i], info->address); - - dr7 = &__get_cpu_var(cpu_dr7); - *dr7 |= encode_dr7(i, info->len, info->type); - - set_debugreg(*dr7, 7); - - return 0; -} - -/* - * Uninstall the breakpoint contained in the given counter. - * - * First we search the debug address register it uses and then we disable - * it. - * - * Atomic: we hold the counter->ctx->lock and we only handle variables - * and registers local to this cpu. 
- */ -void arch_uninstall_hw_breakpoint(struct perf_event *bp) -{ - struct arch_hw_breakpoint *info = counter_arch_bp(bp); - unsigned long *dr7; - int i; - - for (i = 0; i < HBP_NUM; i++) { - struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); - - if (*slot == bp) { - *slot = NULL; - break; - } - } - - if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) - return; - - dr7 = &__get_cpu_var(cpu_dr7); - *dr7 &= ~__encode_dr7(i, info->len, info->type); - - set_debugreg(*dr7, 7); -} - -static int get_hbp_len(u8 hbp_len) -{ - unsigned int len_in_bytes = 0; - - switch (hbp_len) { - case X86_BREAKPOINT_LEN_1: - len_in_bytes = 1; - break; - case X86_BREAKPOINT_LEN_2: - len_in_bytes = 2; - break; - case X86_BREAKPOINT_LEN_4: - len_in_bytes = 4; - break; -#ifdef CONFIG_X86_64 - case X86_BREAKPOINT_LEN_8: - len_in_bytes = 8; - break; -#endif - } - return len_in_bytes; -} - -/* - * Check for virtual address in kernel space. - */ -int arch_check_bp_in_kernelspace(struct perf_event *bp) -{ - unsigned int len; - unsigned long va; - struct arch_hw_breakpoint *info = counter_arch_bp(bp); - - va = info->address; - len = get_hbp_len(info->len); - - return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); -} - -int arch_bp_generic_fields(int x86_len, int x86_type, - int *gen_len, int *gen_type) -{ - /* Type */ - switch (x86_type) { - case X86_BREAKPOINT_EXECUTE: - if (x86_len != X86_BREAKPOINT_LEN_X) - return -EINVAL; - - *gen_type = HW_BREAKPOINT_X; - *gen_len = sizeof(long); - return 0; - case X86_BREAKPOINT_WRITE: - *gen_type = HW_BREAKPOINT_W; - break; - case X86_BREAKPOINT_RW: - *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; - break; - default: - return -EINVAL; - } - - /* Len */ - switch (x86_len) { - case X86_BREAKPOINT_LEN_1: - *gen_len = HW_BREAKPOINT_LEN_1; - break; - case X86_BREAKPOINT_LEN_2: - *gen_len = HW_BREAKPOINT_LEN_2; - break; - case X86_BREAKPOINT_LEN_4: - *gen_len = HW_BREAKPOINT_LEN_4; - break; -#ifdef CONFIG_X86_64 - case X86_BREAKPOINT_LEN_8: - *gen_len = HW_BREAKPOINT_LEN_8; - break; -#endif - default: - return -EINVAL; - } - - return 0; -} - - -static int arch_build_bp_info(struct perf_event *bp) -{ - struct arch_hw_breakpoint *info = counter_arch_bp(bp); - - info->address = bp->attr.bp_addr; - - /* Type */ - switch (bp->attr.bp_type) { - case HW_BREAKPOINT_W: - info->type = X86_BREAKPOINT_WRITE; - break; - case HW_BREAKPOINT_W | HW_BREAKPOINT_R: - info->type = X86_BREAKPOINT_RW; - break; - case HW_BREAKPOINT_X: - info->type = X86_BREAKPOINT_EXECUTE; - /* - * x86 inst breakpoints need to have a specific undefined len. - * But we still need to check userspace is not trying to setup - * an unsupported length, to get a range breakpoint for example. 
- */ - if (bp->attr.bp_len == sizeof(long)) { - info->len = X86_BREAKPOINT_LEN_X; - return 0; - } - default: - return -EINVAL; - } - - /* Len */ - switch (bp->attr.bp_len) { - case HW_BREAKPOINT_LEN_1: - info->len = X86_BREAKPOINT_LEN_1; - break; - case HW_BREAKPOINT_LEN_2: - info->len = X86_BREAKPOINT_LEN_2; - break; - case HW_BREAKPOINT_LEN_4: - info->len = X86_BREAKPOINT_LEN_4; - break; -#ifdef CONFIG_X86_64 - case HW_BREAKPOINT_LEN_8: - info->len = X86_BREAKPOINT_LEN_8; - break; -#endif - default: - return -EINVAL; - } - - return 0; -} -/* - * Validate the arch-specific HW Breakpoint register settings - */ -int arch_validate_hwbkpt_settings(struct perf_event *bp) -{ - struct arch_hw_breakpoint *info = counter_arch_bp(bp); - unsigned int align; - int ret; - - - ret = arch_build_bp_info(bp); - if (ret) - return ret; - - ret = -EINVAL; - - switch (info->len) { - case X86_BREAKPOINT_LEN_1: - align = 0; - break; - case X86_BREAKPOINT_LEN_2: - align = 1; - break; - case X86_BREAKPOINT_LEN_4: - align = 3; - break; -#ifdef CONFIG_X86_64 - case X86_BREAKPOINT_LEN_8: - align = 7; - break; -#endif - default: - return ret; - } - - /* - * Check that the low-order bits of the address are appropriate - * for the alignment implied by len. - */ - if (info->address & align) - return -EINVAL; - - return 0; -} - -/* - * Dump the debug register contents to the user. - * We can't dump our per cpu values because it - * may contain cpu wide breakpoint, something that - * doesn't belong to the current task. - * - * TODO: include non-ptrace user breakpoints (perf) - */ -void aout_dump_debugregs(struct user *dump) -{ - int i; - int dr7 = 0; - struct perf_event *bp; - struct arch_hw_breakpoint *info; - struct thread_struct *thread = ¤t->thread; - - for (i = 0; i < HBP_NUM; i++) { - bp = thread->ptrace_bps[i]; - - if (bp && !bp->attr.disabled) { - dump->u_debugreg[i] = bp->attr.bp_addr; - info = counter_arch_bp(bp); - dr7 |= encode_dr7(i, info->len, info->type); - } else { - dump->u_debugreg[i] = 0; - } - } - - dump->u_debugreg[4] = 0; - dump->u_debugreg[5] = 0; - dump->u_debugreg[6] = current->thread.debugreg6; - - dump->u_debugreg[7] = dr7; -} -EXPORT_SYMBOL_GPL(aout_dump_debugregs); - -/* - * Release the user breakpoints used by ptrace - */ -void flush_ptrace_hw_breakpoint(struct task_struct *tsk) -{ - int i; - struct thread_struct *t = &tsk->thread; - - for (i = 0; i < HBP_NUM; i++) { - unregister_hw_breakpoint(t->ptrace_bps[i]); - t->ptrace_bps[i] = NULL; - } -} - -void hw_breakpoint_restore(void) -{ - set_debugreg(__this_cpu_read(cpu_debugreg[0]), 0); - set_debugreg(__this_cpu_read(cpu_debugreg[1]), 1); - set_debugreg(__this_cpu_read(cpu_debugreg[2]), 2); - set_debugreg(__this_cpu_read(cpu_debugreg[3]), 3); - set_debugreg(current->thread.debugreg6, 6); - set_debugreg(__this_cpu_read(cpu_dr7), 7); -} -EXPORT_SYMBOL_GPL(hw_breakpoint_restore); - -/* - * Handle debug exception notifications. - * - * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below. - * - * NOTIFY_DONE returned if one of the following conditions is true. - * i) When the causative address is from user-space and the exception - * is a valid one, i.e. not triggered as a result of lazy debug register - * switching - * ii) When there are more bits than trap<n> set in DR6 register (such - * as BD, BS or BT) indicating that more than one debug condition is - * met and requires some more action in do_debug(). 
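The mask test at the end of arch_validate_hwbkpt_settings() above simply demands natural alignment for the chosen length; a quick illustration with made-up addresses:

/*
 * len = X86_BREAKPOINT_LEN_4  ->  align = 3
 *
 *   info->address = 0x1000:  0x1000 & 3 == 0  -> accepted
 *   info->address = 0x1002:  0x1002 & 3 == 2  -> -EINVAL, because the
 *     hardware watches a naturally aligned window and a 4-byte
 *     breakpoint may not straddle it.
 */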
- * - * NOTIFY_STOP returned for all other cases - * - */ -static int __kprobes hw_breakpoint_handler(struct die_args *args) -{ - int i, cpu, rc = NOTIFY_STOP; - struct perf_event *bp; - unsigned long dr7, dr6; - unsigned long *dr6_p; - - /* The DR6 value is pointed by args->err */ - dr6_p = (unsigned long *)ERR_PTR(args->err); - dr6 = *dr6_p; - - /* If it's a single step, TRAP bits are random */ - if (dr6 & DR_STEP) - return NOTIFY_DONE; - - /* Do an early return if no trap bits are set in DR6 */ - if ((dr6 & DR_TRAP_BITS) == 0) - return NOTIFY_DONE; - - get_debugreg(dr7, 7); - /* Disable breakpoints during exception handling */ - set_debugreg(0UL, 7); - /* - * Assert that local interrupts are disabled - * Reset the DRn bits in the virtualized register value. - * The ptrace trigger routine will add in whatever is needed. - */ - current->thread.debugreg6 &= ~DR_TRAP_BITS; - cpu = get_cpu(); - - /* Handle all the breakpoints that were triggered */ - for (i = 0; i < HBP_NUM; ++i) { - if (likely(!(dr6 & (DR_TRAP0 << i)))) - continue; - - /* - * The counter may be concurrently released but that can only - * occur from a call_rcu() path. We can then safely fetch - * the breakpoint, use its callback, touch its counter - * while we are in an rcu_read_lock() path. - */ - rcu_read_lock(); - - bp = per_cpu(bp_per_reg[i], cpu); - /* - * Reset the 'i'th TRAP bit in dr6 to denote completion of - * exception handling - */ - (*dr6_p) &= ~(DR_TRAP0 << i); - /* - * bp can be NULL due to lazy debug register switching - * or due to concurrent perf counter removing. - */ - if (!bp) { - rcu_read_unlock(); - break; - } - - perf_bp_event(bp, args->regs); - - /* - * Set up resume flag to avoid breakpoint recursion when - * returning back to origin. - */ - if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE) - args->regs->flags |= X86_EFLAGS_RF; - - rcu_read_unlock(); - } - /* - * Further processing in do_debug() is needed for a) user-space - * breakpoints (to generate signals) and b) when the system has - * taken exception due to multiple causes - */ - if ((current->thread.debugreg6 & DR_TRAP_BITS) || - (dr6 & (~DR_TRAP_BITS))) - rc = NOTIFY_DONE; - - set_debugreg(dr7, 7); - put_cpu(); - - return rc; -} - -/* - * Handle debug exception notifications. - */ -int __kprobes hw_breakpoint_exceptions_notify( - struct notifier_block *unused, unsigned long val, void *data) -{ - if (val != DIE_DEBUG) - return NOTIFY_DONE; - - return hw_breakpoint_handler(data); -} - -void hw_breakpoint_pmu_read(struct perf_event *bp) -{ - /* TODO */ -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/i386_ksyms_32.c b/ANDROID_3.4.5/arch/x86/kernel/i386_ksyms_32.c deleted file mode 100644 index 9c3bd4a2..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/i386_ksyms_32.c +++ /dev/null @@ -1,38 +0,0 @@ -#include <linux/module.h> - -#include <asm/checksum.h> -#include <asm/pgtable.h> -#include <asm/desc.h> -#include <asm/ftrace.h> - -#ifdef CONFIG_FUNCTION_TRACER -/* mcount is defined in assembly */ -EXPORT_SYMBOL(mcount); -#endif - -/* - * Note, this is a prototype to get at the symbol for - * the export, but dont use it from C code, it is used - * by assembly code and is not using C calling convention! - */ -#ifndef CONFIG_X86_CMPXCHG64 -extern void cmpxchg8b_emu(void); -EXPORT_SYMBOL(cmpxchg8b_emu); -#endif - -/* Networking helper routines. 
*/ -EXPORT_SYMBOL(csum_partial_copy_generic); - -EXPORT_SYMBOL(__get_user_1); -EXPORT_SYMBOL(__get_user_2); -EXPORT_SYMBOL(__get_user_4); - -EXPORT_SYMBOL(__put_user_1); -EXPORT_SYMBOL(__put_user_2); -EXPORT_SYMBOL(__put_user_4); -EXPORT_SYMBOL(__put_user_8); - -EXPORT_SYMBOL(strstr); - -EXPORT_SYMBOL(csum_partial); -EXPORT_SYMBOL(empty_zero_page); diff --git a/ANDROID_3.4.5/arch/x86/kernel/i387.c b/ANDROID_3.4.5/arch/x86/kernel/i387.c deleted file mode 100644 index 2d6e6498..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/i387.c +++ /dev/null @@ -1,830 +0,0 @@ -/* - * Copyright (C) 1994 Linus Torvalds - * - * Pentium III FXSR, SSE support - * General FPU state handling cleanups - * Gareth Hughes <gareth@valinux.com>, May 2000 - */ -#include <linux/module.h> -#include <linux/regset.h> -#include <linux/sched.h> -#include <linux/slab.h> - -#include <asm/sigcontext.h> -#include <asm/processor.h> -#include <asm/math_emu.h> -#include <asm/uaccess.h> -#include <asm/ptrace.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> -#include <asm/user.h> - -#ifdef CONFIG_X86_64 -# include <asm/sigcontext32.h> -# include <asm/user32.h> -#else -# define save_i387_xstate_ia32 save_i387_xstate -# define restore_i387_xstate_ia32 restore_i387_xstate -# define _fpstate_ia32 _fpstate -# define _xstate_ia32 _xstate -# define sig_xstate_ia32_size sig_xstate_size -# define fx_sw_reserved_ia32 fx_sw_reserved -# define user_i387_ia32_struct user_i387_struct -# define user32_fxsr_struct user_fxsr_struct -#endif - -/* - * Were we in an interrupt that interrupted kernel mode? - * - * We can do a kernel_fpu_begin/end() pair *ONLY* if that - * pair does nothing at all: the thread must not have fpu (so - * that we don't try to save the FPU state), and TS must - * be set (so that the clts/stts pair does nothing that is - * visible in the interrupted kernel thread). - */ -static inline bool interrupted_kernel_fpu_idle(void) -{ - return !__thread_has_fpu(current) && - (read_cr0() & X86_CR0_TS); -} - -/* - * Were we in user mode (or vm86 mode) when we were - * interrupted? - * - * Doing kernel_fpu_begin/end() is ok if we are running - * in an interrupt context from user mode - we'll just - * save the FPU state as required. - */ -static inline bool interrupted_user_mode(void) -{ - struct pt_regs *regs = get_irq_regs(); - return regs && user_mode_vm(regs); -} - -/* - * Can we use the FPU in kernel mode with the - * whole "kernel_fpu_begin/end()" sequence? - * - * It's always ok in process context (ie "not interrupt") - * but it is sometimes ok even from an irq. 
- */ -bool irq_fpu_usable(void) -{ - return !in_interrupt() || - interrupted_user_mode() || - interrupted_kernel_fpu_idle(); -} -EXPORT_SYMBOL(irq_fpu_usable); - -void kernel_fpu_begin(void) -{ - struct task_struct *me = current; - - WARN_ON_ONCE(!irq_fpu_usable()); - preempt_disable(); - if (__thread_has_fpu(me)) { - __save_init_fpu(me); - __thread_clear_has_fpu(me); - /* We do 'stts()' in kernel_fpu_end() */ - } else { - percpu_write(fpu_owner_task, NULL); - clts(); - } -} -EXPORT_SYMBOL(kernel_fpu_begin); - -void kernel_fpu_end(void) -{ - stts(); - preempt_enable(); -} -EXPORT_SYMBOL(kernel_fpu_end); - -void unlazy_fpu(struct task_struct *tsk) -{ - preempt_disable(); - if (__thread_has_fpu(tsk)) { - __save_init_fpu(tsk); - __thread_fpu_end(tsk); - } else - tsk->fpu_counter = 0; - preempt_enable(); -} -EXPORT_SYMBOL(unlazy_fpu); - -#ifdef CONFIG_MATH_EMULATION -# define HAVE_HWFP (boot_cpu_data.hard_math) -#else -# define HAVE_HWFP 1 -#endif - -static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; -unsigned int xstate_size; -EXPORT_SYMBOL_GPL(xstate_size); -unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32); -static struct i387_fxsave_struct fx_scratch __cpuinitdata; - -static void __cpuinit mxcsr_feature_mask_init(void) -{ - unsigned long mask = 0; - - clts(); - if (cpu_has_fxsr) { - memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); - asm volatile("fxsave %0" : : "m" (fx_scratch)); - mask = fx_scratch.mxcsr_mask; - if (mask == 0) - mask = 0x0000ffbf; - } - mxcsr_feature_mask &= mask; - stts(); -} - -static void __cpuinit init_thread_xstate(void) -{ - /* - * Note that xstate_size might be overwriten later during - * xsave_init(). - */ - - if (!HAVE_HWFP) { - /* - * Disable xsave as we do not support it if i387 - * emulation is enabled. - */ - setup_clear_cpu_cap(X86_FEATURE_XSAVE); - setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); - xstate_size = sizeof(struct i387_soft_struct); - return; - } - - if (cpu_has_fxsr) - xstate_size = sizeof(struct i387_fxsave_struct); - else - xstate_size = sizeof(struct i387_fsave_struct); -} - -/* - * Called at bootup to set up the initial FPU state that is later cloned - * into all processes. - */ - -void __cpuinit fpu_init(void) -{ - unsigned long cr0; - unsigned long cr4_mask = 0; - - if (cpu_has_fxsr) - cr4_mask |= X86_CR4_OSFXSR; - if (cpu_has_xmm) - cr4_mask |= X86_CR4_OSXMMEXCPT; - if (cr4_mask) - set_in_cr4(cr4_mask); - - cr0 = read_cr0(); - cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */ - if (!HAVE_HWFP) - cr0 |= X86_CR0_EM; - write_cr0(cr0); - - if (!smp_processor_id()) - init_thread_xstate(); - - mxcsr_feature_mask_init(); - /* clean state in init */ - current_thread_info()->status = 0; - clear_used_math(); -} - -void fpu_finit(struct fpu *fpu) -{ - if (!HAVE_HWFP) { - finit_soft_fpu(&fpu->state->soft); - return; - } - - if (cpu_has_fxsr) { - struct i387_fxsave_struct *fx = &fpu->state->fxsave; - - memset(fx, 0, xstate_size); - fx->cwd = 0x37f; - if (cpu_has_xmm) - fx->mxcsr = MXCSR_DEFAULT; - } else { - struct i387_fsave_struct *fp = &fpu->state->fsave; - memset(fp, 0, xstate_size); - fp->cwd = 0xffff037fu; - fp->swd = 0xffff0000u; - fp->twd = 0xffffffffu; - fp->fos = 0xffff0000u; - } -} -EXPORT_SYMBOL_GPL(fpu_finit); - -/* - * The _current_ task is using the FPU for the first time - * so initialize it and set the mxcsr to its default - * value at reset if we support XMM instructions and then - * remember the current task has used the FPU. 
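The kernel_fpu_begin()/kernel_fpu_end() pair above is the contract in-kernel SIMD users (RAID, crypto) are expected to follow. A minimal sketch of the calling pattern, with the actual SSE body left out:

#include <linux/string.h>
#include <linux/types.h>
#include <asm/i387.h>		/* irq_fpu_usable, kernel_fpu_begin/end */

static void copy_maybe_with_sse(void *dst, const void *src, size_t len)
{
	if (!irq_fpu_usable()) {
		memcpy(dst, src, len);	/* interrupt context, FPU off limits */
		return;
	}

	kernel_fpu_begin();		/* save the owner's state, or just clts() */
	/* ... SSE/AVX copy loop would go here ... */
	kernel_fpu_end();		/* stts(); state is lazily restored later */
}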
- */ -int init_fpu(struct task_struct *tsk) -{ - int ret; - - if (tsk_used_math(tsk)) { - if (HAVE_HWFP && tsk == current) - unlazy_fpu(tsk); - tsk->thread.fpu.last_cpu = ~0; - return 0; - } - - /* - * Memory allocation at the first usage of the FPU and other state. - */ - ret = fpu_alloc(&tsk->thread.fpu); - if (ret) - return ret; - - fpu_finit(&tsk->thread.fpu); - - set_stopped_child_used_math(tsk); - return 0; -} -EXPORT_SYMBOL_GPL(init_fpu); - -/* - * The xstateregs_active() routine is the same as the fpregs_active() routine, - * as the "regset->n" for the xstate regset will be updated based on the feature - * capabilites supported by the xsave. - */ -int fpregs_active(struct task_struct *target, const struct user_regset *regset) -{ - return tsk_used_math(target) ? regset->n : 0; -} - -int xfpregs_active(struct task_struct *target, const struct user_regset *regset) -{ - return (cpu_has_fxsr && tsk_used_math(target)) ? regset->n : 0; -} - -int xfpregs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int ret; - - if (!cpu_has_fxsr) - return -ENODEV; - - ret = init_fpu(target); - if (ret) - return ret; - - sanitize_i387_state(target); - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu.state->fxsave, 0, -1); -} - -int xfpregs_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret; - - if (!cpu_has_fxsr) - return -ENODEV; - - ret = init_fpu(target); - if (ret) - return ret; - - sanitize_i387_state(target); - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu.state->fxsave, 0, -1); - - /* - * mxcsr reserved bits must be masked to zero for security reasons. - */ - target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask; - - /* - * update the header bits in the xsave header, indicating the - * presence of FP and SSE state. - */ - if (cpu_has_xsave) - target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; - - return ret; -} - -int xstateregs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int ret; - - if (!cpu_has_xsave) - return -ENODEV; - - ret = init_fpu(target); - if (ret) - return ret; - - /* - * Copy the 48bytes defined by the software first into the xstate - * memory layout in the thread struct, so that we can copy the entire - * xstateregs to the user using one user_regset_copyout(). - */ - memcpy(&target->thread.fpu.state->fxsave.sw_reserved, - xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); - - /* - * Copy the xstate memory layout. - */ - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu.state->xsave, 0, -1); - return ret; -} - -int xstateregs_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret; - struct xsave_hdr_struct *xsave_hdr; - - if (!cpu_has_xsave) - return -ENODEV; - - ret = init_fpu(target); - if (ret) - return ret; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu.state->xsave, 0, -1); - - /* - * mxcsr reserved bits must be masked to zero for security reasons. 
- */ - target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask; - - xsave_hdr = &target->thread.fpu.state->xsave.xsave_hdr; - - xsave_hdr->xstate_bv &= pcntxt_mask; - /* - * These bits must be zero. - */ - xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; - - return ret; -} - -#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION - -/* - * FPU tag word conversions. - */ - -static inline unsigned short twd_i387_to_fxsr(unsigned short twd) -{ - unsigned int tmp; /* to avoid 16 bit prefixes in the code */ - - /* Transform each pair of bits into 01 (valid) or 00 (empty) */ - tmp = ~twd; - tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ - /* and move the valid bits to the lower byte. */ - tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ - tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ - tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ - - return tmp; -} - -#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16) -#define FP_EXP_TAG_VALID 0 -#define FP_EXP_TAG_ZERO 1 -#define FP_EXP_TAG_SPECIAL 2 -#define FP_EXP_TAG_EMPTY 3 - -static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) -{ - struct _fpxreg *st; - u32 tos = (fxsave->swd >> 11) & 7; - u32 twd = (unsigned long) fxsave->twd; - u32 tag; - u32 ret = 0xffff0000u; - int i; - - for (i = 0; i < 8; i++, twd >>= 1) { - if (twd & 0x1) { - st = FPREG_ADDR(fxsave, (i - tos) & 7); - - switch (st->exponent & 0x7fff) { - case 0x7fff: - tag = FP_EXP_TAG_SPECIAL; - break; - case 0x0000: - if (!st->significand[0] && - !st->significand[1] && - !st->significand[2] && - !st->significand[3]) - tag = FP_EXP_TAG_ZERO; - else - tag = FP_EXP_TAG_SPECIAL; - break; - default: - if (st->significand[3] & 0x8000) - tag = FP_EXP_TAG_VALID; - else - tag = FP_EXP_TAG_SPECIAL; - break; - } - } else { - tag = FP_EXP_TAG_EMPTY; - } - ret |= tag << (2 * i); - } - return ret; -} - -/* - * FXSR floating point environment conversions. - */ - -static void -convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) -{ - struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; - struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; - struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0]; - int i; - - env->cwd = fxsave->cwd | 0xffff0000u; - env->swd = fxsave->swd | 0xffff0000u; - env->twd = twd_fxsr_to_i387(fxsave); - -#ifdef CONFIG_X86_64 - env->fip = fxsave->rip; - env->foo = fxsave->rdp; - /* - * should be actually ds/cs at fpu exception time, but - * that information is not available in 64bit mode. 
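A worked value may help with the bit gymnastics in twd_i387_to_fxsr() above: the i387 tag word carries two bits per register (11 = empty, anything else = occupied), while the FXSR tag keeps one bit per register. Take a tag word in which only ST(1)-ST(3) are occupied:

/*
 * twd              = 0xff03   reg0 = 11 (empty), reg1..3 = 00 (valid),
 *                             reg4..7 = 11 (empty)
 * ~twd             = 0x00fc
 * (|>>1) & 0x5555  = 0x0054   every pair collapsed to 01 (occupied) / 00
 * (|>>1) & 0x3333  = 0x0032   \
 * (|>>2) & 0x0f0f  = 0x000e    } valid bits funnelled into the low byte
 * (|>>4) & 0x00ff  = 0x000e   /
 *
 * Result 0x0e: bits 1-3 set, i.e. registers 1-3 marked present.
 */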
- */ - env->fcs = task_pt_regs(tsk)->cs; - if (tsk == current) { - savesegment(ds, env->fos); - } else { - env->fos = tsk->thread.ds; - } - env->fos |= 0xffff0000; -#else - env->fip = fxsave->fip; - env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16); - env->foo = fxsave->foo; - env->fos = fxsave->fos; -#endif - - for (i = 0; i < 8; ++i) - memcpy(&to[i], &from[i], sizeof(to[0])); -} - -static void convert_to_fxsr(struct task_struct *tsk, - const struct user_i387_ia32_struct *env) - -{ - struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; - struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; - struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; - int i; - - fxsave->cwd = env->cwd; - fxsave->swd = env->swd; - fxsave->twd = twd_i387_to_fxsr(env->twd); - fxsave->fop = (u16) ((u32) env->fcs >> 16); -#ifdef CONFIG_X86_64 - fxsave->rip = env->fip; - fxsave->rdp = env->foo; - /* cs and ds ignored */ -#else - fxsave->fip = env->fip; - fxsave->fcs = (env->fcs & 0xffff); - fxsave->foo = env->foo; - fxsave->fos = env->fos; -#endif - - for (i = 0; i < 8; ++i) - memcpy(&to[i], &from[i], sizeof(from[0])); -} - -int fpregs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - struct user_i387_ia32_struct env; - int ret; - - ret = init_fpu(target); - if (ret) - return ret; - - if (!HAVE_HWFP) - return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); - - if (!cpu_has_fxsr) { - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu.state->fsave, 0, - -1); - } - - sanitize_i387_state(target); - - if (kbuf && pos == 0 && count == sizeof(env)) { - convert_from_fxsr(kbuf, target); - return 0; - } - - convert_from_fxsr(&env, target); - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &env, 0, -1); -} - -int fpregs_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - struct user_i387_ia32_struct env; - int ret; - - ret = init_fpu(target); - if (ret) - return ret; - - sanitize_i387_state(target); - - if (!HAVE_HWFP) - return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); - - if (!cpu_has_fxsr) { - return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu.state->fsave, 0, -1); - } - - if (pos > 0 || count < sizeof(env)) - convert_from_fxsr(&env, target); - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1); - if (!ret) - convert_to_fxsr(target, &env); - - /* - * update the header bit in the xsave header, indicating the - * presence of FP. - */ - if (cpu_has_xsave) - target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FP; - return ret; -} - -/* - * Signal frame handlers. 
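The fpregs_get()/fpregs_set() regset callbacks above are what service ptrace FP-register requests and the matching core-dump note. A user-space sketch, assuming an x86 tracee that is already stopped; the helper name is made up:

#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/user.h>

/* Illustrative helper: print the stopped tracee's FPU control word. */
static void show_fpu_cwd(pid_t pid)
{
	struct user_fpregs_struct fpregs;

	if (ptrace(PTRACE_GETFPREGS, pid, NULL, &fpregs) == 0)
		printf("fpu cwd = %#lx\n", (unsigned long)fpregs.cwd);
	else
		perror("PTRACE_GETFPREGS");
}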
- */ - -static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) -{ - struct task_struct *tsk = current; - struct i387_fsave_struct *fp = &tsk->thread.fpu.state->fsave; - - fp->status = fp->swd; - if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) - return -1; - return 1; -} - -static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) -{ - struct task_struct *tsk = current; - struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; - struct user_i387_ia32_struct env; - int err = 0; - - convert_from_fxsr(&env, tsk); - if (__copy_to_user(buf, &env, sizeof(env))) - return -1; - - err |= __put_user(fx->swd, &buf->status); - err |= __put_user(X86_FXSR_MAGIC, &buf->magic); - if (err) - return -1; - - if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size)) - return -1; - return 1; -} - -static int save_i387_xsave(void __user *buf) -{ - struct task_struct *tsk = current; - struct _fpstate_ia32 __user *fx = buf; - int err = 0; - - - sanitize_i387_state(tsk); - - /* - * For legacy compatible, we always set FP/SSE bits in the bit - * vector while saving the state to the user context. - * This will enable us capturing any changes(during sigreturn) to - * the FP/SSE bits by the legacy applications which don't touch - * xstate_bv in the xsave header. - * - * xsave aware applications can change the xstate_bv in the xsave - * header as well as change any contents in the memory layout. - * xrestore as part of sigreturn will capture all the changes. - */ - tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; - - if (save_i387_fxsave(fx) < 0) - return -1; - - err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32, - sizeof(struct _fpx_sw_bytes)); - err |= __put_user(FP_XSTATE_MAGIC2, - (__u32 __user *) (buf + sig_xstate_ia32_size - - FP_XSTATE_MAGIC2_SIZE)); - if (err) - return -1; - - return 1; -} - -int save_i387_xstate_ia32(void __user *buf) -{ - struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; - struct task_struct *tsk = current; - - if (!used_math()) - return 0; - - if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size)) - return -EACCES; - /* - * This will cause a "finit" to be triggered by the next - * attempted FPU operation by the 'current' process. - */ - clear_used_math(); - - if (!HAVE_HWFP) { - return fpregs_soft_get(current, NULL, - 0, sizeof(struct user_i387_ia32_struct), - NULL, fp) ? 
-1 : 1; - } - - unlazy_fpu(tsk); - - if (cpu_has_xsave) - return save_i387_xsave(fp); - if (cpu_has_fxsr) - return save_i387_fxsave(fp); - else - return save_i387_fsave(fp); -} - -static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) -{ - struct task_struct *tsk = current; - - return __copy_from_user(&tsk->thread.fpu.state->fsave, buf, - sizeof(struct i387_fsave_struct)); -} - -static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf, - unsigned int size) -{ - struct task_struct *tsk = current; - struct user_i387_ia32_struct env; - int err; - - err = __copy_from_user(&tsk->thread.fpu.state->fxsave, &buf->_fxsr_env[0], - size); - /* mxcsr reserved bits must be masked to zero for security reasons */ - tsk->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask; - if (err || __copy_from_user(&env, buf, sizeof(env))) - return 1; - convert_to_fxsr(tsk, &env); - - return 0; -} - -static int restore_i387_xsave(void __user *buf) -{ - struct _fpx_sw_bytes fx_sw_user; - struct _fpstate_ia32 __user *fx_user = - ((struct _fpstate_ia32 __user *) buf); - struct i387_fxsave_struct __user *fx = - (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0]; - struct xsave_hdr_struct *xsave_hdr = - ¤t->thread.fpu.state->xsave.xsave_hdr; - u64 mask; - int err; - - if (check_for_xstate(fx, buf, &fx_sw_user)) - goto fx_only; - - mask = fx_sw_user.xstate_bv; - - err = restore_i387_fxsave(buf, fx_sw_user.xstate_size); - - xsave_hdr->xstate_bv &= pcntxt_mask; - /* - * These bits must be zero. - */ - xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; - - /* - * Init the state that is not present in the memory layout - * and enabled by the OS. - */ - mask = ~(pcntxt_mask & ~mask); - xsave_hdr->xstate_bv &= mask; - - return err; -fx_only: - /* - * Couldn't find the extended state information in the memory - * layout. Restore the FP/SSE and init the other extended state - * enabled by the OS. - */ - xsave_hdr->xstate_bv = XSTATE_FPSSE; - return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct)); -} - -int restore_i387_xstate_ia32(void __user *buf) -{ - int err; - struct task_struct *tsk = current; - struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; - - if (HAVE_HWFP) - clear_fpu(tsk); - - if (!buf) { - if (used_math()) { - clear_fpu(tsk); - clear_used_math(); - } - - return 0; - } else - if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size)) - return -EACCES; - - if (!used_math()) { - err = init_fpu(tsk); - if (err) - return err; - } - - if (HAVE_HWFP) { - if (cpu_has_xsave) - err = restore_i387_xsave(buf); - else if (cpu_has_fxsr) - err = restore_i387_fxsave(fp, sizeof(struct - i387_fxsave_struct)); - else - err = restore_i387_fsave(fp); - } else { - err = fpregs_soft_set(current, NULL, - 0, sizeof(struct user_i387_ia32_struct), - NULL, fp) != 0; - } - set_used_math(); - - return err; -} - -/* - * FPU state for core dumps. - * This is only used for a.out dumps now. - * It is declared generically using elf_fpregset_t (which is - * struct user_i387_struct) but is in fact only used for 32-bit - * dumps, so on 64-bit it is really struct user_i387_ia32_struct. 
- */ -int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu) -{ - struct task_struct *tsk = current; - int fpvalid; - - fpvalid = !!used_math(); - if (fpvalid) - fpvalid = !fpregs_get(tsk, NULL, - 0, sizeof(struct user_i387_ia32_struct), - fpu, NULL); - - return fpvalid; -} -EXPORT_SYMBOL(dump_fpu); - -#endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ diff --git a/ANDROID_3.4.5/arch/x86/kernel/i8237.c b/ANDROID_3.4.5/arch/x86/kernel/i8237.c deleted file mode 100644 index 8eeaa81d..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/i8237.c +++ /dev/null @@ -1,55 +0,0 @@ -/* - * 8237A DMA controller suspend functions. - * - * Written by Pierre Ossman, 2005. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or (at - * your option) any later version. - */ - -#include <linux/init.h> -#include <linux/syscore_ops.h> - -#include <asm/dma.h> - -/* - * This module just handles suspend/resume issues with the - * 8237A DMA controller (used for ISA and LPC). - * Allocation is handled in kernel/dma.c and normal usage is - * in asm/dma.h. - */ - -static void i8237A_resume(void) -{ - unsigned long flags; - int i; - - flags = claim_dma_lock(); - - dma_outb(0, DMA1_RESET_REG); - dma_outb(0, DMA2_RESET_REG); - - for (i = 0; i < 8; i++) { - set_dma_addr(i, 0x000000); - /* DMA count is a bit weird so this is not 0 */ - set_dma_count(i, 1); - } - - /* Enable cascade DMA or channel 0-3 won't work */ - enable_dma(4); - - release_dma_lock(flags); -} - -static struct syscore_ops i8237_syscore_ops = { - .resume = i8237A_resume, -}; - -static int __init i8237A_init_ops(void) -{ - register_syscore_ops(&i8237_syscore_ops); - return 0; -} -device_initcall(i8237A_init_ops); diff --git a/ANDROID_3.4.5/arch/x86/kernel/i8253.c b/ANDROID_3.4.5/arch/x86/kernel/i8253.c deleted file mode 100644 index f2b96de3..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/i8253.c +++ /dev/null @@ -1,43 +0,0 @@ -/* - * 8253/PIT functions - * - */ -#include <linux/clockchips.h> -#include <linux/module.h> -#include <linux/timex.h> -#include <linux/i8253.h> - -#include <asm/hpet.h> -#include <asm/time.h> -#include <asm/smp.h> - -/* - * HPET replaces the PIT, when enabled. 
So we need to know, which of - * the two timers is used - */ -struct clock_event_device *global_clock_event; - -void __init setup_pit_timer(void) -{ - clockevent_i8253_init(true); - global_clock_event = &i8253_clockevent; -} - -#ifndef CONFIG_X86_64 -static int __init init_pit_clocksource(void) -{ - /* - * Several reasons not to register PIT as a clocksource: - * - * - On SMP PIT does not scale due to i8253_lock - * - when HPET is enabled - * - when local APIC timer is active (PIT is switched off) - */ - if (num_possible_cpus() > 1 || is_hpet_enabled() || - i8253_clockevent.mode != CLOCK_EVT_MODE_PERIODIC) - return 0; - - return clocksource_i8253_init(); -} -arch_initcall(init_pit_clocksource); -#endif /* !CONFIG_X86_64 */ diff --git a/ANDROID_3.4.5/arch/x86/kernel/i8259.c b/ANDROID_3.4.5/arch/x86/kernel/i8259.c deleted file mode 100644 index 36d1853e..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/i8259.c +++ /dev/null @@ -1,401 +0,0 @@ -#include <linux/linkage.h> -#include <linux/errno.h> -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/ioport.h> -#include <linux/interrupt.h> -#include <linux/timex.h> -#include <linux/random.h> -#include <linux/init.h> -#include <linux/kernel_stat.h> -#include <linux/syscore_ops.h> -#include <linux/bitops.h> -#include <linux/acpi.h> -#include <linux/io.h> -#include <linux/delay.h> - -#include <linux/atomic.h> -#include <asm/timer.h> -#include <asm/hw_irq.h> -#include <asm/pgtable.h> -#include <asm/desc.h> -#include <asm/apic.h> -#include <asm/i8259.h> - -/* - * This is the 'legacy' 8259A Programmable Interrupt Controller, - * present in the majority of PC/AT boxes. - * plus some generic x86 specific things if generic specifics makes - * any sense at all. - */ -static void init_8259A(int auto_eoi); - -static int i8259A_auto_eoi; -DEFINE_RAW_SPINLOCK(i8259A_lock); - -/* - * 8259A PIC functions to handle ISA devices: - */ - -/* - * This contains the irq mask for both 8259A irq controllers, - */ -unsigned int cached_irq_mask = 0xffff; - -/* - * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) - * boards the timer interrupt is not really connected to any IO-APIC pin, - * it's fed to the master 8259A's IR0 line only. - * - * Any '1' bit in this mask means the IRQ is routed through the IO-APIC. - * this 'mixed mode' IRQ handling costs nothing because it's only used - * at IRQ setup time. 
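As a reading aid for the mask/unmask helpers that follow: cached_irq_mask above shadows both Interrupt Mask Registers, and the asm/i8259.h macros cached_master_mask and cached_slave_mask are simply its low and high bytes.

/*
 * bits 0-7   -> master PIC IMR, I/O port 0x21 (cached_master_mask)
 * bits 8-15  -> slave  PIC IMR, I/O port 0xa1 (cached_slave_mask)
 *
 * Masking IRQ 10, for example, sets bit 10 in cached_irq_mask and the
 * updated high byte is written to the slave; the slave itself reaches
 * the CPU only through the master's IR2 cascade input.
 */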
- */ -unsigned long io_apic_irqs; - -static void mask_8259A_irq(unsigned int irq) -{ - unsigned int mask = 1 << irq; - unsigned long flags; - - raw_spin_lock_irqsave(&i8259A_lock, flags); - cached_irq_mask |= mask; - if (irq & 8) - outb(cached_slave_mask, PIC_SLAVE_IMR); - else - outb(cached_master_mask, PIC_MASTER_IMR); - raw_spin_unlock_irqrestore(&i8259A_lock, flags); -} - -static void disable_8259A_irq(struct irq_data *data) -{ - mask_8259A_irq(data->irq); -} - -static void unmask_8259A_irq(unsigned int irq) -{ - unsigned int mask = ~(1 << irq); - unsigned long flags; - - raw_spin_lock_irqsave(&i8259A_lock, flags); - cached_irq_mask &= mask; - if (irq & 8) - outb(cached_slave_mask, PIC_SLAVE_IMR); - else - outb(cached_master_mask, PIC_MASTER_IMR); - raw_spin_unlock_irqrestore(&i8259A_lock, flags); -} - -static void enable_8259A_irq(struct irq_data *data) -{ - unmask_8259A_irq(data->irq); -} - -static int i8259A_irq_pending(unsigned int irq) -{ - unsigned int mask = 1<<irq; - unsigned long flags; - int ret; - - raw_spin_lock_irqsave(&i8259A_lock, flags); - if (irq < 8) - ret = inb(PIC_MASTER_CMD) & mask; - else - ret = inb(PIC_SLAVE_CMD) & (mask >> 8); - raw_spin_unlock_irqrestore(&i8259A_lock, flags); - - return ret; -} - -static void make_8259A_irq(unsigned int irq) -{ - disable_irq_nosync(irq); - io_apic_irqs &= ~(1<<irq); - irq_set_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq, - i8259A_chip.name); - enable_irq(irq); -} - -/* - * This function assumes to be called rarely. Switching between - * 8259A registers is slow. - * This has to be protected by the irq controller spinlock - * before being called. - */ -static inline int i8259A_irq_real(unsigned int irq) -{ - int value; - int irqmask = 1<<irq; - - if (irq < 8) { - outb(0x0B, PIC_MASTER_CMD); /* ISR register */ - value = inb(PIC_MASTER_CMD) & irqmask; - outb(0x0A, PIC_MASTER_CMD); /* back to the IRR register */ - return value; - } - outb(0x0B, PIC_SLAVE_CMD); /* ISR register */ - value = inb(PIC_SLAVE_CMD) & (irqmask >> 8); - outb(0x0A, PIC_SLAVE_CMD); /* back to the IRR register */ - return value; -} - -/* - * Careful! The 8259A is a fragile beast, it pretty - * much _has_ to be done exactly like this (mask it - * first, _then_ send the EOI, and the order of EOI - * to the two 8259s is important! - */ -static void mask_and_ack_8259A(struct irq_data *data) -{ - unsigned int irq = data->irq; - unsigned int irqmask = 1 << irq; - unsigned long flags; - - raw_spin_lock_irqsave(&i8259A_lock, flags); - /* - * Lightweight spurious IRQ detection. We do not want - * to overdo spurious IRQ handling - it's usually a sign - * of hardware problems, so we only do the checks we can - * do without slowing down good hardware unnecessarily. - * - * Note that IRQ7 and IRQ15 (the two spurious IRQs - * usually resulting from the 8259A-1|2 PICs) occur - * even if the IRQ is masked in the 8259A. Thus we - * can check spurious 8259A IRQs without doing the - * quite slow i8259A_irq_real() call for every IRQ. - * This does not cover 100% of spurious interrupts, - * but should be enough to warn the user that there - * is something bad going on ... - */ - if (cached_irq_mask & irqmask) - goto spurious_8259A_irq; - cached_irq_mask |= irqmask; - -handle_real_irq: - if (irq & 8) { - inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) 
*/ - outb(cached_slave_mask, PIC_SLAVE_IMR); - /* 'Specific EOI' to slave */ - outb(0x60+(irq&7), PIC_SLAVE_CMD); - /* 'Specific EOI' to master-IRQ2 */ - outb(0x60+PIC_CASCADE_IR, PIC_MASTER_CMD); - } else { - inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */ - outb(cached_master_mask, PIC_MASTER_IMR); - outb(0x60+irq, PIC_MASTER_CMD); /* 'Specific EOI to master */ - } - raw_spin_unlock_irqrestore(&i8259A_lock, flags); - return; - -spurious_8259A_irq: - /* - * this is the slow path - should happen rarely. - */ - if (i8259A_irq_real(irq)) - /* - * oops, the IRQ _is_ in service according to the - * 8259A - not spurious, go handle it. - */ - goto handle_real_irq; - - { - static int spurious_irq_mask; - /* - * At this point we can be sure the IRQ is spurious, - * lets ACK and report it. [once per IRQ] - */ - if (!(spurious_irq_mask & irqmask)) { - printk(KERN_DEBUG - "spurious 8259A interrupt: IRQ%d.\n", irq); - spurious_irq_mask |= irqmask; - } - atomic_inc(&irq_err_count); - /* - * Theoretically we do not have to handle this IRQ, - * but in Linux this does not cause problems and is - * simpler for us. - */ - goto handle_real_irq; - } -} - -struct irq_chip i8259A_chip = { - .name = "XT-PIC", - .irq_mask = disable_8259A_irq, - .irq_disable = disable_8259A_irq, - .irq_unmask = enable_8259A_irq, - .irq_mask_ack = mask_and_ack_8259A, -}; - -static char irq_trigger[2]; -/** - * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ - */ -static void restore_ELCR(char *trigger) -{ - outb(trigger[0], 0x4d0); - outb(trigger[1], 0x4d1); -} - -static void save_ELCR(char *trigger) -{ - /* IRQ 0,1,2,8,13 are marked as reserved */ - trigger[0] = inb(0x4d0) & 0xF8; - trigger[1] = inb(0x4d1) & 0xDE; -} - -static void i8259A_resume(void) -{ - init_8259A(i8259A_auto_eoi); - restore_ELCR(irq_trigger); -} - -static int i8259A_suspend(void) -{ - save_ELCR(irq_trigger); - return 0; -} - -static void i8259A_shutdown(void) -{ - /* Put the i8259A into a quiescent state that - * the kernel initialization code can get it - * out of. - */ - outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ -} - -static struct syscore_ops i8259_syscore_ops = { - .suspend = i8259A_suspend, - .resume = i8259A_resume, - .shutdown = i8259A_shutdown, -}; - -static void mask_8259A(void) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&i8259A_lock, flags); - - outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ - - raw_spin_unlock_irqrestore(&i8259A_lock, flags); -} - -static void unmask_8259A(void) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&i8259A_lock, flags); - - outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ - outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ - - raw_spin_unlock_irqrestore(&i8259A_lock, flags); -} - -static void init_8259A(int auto_eoi) -{ - unsigned long flags; - - i8259A_auto_eoi = auto_eoi; - - raw_spin_lock_irqsave(&i8259A_lock, flags); - - outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ - - /* - * outb_pic - this has to work on a wide range of PC hardware. 
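The specific-EOI arithmetic in mask_and_ack_8259A() above, spelled out for one slave interrupt (IRQ 12 is just an example; port numbers per asm/i8259.h):

/*
 *   outb(0x60 + (12 & 7), PIC_SLAVE_CMD);         -> 0x64 to port 0xa0
 *   outb(0x60 + PIC_CASCADE_IR, PIC_MASTER_CMD);  -> 0x62 to port 0x20
 *
 * The slave is told "EOI for IR4" first, then the master gets "EOI for
 * IR2", the cascade line the slave is wired to. The order matters, as
 * the comment above the function warns.
 */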
- */ - outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ - - /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 on x86-64, - to 0x20-0x27 on i386 */ - outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR); - - /* 8259A-1 (the master) has a slave on IR2 */ - outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); - - if (auto_eoi) /* master does Auto EOI */ - outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); - else /* master expects normal EOI */ - outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); - - outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ - - /* ICW2: 8259A-2 IR0-7 mapped to IRQ8_VECTOR */ - outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR); - /* 8259A-2 is a slave on master's IR2 */ - outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); - /* (slave's support for AEOI in flat mode is to be investigated) */ - outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); - - if (auto_eoi) - /* - * In AEOI mode we just have to mask the interrupt - * when acking. - */ - i8259A_chip.irq_mask_ack = disable_8259A_irq; - else - i8259A_chip.irq_mask_ack = mask_and_ack_8259A; - - udelay(100); /* wait for 8259A to initialize */ - - outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ - outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ - - raw_spin_unlock_irqrestore(&i8259A_lock, flags); -} - -/* - * make i8259 a driver so that we can select pic functions at run time. the goal - * is to make x86 binary compatible among pc compatible and non-pc compatible - * platforms, such as x86 MID. - */ - -static void legacy_pic_noop(void) { }; -static void legacy_pic_uint_noop(unsigned int unused) { }; -static void legacy_pic_int_noop(int unused) { }; -static int legacy_pic_irq_pending_noop(unsigned int irq) -{ - return 0; -} - -struct legacy_pic null_legacy_pic = { - .nr_legacy_irqs = 0, - .chip = &dummy_irq_chip, - .mask = legacy_pic_uint_noop, - .unmask = legacy_pic_uint_noop, - .mask_all = legacy_pic_noop, - .restore_mask = legacy_pic_noop, - .init = legacy_pic_int_noop, - .irq_pending = legacy_pic_irq_pending_noop, - .make_irq = legacy_pic_uint_noop, -}; - -struct legacy_pic default_legacy_pic = { - .nr_legacy_irqs = NR_IRQS_LEGACY, - .chip = &i8259A_chip, - .mask = mask_8259A_irq, - .unmask = unmask_8259A_irq, - .mask_all = mask_8259A, - .restore_mask = unmask_8259A, - .init = init_8259A, - .irq_pending = i8259A_irq_pending, - .make_irq = make_8259A_irq, -}; - -struct legacy_pic *legacy_pic = &default_legacy_pic; - -static int __init i8259A_init_ops(void) -{ - if (legacy_pic == &default_legacy_pic) - register_syscore_ops(&i8259_syscore_ops); - - return 0; -} - -device_initcall(i8259A_init_ops); diff --git a/ANDROID_3.4.5/arch/x86/kernel/init_task.c b/ANDROID_3.4.5/arch/x86/kernel/init_task.c deleted file mode 100644 index 43e9ccf4..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/init_task.c +++ /dev/null @@ -1,42 +0,0 @@ -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/init.h> -#include <linux/init_task.h> -#include <linux/fs.h> -#include <linux/mqueue.h> - -#include <asm/uaccess.h> -#include <asm/pgtable.h> -#include <asm/desc.h> - -static struct signal_struct init_signals = INIT_SIGNALS(init_signals); -static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); - -/* - * Initial thread structure. - * - * We need to make sure that this is THREAD_SIZE aligned due to the - * way process stacks are handled. This is done by having a special - * "init_task" linker map entry.. 
- */ -union thread_union init_thread_union __init_task_data = - { INIT_THREAD_INFO(init_task) }; - -/* - * Initial task structure. - * - * All other task structs will be allocated on slabs in fork.c - */ -struct task_struct init_task = INIT_TASK(init_task); -EXPORT_SYMBOL(init_task); - -/* - * per-CPU TSS segments. Threads are completely 'soft' on Linux, - * no more per-task TSS's. The TSS size is kept cacheline-aligned - * so they are allowed to end up in the .data..cacheline_aligned - * section. Since TSS's are completely CPU-local, we want them - * on exact cacheline boundaries, to eliminate cacheline ping-pong. - */ -DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS; - diff --git a/ANDROID_3.4.5/arch/x86/kernel/io_delay.c b/ANDROID_3.4.5/arch/x86/kernel/io_delay.c deleted file mode 100644 index a979b5bd..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/io_delay.c +++ /dev/null @@ -1,131 +0,0 @@ -/* - * I/O delay strategies for inb_p/outb_p - * - * Allow for a DMI based override of port 0x80, needed for certain HP laptops - * and possibly other systems. Also allow for the gradual elimination of - * outb_p/inb_p API uses. - */ -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/delay.h> -#include <linux/init.h> -#include <linux/dmi.h> -#include <linux/io.h> - -int io_delay_type __read_mostly = CONFIG_DEFAULT_IO_DELAY_TYPE; - -static int __initdata io_delay_override; - -/* - * Paravirt wants native_io_delay to be a constant. - */ -void native_io_delay(void) -{ - switch (io_delay_type) { - default: - case CONFIG_IO_DELAY_TYPE_0X80: - asm volatile ("outb %al, $0x80"); - break; - case CONFIG_IO_DELAY_TYPE_0XED: - asm volatile ("outb %al, $0xed"); - break; - case CONFIG_IO_DELAY_TYPE_UDELAY: - /* - * 2 usecs is an upper-bound for the outb delay but - * note that udelay doesn't have the bus-level - * side-effects that outb does, nor does udelay() have - * precise timings during very early bootup (the delays - * are shorter until calibrated): - */ - udelay(2); - case CONFIG_IO_DELAY_TYPE_NONE: - break; - } -} -EXPORT_SYMBOL(native_io_delay); - -static int __init dmi_io_delay_0xed_port(const struct dmi_system_id *id) -{ - if (io_delay_type == CONFIG_IO_DELAY_TYPE_0X80) { - pr_notice("%s: using 0xed I/O delay port\n", id->ident); - io_delay_type = CONFIG_IO_DELAY_TYPE_0XED; - } - - return 0; -} - -/* - * Quirk table for systems that misbehave (lock up, etc.) 
if port - * 0x80 is used: - */ -static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = { - { - .callback = dmi_io_delay_0xed_port, - .ident = "Compaq Presario V6000", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), - DMI_MATCH(DMI_BOARD_NAME, "30B7") - } - }, - { - .callback = dmi_io_delay_0xed_port, - .ident = "HP Pavilion dv9000z", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), - DMI_MATCH(DMI_BOARD_NAME, "30B9") - } - }, - { - .callback = dmi_io_delay_0xed_port, - .ident = "HP Pavilion dv6000", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), - DMI_MATCH(DMI_BOARD_NAME, "30B8") - } - }, - { - .callback = dmi_io_delay_0xed_port, - .ident = "HP Pavilion tx1000", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), - DMI_MATCH(DMI_BOARD_NAME, "30BF") - } - }, - { - .callback = dmi_io_delay_0xed_port, - .ident = "Presario F700", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), - DMI_MATCH(DMI_BOARD_NAME, "30D3") - } - }, - { } -}; - -void __init io_delay_init(void) -{ - if (!io_delay_override) - dmi_check_system(io_delay_0xed_port_dmi_table); -} - -static int __init io_delay_param(char *s) -{ - if (!s) - return -EINVAL; - - if (!strcmp(s, "0x80")) - io_delay_type = CONFIG_IO_DELAY_TYPE_0X80; - else if (!strcmp(s, "0xed")) - io_delay_type = CONFIG_IO_DELAY_TYPE_0XED; - else if (!strcmp(s, "udelay")) - io_delay_type = CONFIG_IO_DELAY_TYPE_UDELAY; - else if (!strcmp(s, "none")) - io_delay_type = CONFIG_IO_DELAY_TYPE_NONE; - else - return -EINVAL; - - io_delay_override = 1; - return 0; -} - -early_param("io_delay", io_delay_param); diff --git a/ANDROID_3.4.5/arch/x86/kernel/ioport.c b/ANDROID_3.4.5/arch/x86/kernel/ioport.c deleted file mode 100644 index 8c968974..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/ioport.c +++ /dev/null @@ -1,113 +0,0 @@ -/* - * This contains the io-permission bitmap code - written by obz, with changes - * by Linus. 32/64 bits code unification by Miguel Botón. - */ - -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/capability.h> -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/ioport.h> -#include <linux/smp.h> -#include <linux/stddef.h> -#include <linux/slab.h> -#include <linux/thread_info.h> -#include <linux/syscalls.h> -#include <linux/bitmap.h> -#include <asm/syscalls.h> - -/* - * this changes the io permissions bitmap in the current task. - */ -asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) -{ - struct thread_struct *t = ¤t->thread; - struct tss_struct *tss; - unsigned int i, max_long, bytes, bytes_updated; - - if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) - return -EINVAL; - if (turn_on && !capable(CAP_SYS_RAWIO)) - return -EPERM; - - /* - * If it's the first ioperm() call in this thread's lifetime, set the - * IO bitmap up. ioperm() is much less timing critical than clone(), - * this is why we delay this operation until now: - */ - if (!t->io_bitmap_ptr) { - unsigned long *bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); - - if (!bitmap) - return -ENOMEM; - - memset(bitmap, 0xff, IO_BITMAP_BYTES); - t->io_bitmap_ptr = bitmap; - set_thread_flag(TIF_IO_BITMAP); - } - - /* - * do it in the per-thread copy and in the TSS ... 
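For completeness on io_delay.c above: because io_delay_param() is wired up with early_param(), the strategy can also be chosen from the boot command line, and only the four strings it parses are accepted. An illustrative boot line:

    vmlinuz root=/dev/sda1 io_delay=udelay      (also accepted: 0x80, 0xed, none)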
- * - * Disable preemption via get_cpu() - we must not switch away - * because the ->io_bitmap_max value must match the bitmap - * contents: - */ - tss = &per_cpu(init_tss, get_cpu()); - - if (turn_on) - bitmap_clear(t->io_bitmap_ptr, from, num); - else - bitmap_set(t->io_bitmap_ptr, from, num); - - /* - * Search for a (possibly new) maximum. This is simple and stupid, - * to keep it obviously correct: - */ - max_long = 0; - for (i = 0; i < IO_BITMAP_LONGS; i++) - if (t->io_bitmap_ptr[i] != ~0UL) - max_long = i; - - bytes = (max_long + 1) * sizeof(unsigned long); - bytes_updated = max(bytes, t->io_bitmap_max); - - t->io_bitmap_max = bytes; - - /* Update the TSS: */ - memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated); - - put_cpu(); - - return 0; -} - -/* - * sys_iopl has to be used when you want to access the IO ports - * beyond the 0x3ff range: to get the full 65536 ports bitmapped - * you'd need 8kB of bitmaps/process, which is a bit excessive. - * - * Here we just change the flags value on the stack: we allow - * only the super-user to do it. This depends on the stack-layout - * on system-call entry - see also fork() and the signal handling - * code. - */ -long sys_iopl(unsigned int level, struct pt_regs *regs) -{ - unsigned int old = (regs->flags >> 12) & 3; - struct thread_struct *t = ¤t->thread; - - if (level > 3) - return -EINVAL; - /* Trying to gain more privileges? */ - if (level > old) { - if (!capable(CAP_SYS_RAWIO)) - return -EPERM; - } - regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12); - t->iopl = level << 12; - set_iopl_mask(t->iopl); - - return 0; -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/irq.c b/ANDROID_3.4.5/arch/x86/kernel/irq.c deleted file mode 100644 index 3dafc600..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/irq.c +++ /dev/null @@ -1,333 +0,0 @@ -/* - * Common interrupt code for 32 and 64 bit - */ -#include <linux/cpu.h> -#include <linux/interrupt.h> -#include <linux/kernel_stat.h> -#include <linux/of.h> -#include <linux/seq_file.h> -#include <linux/smp.h> -#include <linux/ftrace.h> -#include <linux/delay.h> -#include <linux/export.h> - -#include <asm/apic.h> -#include <asm/io_apic.h> -#include <asm/irq.h> -#include <asm/idle.h> -#include <asm/mce.h> -#include <asm/hw_irq.h> - -atomic_t irq_err_count; - -/* Function pointer for generic interrupt vector handling */ -void (*x86_platform_ipi_callback)(void) = NULL; - -/* - * 'what should we do if we get a hw irq event on an illegal vector'. - * each architecture has to answer this themselves. - */ -void ack_bad_irq(unsigned int irq) -{ - if (printk_ratelimit()) - pr_err("unexpected IRQ trap at vector %02x\n", irq); - - /* - * Currently unexpected vectors happen only on SMP and APIC. - * We _must_ ack these because every local APIC has only N - * irq slots per priority level, and a 'hanging, unacked' IRQ - * holds up an irq slot - in excessive cases (when multiple - * unexpected vectors occur) that might lock up the APIC - * completely. 
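The two syscalls above are reachable from user space through the glibc wrappers in <sys/io.h>. A minimal sketch; the parallel-port base address is only an illustration and the caller needs CAP_SYS_RAWIO:

#include <stdio.h>
#include <sys/io.h>

int main(void)
{
	/* Open three ports starting at 0x378 in this thread's IO bitmap. */
	if (ioperm(0x378, 3, 1)) {
		perror("ioperm");
		return 1;
	}
	outb(0x55, 0x378);	/* no #GP: the bitmap copied into the TSS allows it */

	/* iopl(3) would instead raise EFLAGS.IOPL and open all 65536 ports. */
	return 0;
}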
- * But only ack when the APIC is enabled -AK - */ - ack_APIC_irq(); -} - -#define irq_stats(x) (&per_cpu(irq_stat, x)) -/* - * /proc/interrupts printing for arch specific interrupts - */ -int arch_show_interrupts(struct seq_file *p, int prec) -{ - int j; - - seq_printf(p, "%*s: ", prec, "NMI"); - for_each_online_cpu(j) - seq_printf(p, "%10u ", irq_stats(j)->__nmi_count); - seq_printf(p, " Non-maskable interrupts\n"); -#ifdef CONFIG_X86_LOCAL_APIC - seq_printf(p, "%*s: ", prec, "LOC"); - for_each_online_cpu(j) - seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs); - seq_printf(p, " Local timer interrupts\n"); - - seq_printf(p, "%*s: ", prec, "SPU"); - for_each_online_cpu(j) - seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); - seq_printf(p, " Spurious interrupts\n"); - seq_printf(p, "%*s: ", prec, "PMI"); - for_each_online_cpu(j) - seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); - seq_printf(p, " Performance monitoring interrupts\n"); - seq_printf(p, "%*s: ", prec, "IWI"); - for_each_online_cpu(j) - seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs); - seq_printf(p, " IRQ work interrupts\n"); - seq_printf(p, "%*s: ", prec, "RTR"); - for_each_online_cpu(j) - seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count); - seq_printf(p, " APIC ICR read retries\n"); -#endif - if (x86_platform_ipi_callback) { - seq_printf(p, "%*s: ", prec, "PLT"); - for_each_online_cpu(j) - seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis); - seq_printf(p, " Platform interrupts\n"); - } -#ifdef CONFIG_SMP - seq_printf(p, "%*s: ", prec, "RES"); - for_each_online_cpu(j) - seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count); - seq_printf(p, " Rescheduling interrupts\n"); - seq_printf(p, "%*s: ", prec, "CAL"); - for_each_online_cpu(j) - seq_printf(p, "%10u ", irq_stats(j)->irq_call_count); - seq_printf(p, " Function call interrupts\n"); - seq_printf(p, "%*s: ", prec, "TLB"); - for_each_online_cpu(j) - seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); - seq_printf(p, " TLB shootdowns\n"); -#endif -#ifdef CONFIG_X86_THERMAL_VECTOR - seq_printf(p, "%*s: ", prec, "TRM"); - for_each_online_cpu(j) - seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); - seq_printf(p, " Thermal event interrupts\n"); -#endif -#ifdef CONFIG_X86_MCE_THRESHOLD - seq_printf(p, "%*s: ", prec, "THR"); - for_each_online_cpu(j) - seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); - seq_printf(p, " Threshold APIC interrupts\n"); -#endif -#ifdef CONFIG_X86_MCE - seq_printf(p, "%*s: ", prec, "MCE"); - for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(mce_exception_count, j)); - seq_printf(p, " Machine check exceptions\n"); - seq_printf(p, "%*s: ", prec, "MCP"); - for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(mce_poll_count, j)); - seq_printf(p, " Machine check polls\n"); -#endif - seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); -#if defined(CONFIG_X86_IO_APIC) - seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count)); -#endif - return 0; -} - -/* - * /proc/stat helpers - */ -u64 arch_irq_stat_cpu(unsigned int cpu) -{ - u64 sum = irq_stats(cpu)->__nmi_count; - -#ifdef CONFIG_X86_LOCAL_APIC - sum += irq_stats(cpu)->apic_timer_irqs; - sum += irq_stats(cpu)->irq_spurious_count; - sum += irq_stats(cpu)->apic_perf_irqs; - sum += irq_stats(cpu)->apic_irq_work_irqs; - sum += irq_stats(cpu)->icr_read_retry_count; -#endif - if (x86_platform_ipi_callback) - sum += irq_stats(cpu)->x86_platform_ipis; -#ifdef CONFIG_SMP - sum += irq_stats(cpu)->irq_resched_count; - 
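arch_show_interrupts() above emits the architecture-specific tail of /proc/interrupts. The row labels and column layout below follow its seq_printf() calls; the counts are made-up placeholders for a two-CPU machine:

 NMI:          0          0   Non-maskable interrupts
 LOC:     104224      98711   Local timer interrupts
 SPU:          0          0   Spurious interrupts
 RES:       3521       2903   Rescheduling interrupts
 ERR:          0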
sum += irq_stats(cpu)->irq_call_count; - sum += irq_stats(cpu)->irq_tlb_count; -#endif -#ifdef CONFIG_X86_THERMAL_VECTOR - sum += irq_stats(cpu)->irq_thermal_count; -#endif -#ifdef CONFIG_X86_MCE_THRESHOLD - sum += irq_stats(cpu)->irq_threshold_count; -#endif -#ifdef CONFIG_X86_MCE - sum += per_cpu(mce_exception_count, cpu); - sum += per_cpu(mce_poll_count, cpu); -#endif - return sum; -} - -u64 arch_irq_stat(void) -{ - u64 sum = atomic_read(&irq_err_count); - -#ifdef CONFIG_X86_IO_APIC - sum += atomic_read(&irq_mis_count); -#endif - return sum; -} - - -/* - * do_IRQ handles all normal device IRQ's (the special - * SMP cross-CPU interrupts have their own specific - * handlers). - */ -unsigned int __irq_entry do_IRQ(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - - /* high bit used in ret_from_ code */ - unsigned vector = ~regs->orig_ax; - unsigned irq; - - irq_enter(); - exit_idle(); - - irq = __this_cpu_read(vector_irq[vector]); - - if (!handle_irq(irq, regs)) { - ack_APIC_irq(); - - if (printk_ratelimit()) - pr_emerg("%s: %d.%d No irq handler for vector (irq %d)\n", - __func__, smp_processor_id(), vector, irq); - } - - irq_exit(); - - set_irq_regs(old_regs); - return 1; -} - -/* - * Handler for X86_PLATFORM_IPI_VECTOR. - */ -void smp_x86_platform_ipi(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - - ack_APIC_irq(); - - irq_enter(); - - exit_idle(); - - inc_irq_stat(x86_platform_ipis); - - if (x86_platform_ipi_callback) - x86_platform_ipi_callback(); - - irq_exit(); - - set_irq_regs(old_regs); -} - -EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); - -#ifdef CONFIG_HOTPLUG_CPU -/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ -void fixup_irqs(void) -{ - unsigned int irq, vector; - static int warned; - struct irq_desc *desc; - struct irq_data *data; - struct irq_chip *chip; - - for_each_irq_desc(irq, desc) { - int break_affinity = 0; - int set_affinity = 1; - const struct cpumask *affinity; - - if (!desc) - continue; - if (irq == 2) - continue; - - /* interrupt's are disabled at this point */ - raw_spin_lock(&desc->lock); - - data = irq_desc_get_irq_data(desc); - affinity = data->affinity; - if (!irq_has_action(irq) || irqd_is_per_cpu(data) || - cpumask_subset(affinity, cpu_online_mask)) { - raw_spin_unlock(&desc->lock); - continue; - } - - /* - * Complete the irq move. This cpu is going down and for - * non intr-remapping case, we can't wait till this interrupt - * arrives at this cpu before completing the irq move. - */ - irq_force_complete_move(irq); - - if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { - break_affinity = 1; - affinity = cpu_all_mask; - } - - chip = irq_data_get_irq_chip(data); - if (!irqd_can_move_in_process_context(data) && chip->irq_mask) - chip->irq_mask(data); - - if (chip->irq_set_affinity) - chip->irq_set_affinity(data, affinity, true); - else if (!(warned++)) - set_affinity = 0; - - /* - * We unmask if the irq was not marked masked by the - * core code. That respects the lazy irq disable - * behaviour. - */ - if (!irqd_can_move_in_process_context(data) && - !irqd_irq_masked(data) && chip->irq_unmask) - chip->irq_unmask(data); - - raw_spin_unlock(&desc->lock); - - if (break_affinity && set_affinity) - printk("Broke affinity for irq %i\n", irq); - else if (!set_affinity) - printk("Cannot set affinity for irq %i\n", irq); - } - - /* - * We can remove mdelay() and then send spuriuous interrupts to - * new cpu targets for all the irqs that were handled previously by - * this cpu. 
While it works, I have seen spurious interrupt messages - * (nothing wrong but still...). - * - * So for now, retain mdelay(1) and check the IRR and then send those - * interrupts to new targets as this cpu is already offlined... - */ - mdelay(1); - - for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { - unsigned int irr; - - if (__this_cpu_read(vector_irq[vector]) < 0) - continue; - - irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); - if (irr & (1 << (vector % 32))) { - irq = __this_cpu_read(vector_irq[vector]); - - desc = irq_to_desc(irq); - data = irq_desc_get_irq_data(desc); - chip = irq_data_get_irq_chip(data); - raw_spin_lock(&desc->lock); - if (chip->irq_retrigger) - chip->irq_retrigger(data); - raw_spin_unlock(&desc->lock); - } - } -} -#endif diff --git a/ANDROID_3.4.5/arch/x86/kernel/irq_32.c b/ANDROID_3.4.5/arch/x86/kernel/irq_32.c deleted file mode 100644 index 58b7f27c..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/irq_32.c +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar - * - * This file contains the lowest level x86-specific interrupt - * entry, irq-stacks and irq statistics code. All the remaining - * irq logic is done by the generic kernel/irq/ code and - * by the x86-specific irq controller code. (e.g. i8259.c and - * io_apic.c.) - */ - -#include <linux/module.h> -#include <linux/seq_file.h> -#include <linux/interrupt.h> -#include <linux/kernel_stat.h> -#include <linux/notifier.h> -#include <linux/cpu.h> -#include <linux/delay.h> -#include <linux/uaccess.h> -#include <linux/percpu.h> -#include <linux/mm.h> - -#include <asm/apic.h> - -DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); -EXPORT_PER_CPU_SYMBOL(irq_stat); - -DEFINE_PER_CPU(struct pt_regs *, irq_regs); -EXPORT_PER_CPU_SYMBOL(irq_regs); - -#ifdef CONFIG_DEBUG_STACKOVERFLOW - -int sysctl_panic_on_stackoverflow __read_mostly; - -/* Debugging check for stack overflow: is there less than 1KB free? */ -static int check_stack_overflow(void) -{ - long sp; - - __asm__ __volatile__("andl %%esp,%0" : - "=r" (sp) : "0" (THREAD_SIZE - 1)); - - return sp < (sizeof(struct thread_info) + STACK_WARN); -} - -static void print_stack_overflow(void) -{ - printk(KERN_WARNING "low stack detected by irq handler\n"); - dump_stack(); - if (sysctl_panic_on_stackoverflow) - panic("low stack detected by irq handler - check messages\n"); -} - -#else -static inline int check_stack_overflow(void) { return 0; } -static inline void print_stack_overflow(void) { } -#endif - -/* - * per-CPU IRQ handling contexts (thread information and stack) - */ -union irq_ctx { - struct thread_info tinfo; - u32 stack[THREAD_SIZE/sizeof(u32)]; -} __attribute__((aligned(THREAD_SIZE))); - -static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx); -static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx); - -static void call_on_stack(void *func, void *stack) -{ - asm volatile("xchgl %%ebx,%%esp \n" - "call *%%edi \n" - "movl %%ebx,%%esp \n" - : "=b" (stack) - : "0" (stack), - "D"(func) - : "memory", "cc", "edx", "ecx", "eax"); -} - -static inline int -execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) -{ - union irq_ctx *curctx, *irqctx; - u32 *isp, arg1, arg2; - - curctx = (union irq_ctx *) current_thread_info(); - irqctx = __this_cpu_read(hardirq_ctx); - - /* - * this is where we switch to the IRQ stack. 
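The IRR retrigger loop above indexes the local APIC request registers as eight 32-bit words spaced 0x10 apart, so vector V lives in word V/32 at bit V%32. A small sketch of that indexing, assuming the 3.4-era APIC_IRR base offset of 0x200 from <asm/apicdef.h>:

#include <stdio.h>

#define APIC_IRR 0x200   /* assumed base offset of the request registers */

int main(void)
{
        unsigned vector  = 0x41;                          /* sample vector       */
        unsigned reg_off = APIC_IRR + (vector / 32) * 0x10; /* which 32-bit word */
        unsigned bit     = vector % 32;                     /* which bit in it   */

        printf("vector %#x -> APIC register offset %#x, bit %u\n",
               vector, reg_off, bit);
        return 0;
}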
However, if we are - * already using the IRQ stack (because we interrupted a hardirq - * handler) we can't do that and just have to keep using the - * current stack (which is the irq stack already after all) - */ - if (unlikely(curctx == irqctx)) - return 0; - - /* build the stack frame on the IRQ stack */ - isp = (u32 *) ((char *)irqctx + sizeof(*irqctx)); - irqctx->tinfo.task = curctx->tinfo.task; - irqctx->tinfo.previous_esp = current_stack_pointer; - - /* Copy the preempt_count so that the [soft]irq checks work. */ - irqctx->tinfo.preempt_count = curctx->tinfo.preempt_count; - - if (unlikely(overflow)) - call_on_stack(print_stack_overflow, isp); - - asm volatile("xchgl %%ebx,%%esp \n" - "call *%%edi \n" - "movl %%ebx,%%esp \n" - : "=a" (arg1), "=d" (arg2), "=b" (isp) - : "0" (irq), "1" (desc), "2" (isp), - "D" (desc->handle_irq) - : "memory", "cc", "ecx"); - return 1; -} - -/* - * allocate per-cpu stacks for hardirq and for softirq processing - */ -void __cpuinit irq_ctx_init(int cpu) -{ - union irq_ctx *irqctx; - - if (per_cpu(hardirq_ctx, cpu)) - return; - - irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), - THREAD_FLAGS, - THREAD_ORDER)); - memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); - irqctx->tinfo.cpu = cpu; - irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; - irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); - - per_cpu(hardirq_ctx, cpu) = irqctx; - - irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), - THREAD_FLAGS, - THREAD_ORDER)); - memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); - irqctx->tinfo.cpu = cpu; - irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); - - per_cpu(softirq_ctx, cpu) = irqctx; - - printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n", - cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu)); -} - -asmlinkage void do_softirq(void) -{ - unsigned long flags; - struct thread_info *curctx; - union irq_ctx *irqctx; - u32 *isp; - - if (in_interrupt()) - return; - - local_irq_save(flags); - - if (local_softirq_pending()) { - curctx = current_thread_info(); - irqctx = __this_cpu_read(softirq_ctx); - irqctx->tinfo.task = curctx->task; - irqctx->tinfo.previous_esp = current_stack_pointer; - - /* build the stack frame on the softirq stack */ - isp = (u32 *) ((char *)irqctx + sizeof(*irqctx)); - - call_on_stack(__do_softirq, isp); - /* - * Shouldn't happen, we returned above if in_interrupt(): - */ - WARN_ON_ONCE(softirq_count()); - } - - local_irq_restore(flags); -} - -bool handle_irq(unsigned irq, struct pt_regs *regs) -{ - struct irq_desc *desc; - int overflow; - - overflow = check_stack_overflow(); - - desc = irq_to_desc(irq); - if (unlikely(!desc)) - return false; - - if (user_mode_vm(regs) || !execute_on_irq_stack(overflow, desc, irq)) { - if (unlikely(overflow)) - print_stack_overflow(); - desc->handle_irq(irq, desc); - } - - return true; -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/irq_64.c b/ANDROID_3.4.5/arch/x86/kernel/irq_64.c deleted file mode 100644 index d04d3ecd..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/irq_64.c +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar - * - * This file contains the lowest level x86_64-specific interrupt - * entry and irq statistics code. All the remaining irq logic is - * done by the generic kernel/irq/ code and in the - * x86_64-specific irq controller code. (e.g. i8259.c and - * io_apic.c.) 
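Because the 32-bit kernel stack and the per-CPU irq_ctx above are THREAD_SIZE-aligned with a struct thread_info at the base, masking a stack pointer with THREAD_SIZE-1 gives the space remaining above that base, which is exactly the heuristic check_stack_overflow() uses. A userspace sketch, with THREAD_SIZE=8192 and STACK_WARN=1024 assumed to match common 32-bit configs of this era and a stand-in for thread_info:

#include <stdio.h>

#define THREAD_SIZE 8192UL   /* assumed 8K 32-bit kernel stack */
#define STACK_WARN  1024UL   /* assumed warning threshold      */

struct thread_info_stub { long dummy[14]; };   /* stand-in for struct thread_info */

int main(void)
{
        unsigned long sp   = 0xc1234300UL;             /* sample stack pointer          */
        unsigned long left = sp & (THREAD_SIZE - 1);   /* offset above the aligned base */

        if (left < sizeof(struct thread_info_stub) + STACK_WARN)
                printf("low stack: only %lu bytes left\n", left);
        else
                printf("ok: %lu bytes left\n", left);
        return 0;
}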
- */ - -#include <linux/kernel_stat.h> -#include <linux/interrupt.h> -#include <linux/seq_file.h> -#include <linux/module.h> -#include <linux/delay.h> -#include <linux/ftrace.h> -#include <linux/uaccess.h> -#include <linux/smp.h> -#include <asm/io_apic.h> -#include <asm/idle.h> -#include <asm/apic.h> - -DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); -EXPORT_PER_CPU_SYMBOL(irq_stat); - -DEFINE_PER_CPU(struct pt_regs *, irq_regs); -EXPORT_PER_CPU_SYMBOL(irq_regs); - -int sysctl_panic_on_stackoverflow; - -/* - * Probabilistic stack overflow check: - * - * Only check the stack in process context, because everything else - * runs on the big interrupt stacks. Checking reliably is too expensive, - * so we just check from interrupts. - */ -static inline void stack_overflow_check(struct pt_regs *regs) -{ -#ifdef CONFIG_DEBUG_STACKOVERFLOW -#define STACK_TOP_MARGIN 128 - struct orig_ist *oist; - u64 irq_stack_top, irq_stack_bottom; - u64 estack_top, estack_bottom; - u64 curbase = (u64)task_stack_page(current); - - if (user_mode_vm(regs)) - return; - - if (regs->sp >= curbase + sizeof(struct thread_info) + - sizeof(struct pt_regs) + STACK_TOP_MARGIN && - regs->sp <= curbase + THREAD_SIZE) - return; - - irq_stack_top = (u64)__get_cpu_var(irq_stack_union.irq_stack) + - STACK_TOP_MARGIN; - irq_stack_bottom = (u64)__get_cpu_var(irq_stack_ptr); - if (regs->sp >= irq_stack_top && regs->sp <= irq_stack_bottom) - return; - - oist = &__get_cpu_var(orig_ist); - estack_top = (u64)oist->ist[0] - EXCEPTION_STKSZ + STACK_TOP_MARGIN; - estack_bottom = (u64)oist->ist[N_EXCEPTION_STACKS - 1]; - if (regs->sp >= estack_top && regs->sp <= estack_bottom) - return; - - WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n", - current->comm, curbase, regs->sp, - irq_stack_top, irq_stack_bottom, - estack_top, estack_bottom); - - if (sysctl_panic_on_stackoverflow) - panic("low stack detected by irq handler - check messages\n"); -#endif -} - -bool handle_irq(unsigned irq, struct pt_regs *regs) -{ - struct irq_desc *desc; - - stack_overflow_check(regs); - - desc = irq_to_desc(irq); - if (unlikely(!desc)) - return false; - - generic_handle_irq_desc(irq, desc); - return true; -} - - -extern void call_softirq(void); - -asmlinkage void do_softirq(void) -{ - __u32 pending; - unsigned long flags; - - if (in_interrupt()) - return; - - local_irq_save(flags); - pending = local_softirq_pending(); - /* Switch to interrupt stack */ - if (pending) { - call_softirq(); - WARN_ON_ONCE(softirq_count()); - } - local_irq_restore(flags); -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/irq_work.c b/ANDROID_3.4.5/arch/x86/kernel/irq_work.c deleted file mode 100644 index ca8f703a..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/irq_work.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * x86 specific code for irq_work - * - * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> - */ - -#include <linux/kernel.h> -#include <linux/irq_work.h> -#include <linux/hardirq.h> -#include <asm/apic.h> - -void smp_irq_work_interrupt(struct pt_regs *regs) -{ - irq_enter(); - ack_APIC_irq(); - inc_irq_stat(apic_irq_work_irqs); - irq_work_run(); - irq_exit(); -} - -void arch_irq_work_raise(void) -{ -#ifdef CONFIG_X86_LOCAL_APIC - if (!cpu_has_apic) - return; - - apic->send_IPI_self(IRQ_WORK_VECTOR); - apic_wait_icr_idle(); -#endif -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/irqinit.c b/ANDROID_3.4.5/arch/x86/kernel/irqinit.c deleted file mode 100644 index 
252981af..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/irqinit.c +++ /dev/null @@ -1,327 +0,0 @@ -#include <linux/linkage.h> -#include <linux/errno.h> -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/ioport.h> -#include <linux/interrupt.h> -#include <linux/timex.h> -#include <linux/random.h> -#include <linux/kprobes.h> -#include <linux/init.h> -#include <linux/kernel_stat.h> -#include <linux/device.h> -#include <linux/bitops.h> -#include <linux/acpi.h> -#include <linux/io.h> -#include <linux/delay.h> - -#include <linux/atomic.h> -#include <asm/timer.h> -#include <asm/hw_irq.h> -#include <asm/pgtable.h> -#include <asm/desc.h> -#include <asm/apic.h> -#include <asm/setup.h> -#include <asm/i8259.h> -#include <asm/traps.h> -#include <asm/prom.h> - -/* - * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: - * (these are usually mapped to vectors 0x30-0x3f) - */ - -/* - * The IO-APIC gives us many more interrupt sources. Most of these - * are unused but an SMP system is supposed to have enough memory ... - * sometimes (mostly wrt. hw bugs) we get corrupted vectors all - * across the spectrum, so we really want to be prepared to get all - * of these. Plus, more powerful systems might have more than 64 - * IO-APIC registers. - * - * (these are usually mapped into the 0x30-0xff vector range) - */ - -#ifdef CONFIG_X86_32 -/* - * Note that on a 486, we don't want to do a SIGFPE on an irq13 - * as the irq is unreliable, and exception 16 works correctly - * (ie as explained in the intel literature). On a 386, you - * can't use exception 16 due to bad IBM design, so we have to - * rely on the less exact irq13. - * - * Careful.. Not only is IRQ13 unreliable, but it is also - * leads to races. IBM designers who came up with it should - * be shot. - */ - -static irqreturn_t math_error_irq(int cpl, void *dev_id) -{ - outb(0, 0xF0); - if (ignore_fpu_irq || !boot_cpu_data.hard_math) - return IRQ_NONE; - math_error(get_irq_regs(), 0, X86_TRAP_MF); - return IRQ_HANDLED; -} - -/* - * New motherboards sometimes make IRQ 13 be a PCI interrupt, - * so allow interrupt sharing. - */ -static struct irqaction fpu_irq = { - .handler = math_error_irq, - .name = "fpu", - .flags = IRQF_NO_THREAD, -}; -#endif - -/* - * IRQ2 is cascade interrupt to second interrupt controller - */ -static struct irqaction irq2 = { - .handler = no_action, - .name = "cascade", - .flags = IRQF_NO_THREAD, -}; - -DEFINE_PER_CPU(vector_irq_t, vector_irq) = { - [0 ... NR_VECTORS - 1] = -1, -}; - -int vector_used_by_percpu_irq(unsigned int vector) -{ - int cpu; - - for_each_online_cpu(cpu) { - if (per_cpu(vector_irq, cpu)[vector] != -1) - return 1; - } - - return 0; -} - -void __init init_ISA_irqs(void) -{ - struct irq_chip *chip = legacy_pic->chip; - const char *name = chip->name; - int i; - -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) - init_bsp_APIC(); -#endif - legacy_pic->init(0); - - for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) - irq_set_chip_and_handler_name(i, chip, handle_level_irq, name); -} - -void __init init_IRQ(void) -{ - int i; - - /* - * We probably need a better place for this, but it works for - * now ... - */ - x86_add_irq_domains(); - - /* - * On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15. - * If these IRQ's are handled by legacy interrupt-controllers like PIC, - * then this configuration will likely be static after the boot. 
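The vector_irq table defined above starts out all -1 (no IRQ bound), and init_IRQ() then statically binds the legacy ISA IRQs, per the file comment mapping them to vectors 0x30-0x3f. A sketch of that table setup, with NR_VECTORS=256 and IRQ0_VECTOR=0x30 assumed from that comment:

#include <stdio.h>

#define NR_VECTORS  256
#define IRQ0_VECTOR 0x30   /* assumed, per the 0x30-0x3f comment above */
#define NR_LEGACY   16

int main(void)
{
        int vector_irq[NR_VECTORS];
        int i;

        for (i = 0; i < NR_VECTORS; i++)
                vector_irq[i] = -1;               /* default: no irq bound       */
        for (i = 0; i < NR_LEGACY; i++)
                vector_irq[IRQ0_VECTOR + i] = i;  /* ISA IRQ i at vector 0x30+i  */

        printf("vector 0x31 -> irq %d, vector 0x20 -> irq %d\n",
               vector_irq[0x31], vector_irq[0x20]);
        return 0;
}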
If - * these IRQ's are handled by more mordern controllers like IO-APIC, - * then this vector space can be freed and re-used dynamically as the - * irq's migrate etc. - */ - for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) - per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] = i; - - x86_init.irqs.intr_init(); -} - -/* - * Setup the vector to irq mappings. - */ -void setup_vector_irq(int cpu) -{ -#ifndef CONFIG_X86_IO_APIC - int irq; - - /* - * On most of the platforms, legacy PIC delivers the interrupts on the - * boot cpu. But there are certain platforms where PIC interrupts are - * delivered to multiple cpu's. If the legacy IRQ is handled by the - * legacy PIC, for the new cpu that is coming online, setup the static - * legacy vector to irq mapping: - */ - for (irq = 0; irq < legacy_pic->nr_legacy_irqs; irq++) - per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq; -#endif - - __setup_vector_irq(cpu); -} - -static void __init smp_intr_init(void) -{ -#ifdef CONFIG_SMP -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) - /* - * The reschedule interrupt is a CPU-to-CPU reschedule-helper - * IPI, driven by wakeup. - */ - alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); - - /* IPIs for invalidation */ -#define ALLOC_INVTLB_VEC(NR) \ - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+NR, \ - invalidate_interrupt##NR) - - switch (NUM_INVALIDATE_TLB_VECTORS) { - default: - ALLOC_INVTLB_VEC(31); - case 31: - ALLOC_INVTLB_VEC(30); - case 30: - ALLOC_INVTLB_VEC(29); - case 29: - ALLOC_INVTLB_VEC(28); - case 28: - ALLOC_INVTLB_VEC(27); - case 27: - ALLOC_INVTLB_VEC(26); - case 26: - ALLOC_INVTLB_VEC(25); - case 25: - ALLOC_INVTLB_VEC(24); - case 24: - ALLOC_INVTLB_VEC(23); - case 23: - ALLOC_INVTLB_VEC(22); - case 22: - ALLOC_INVTLB_VEC(21); - case 21: - ALLOC_INVTLB_VEC(20); - case 20: - ALLOC_INVTLB_VEC(19); - case 19: - ALLOC_INVTLB_VEC(18); - case 18: - ALLOC_INVTLB_VEC(17); - case 17: - ALLOC_INVTLB_VEC(16); - case 16: - ALLOC_INVTLB_VEC(15); - case 15: - ALLOC_INVTLB_VEC(14); - case 14: - ALLOC_INVTLB_VEC(13); - case 13: - ALLOC_INVTLB_VEC(12); - case 12: - ALLOC_INVTLB_VEC(11); - case 11: - ALLOC_INVTLB_VEC(10); - case 10: - ALLOC_INVTLB_VEC(9); - case 9: - ALLOC_INVTLB_VEC(8); - case 8: - ALLOC_INVTLB_VEC(7); - case 7: - ALLOC_INVTLB_VEC(6); - case 6: - ALLOC_INVTLB_VEC(5); - case 5: - ALLOC_INVTLB_VEC(4); - case 4: - ALLOC_INVTLB_VEC(3); - case 3: - ALLOC_INVTLB_VEC(2); - case 2: - ALLOC_INVTLB_VEC(1); - case 1: - ALLOC_INVTLB_VEC(0); - break; - } - - /* IPI for generic function call */ - alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); - - /* IPI for generic single function call */ - alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, - call_function_single_interrupt); - - /* Low priority IPI to cleanup after moving an irq */ - set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); - set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); - - /* IPI used for rebooting/stopping */ - alloc_intr_gate(REBOOT_VECTOR, reboot_interrupt); -#endif -#endif /* CONFIG_SMP */ -} - -static void __init apic_intr_init(void) -{ - smp_intr_init(); - -#ifdef CONFIG_X86_THERMAL_VECTOR - alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); -#endif -#ifdef CONFIG_X86_MCE_THRESHOLD - alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); -#endif - -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) - /* self generated IPI for local APIC timer */ - alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); - - /* IPI for X86 platform specific use */ - 
alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi); - - /* IPI vectors for APIC spurious and error interrupts */ - alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); - alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); - - /* IRQ work interrupts: */ -# ifdef CONFIG_IRQ_WORK - alloc_intr_gate(IRQ_WORK_VECTOR, irq_work_interrupt); -# endif - -#endif -} - -void __init native_init_IRQ(void) -{ - int i; - - /* Execute any quirks before the call gates are initialised: */ - x86_init.irqs.pre_vector_init(); - - apic_intr_init(); - - /* - * Cover the whole vector space, no vector can escape - * us. (some of these will be overridden and become - * 'special' SMP interrupts) - */ - i = FIRST_EXTERNAL_VECTOR; - for_each_clear_bit_from(i, used_vectors, NR_VECTORS) { - /* IA32_SYSCALL_VECTOR could be used in trap_init already. */ - set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); - } - - if (!acpi_ioapic && !of_ioapic) - setup_irq(2, &irq2); - -#ifdef CONFIG_X86_32 - /* - * External FPU? Set up irq13 if so, for - * original braindamaged IBM FERR coupling. - */ - if (boot_cpu_data.hard_math && !cpu_has_fpu) - setup_irq(FPU_IRQ, &fpu_irq); - - irq_ctx_init(smp_processor_id()); -#endif -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/jump_label.c b/ANDROID_3.4.5/arch/x86/kernel/jump_label.c deleted file mode 100644 index 2889b3d4..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/jump_label.c +++ /dev/null @@ -1,59 +0,0 @@ -/* - * jump label x86 support - * - * Copyright (C) 2009 Jason Baron <jbaron@redhat.com> - * - */ -#include <linux/jump_label.h> -#include <linux/memory.h> -#include <linux/uaccess.h> -#include <linux/module.h> -#include <linux/list.h> -#include <linux/jhash.h> -#include <linux/cpu.h> -#include <asm/kprobes.h> -#include <asm/alternative.h> - -#ifdef HAVE_JUMP_LABEL - -union jump_code_union { - char code[JUMP_LABEL_NOP_SIZE]; - struct { - char jump; - int offset; - } __attribute__((packed)); -}; - -static void __jump_label_transform(struct jump_entry *entry, - enum jump_label_type type, - void *(*poker)(void *, const void *, size_t)) -{ - union jump_code_union code; - - if (type == JUMP_LABEL_ENABLE) { - code.jump = 0xe9; - code.offset = entry->target - - (entry->code + JUMP_LABEL_NOP_SIZE); - } else - memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE); - - (*poker)((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE); -} - -void arch_jump_label_transform(struct jump_entry *entry, - enum jump_label_type type) -{ - get_online_cpus(); - mutex_lock(&text_mutex); - __jump_label_transform(entry, type, text_poke_smp); - mutex_unlock(&text_mutex); - put_online_cpus(); -} - -__init_or_module void arch_jump_label_transform_static(struct jump_entry *entry, - enum jump_label_type type) -{ - __jump_label_transform(entry, type, text_poke_early); -} - -#endif diff --git a/ANDROID_3.4.5/arch/x86/kernel/kdebugfs.c b/ANDROID_3.4.5/arch/x86/kernel/kdebugfs.c deleted file mode 100644 index 1d5d31ea..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/kdebugfs.c +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Architecture specific debugfs files - * - * Copyright (C) 2007, Intel Corp. - * Huang Ying <ying.huang@intel.com> - * - * This file is released under the GPLv2. 
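The jump_label code below this point patches a 5-byte site with either an atomic NOP or a near jump: opcode 0xe9 followed by a rel32 displacement measured from the end of the instruction, i.e. offset = target - (site + 5). A sketch of that encoding with invented addresses, purely to show the arithmetic:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        uint64_t site   = 0xffffffff81000100ULL;   /* hypothetical jump entry code */
        uint64_t target = 0xffffffff81000180ULL;   /* hypothetical label target    */
        int32_t  rel    = (int32_t)(target - (site + 5));
        uint8_t  insn[5];

        insn[0] = 0xe9;                    /* jmp rel32 opcode                  */
        memcpy(&insn[1], &rel, sizeof(rel));

        assert(site + 5 + rel == target);  /* decoder's view of the destination */
        printf("jmp rel32 = %d (0x%08x)\n", (int)rel, (unsigned)rel);
        return 0;
}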
- */ -#include <linux/debugfs.h> -#include <linux/uaccess.h> -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/init.h> -#include <linux/stat.h> -#include <linux/io.h> -#include <linux/mm.h> - -#include <asm/setup.h> - -struct dentry *arch_debugfs_dir; -EXPORT_SYMBOL(arch_debugfs_dir); - -#ifdef CONFIG_DEBUG_BOOT_PARAMS -struct setup_data_node { - u64 paddr; - u32 type; - u32 len; -}; - -static ssize_t setup_data_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct setup_data_node *node = file->private_data; - unsigned long remain; - loff_t pos = *ppos; - struct page *pg; - void *p; - u64 pa; - - if (pos < 0) - return -EINVAL; - - if (pos >= node->len) - return 0; - - if (count > node->len - pos) - count = node->len - pos; - - pa = node->paddr + sizeof(struct setup_data) + pos; - pg = pfn_to_page((pa + count - 1) >> PAGE_SHIFT); - if (PageHighMem(pg)) { - p = ioremap_cache(pa, count); - if (!p) - return -ENXIO; - } else - p = __va(pa); - - remain = copy_to_user(user_buf, p, count); - - if (PageHighMem(pg)) - iounmap(p); - - if (remain) - return -EFAULT; - - *ppos = pos + count; - - return count; -} - -static const struct file_operations fops_setup_data = { - .read = setup_data_read, - .open = simple_open, - .llseek = default_llseek, -}; - -static int __init -create_setup_data_node(struct dentry *parent, int no, - struct setup_data_node *node) -{ - struct dentry *d, *type, *data; - char buf[16]; - - sprintf(buf, "%d", no); - d = debugfs_create_dir(buf, parent); - if (!d) - return -ENOMEM; - - type = debugfs_create_x32("type", S_IRUGO, d, &node->type); - if (!type) - goto err_dir; - - data = debugfs_create_file("data", S_IRUGO, d, node, &fops_setup_data); - if (!data) - goto err_type; - - return 0; - -err_type: - debugfs_remove(type); -err_dir: - debugfs_remove(d); - return -ENOMEM; -} - -static int __init create_setup_data_nodes(struct dentry *parent) -{ - struct setup_data_node *node; - struct setup_data *data; - int error = -ENOMEM; - struct dentry *d; - struct page *pg; - u64 pa_data; - int no = 0; - - d = debugfs_create_dir("setup_data", parent); - if (!d) - return -ENOMEM; - - pa_data = boot_params.hdr.setup_data; - - while (pa_data) { - node = kmalloc(sizeof(*node), GFP_KERNEL); - if (!node) - goto err_dir; - - pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT); - if (PageHighMem(pg)) { - data = ioremap_cache(pa_data, sizeof(*data)); - if (!data) { - kfree(node); - error = -ENXIO; - goto err_dir; - } - } else - data = __va(pa_data); - - node->paddr = pa_data; - node->type = data->type; - node->len = data->len; - error = create_setup_data_node(d, no, node); - pa_data = data->next; - - if (PageHighMem(pg)) - iounmap(data); - if (error) - goto err_dir; - no++; - } - - return 0; - -err_dir: - debugfs_remove(d); - return error; -} - -static struct debugfs_blob_wrapper boot_params_blob = { - .data = &boot_params, - .size = sizeof(boot_params), -}; - -static int __init boot_params_kdebugfs_init(void) -{ - struct dentry *dbp, *version, *data; - int error = -ENOMEM; - - dbp = debugfs_create_dir("boot_params", NULL); - if (!dbp) - return -ENOMEM; - - version = debugfs_create_x16("version", S_IRUGO, dbp, - &boot_params.hdr.version); - if (!version) - goto err_dir; - - data = debugfs_create_blob("data", S_IRUGO, dbp, - &boot_params_blob); - if (!data) - goto err_version; - - error = create_setup_data_nodes(dbp); - if (error) - goto err_data; - - return 0; - -err_data: - debugfs_remove(data); -err_version: - debugfs_remove(version); 
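setup_data_read() below follows the usual bounded-read pattern: reject a negative offset, return 0 once past the end, clamp the count to what remains, then advance the file position by what was copied. A userspace sketch of just that bounds handling (helper name and return codes are illustrative):

#include <stdio.h>
#include <string.h>

/* Copy up to count bytes of src starting at *ppos, setup_data_read() style. */
static long bounded_read(const char *src, long len, char *dst, long count, long *ppos)
{
        long pos = *ppos;

        if (pos < 0)
                return -1;            /* -EINVAL in the kernel version   */
        if (pos >= len)
                return 0;             /* past the end: nothing to read   */
        if (count > len - pos)
                count = len - pos;    /* clamp to what is left           */

        memcpy(dst, src + pos, count);
        *ppos = pos + count;
        return count;
}

int main(void)
{
        const char data[] = "setup_data";
        char buf[8];
        long pos = 6;
        long n = bounded_read(data, sizeof(data) - 1, buf, sizeof(buf), &pos);

        printf("read %ld bytes: %.*s (new pos %ld)\n", n, (int)n, buf, pos);
        return 0;
}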
-err_dir: - debugfs_remove(dbp); - return error; -} -#endif /* CONFIG_DEBUG_BOOT_PARAMS */ - -static int __init arch_kdebugfs_init(void) -{ - int error = 0; - - arch_debugfs_dir = debugfs_create_dir("x86", NULL); - if (!arch_debugfs_dir) - return -ENOMEM; - -#ifdef CONFIG_DEBUG_BOOT_PARAMS - error = boot_params_kdebugfs_init(); -#endif - - return error; -} -arch_initcall(arch_kdebugfs_init); diff --git a/ANDROID_3.4.5/arch/x86/kernel/kgdb.c b/ANDROID_3.4.5/arch/x86/kernel/kgdb.c deleted file mode 100644 index 8bfb6146..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/kgdb.c +++ /dev/null @@ -1,813 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2, or (at your option) any - * later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - */ - -/* - * Copyright (C) 2004 Amit S. Kale <amitkale@linsyssoft.com> - * Copyright (C) 2000-2001 VERITAS Software Corporation. - * Copyright (C) 2002 Andi Kleen, SuSE Labs - * Copyright (C) 2004 LinSysSoft Technologies Pvt. Ltd. - * Copyright (C) 2007 MontaVista Software, Inc. - * Copyright (C) 2007-2008 Jason Wessel, Wind River Systems, Inc. - */ -/**************************************************************************** - * Contributor: Lake Stevens Instrument Division$ - * Written by: Glenn Engel $ - * Updated by: Amit Kale<akale@veritas.com> - * Updated by: Tom Rini <trini@kernel.crashing.org> - * Updated by: Jason Wessel <jason.wessel@windriver.com> - * Modified for 386 by Jim Kingdon, Cygnus Support. 
- * Origianl kgdb, compatibility with 2.1.xx kernel by - * David Grothe <dave@gcom.com> - * Integrated into 2.2.5 kernel by Tigran Aivazian <tigran@sco.com> - * X86_64 changes from Andi Kleen's patch merged by Jim Houston - */ -#include <linux/spinlock.h> -#include <linux/kdebug.h> -#include <linux/string.h> -#include <linux/kernel.h> -#include <linux/ptrace.h> -#include <linux/sched.h> -#include <linux/delay.h> -#include <linux/kgdb.h> -#include <linux/init.h> -#include <linux/smp.h> -#include <linux/nmi.h> -#include <linux/hw_breakpoint.h> -#include <linux/uaccess.h> -#include <linux/memory.h> - -#include <asm/debugreg.h> -#include <asm/apicdef.h> -#include <asm/apic.h> -#include <asm/nmi.h> - -struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = -{ -#ifdef CONFIG_X86_32 - { "ax", 4, offsetof(struct pt_regs, ax) }, - { "cx", 4, offsetof(struct pt_regs, cx) }, - { "dx", 4, offsetof(struct pt_regs, dx) }, - { "bx", 4, offsetof(struct pt_regs, bx) }, - { "sp", 4, offsetof(struct pt_regs, sp) }, - { "bp", 4, offsetof(struct pt_regs, bp) }, - { "si", 4, offsetof(struct pt_regs, si) }, - { "di", 4, offsetof(struct pt_regs, di) }, - { "ip", 4, offsetof(struct pt_regs, ip) }, - { "flags", 4, offsetof(struct pt_regs, flags) }, - { "cs", 4, offsetof(struct pt_regs, cs) }, - { "ss", 4, offsetof(struct pt_regs, ss) }, - { "ds", 4, offsetof(struct pt_regs, ds) }, - { "es", 4, offsetof(struct pt_regs, es) }, -#else - { "ax", 8, offsetof(struct pt_regs, ax) }, - { "bx", 8, offsetof(struct pt_regs, bx) }, - { "cx", 8, offsetof(struct pt_regs, cx) }, - { "dx", 8, offsetof(struct pt_regs, dx) }, - { "si", 8, offsetof(struct pt_regs, dx) }, - { "di", 8, offsetof(struct pt_regs, di) }, - { "bp", 8, offsetof(struct pt_regs, bp) }, - { "sp", 8, offsetof(struct pt_regs, sp) }, - { "r8", 8, offsetof(struct pt_regs, r8) }, - { "r9", 8, offsetof(struct pt_regs, r9) }, - { "r10", 8, offsetof(struct pt_regs, r10) }, - { "r11", 8, offsetof(struct pt_regs, r11) }, - { "r12", 8, offsetof(struct pt_regs, r12) }, - { "r13", 8, offsetof(struct pt_regs, r13) }, - { "r14", 8, offsetof(struct pt_regs, r14) }, - { "r15", 8, offsetof(struct pt_regs, r15) }, - { "ip", 8, offsetof(struct pt_regs, ip) }, - { "flags", 4, offsetof(struct pt_regs, flags) }, - { "cs", 4, offsetof(struct pt_regs, cs) }, - { "ss", 4, offsetof(struct pt_regs, ss) }, - { "ds", 4, -1 }, - { "es", 4, -1 }, -#endif - { "fs", 4, -1 }, - { "gs", 4, -1 }, -}; - -int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) -{ - if ( -#ifdef CONFIG_X86_32 - regno == GDB_SS || regno == GDB_FS || regno == GDB_GS || -#endif - regno == GDB_SP || regno == GDB_ORIG_AX) - return 0; - - if (dbg_reg_def[regno].offset != -1) - memcpy((void *)regs + dbg_reg_def[regno].offset, mem, - dbg_reg_def[regno].size); - return 0; -} - -char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) -{ - if (regno == GDB_ORIG_AX) { - memcpy(mem, ®s->orig_ax, sizeof(regs->orig_ax)); - return "orig_ax"; - } - if (regno >= DBG_MAX_REG_NUM || regno < 0) - return NULL; - - if (dbg_reg_def[regno].offset != -1) - memcpy(mem, (void *)regs + dbg_reg_def[regno].offset, - dbg_reg_def[regno].size); - -#ifdef CONFIG_X86_32 - switch (regno) { - case GDB_SS: - if (!user_mode_vm(regs)) - *(unsigned long *)mem = __KERNEL_DS; - break; - case GDB_SP: - if (!user_mode_vm(regs)) - *(unsigned long *)mem = kernel_stack_pointer(regs); - break; - case GDB_GS: - case GDB_FS: - *(unsigned long *)mem = 0xFFFF; - break; - } -#endif - return dbg_reg_def[regno].name; -} - -/** - * sleeping_thread_to_gdb_regs - 
Convert ptrace regs to GDB regs - * @gdb_regs: A pointer to hold the registers in the order GDB wants. - * @p: The &struct task_struct of the desired process. - * - * Convert the register values of the sleeping process in @p to - * the format that GDB expects. - * This function is called when kgdb does not have access to the - * &struct pt_regs and therefore it should fill the gdb registers - * @gdb_regs with what has been saved in &struct thread_struct - * thread field during switch_to. - */ -void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) -{ -#ifndef CONFIG_X86_32 - u32 *gdb_regs32 = (u32 *)gdb_regs; -#endif - gdb_regs[GDB_AX] = 0; - gdb_regs[GDB_BX] = 0; - gdb_regs[GDB_CX] = 0; - gdb_regs[GDB_DX] = 0; - gdb_regs[GDB_SI] = 0; - gdb_regs[GDB_DI] = 0; - gdb_regs[GDB_BP] = *(unsigned long *)p->thread.sp; -#ifdef CONFIG_X86_32 - gdb_regs[GDB_DS] = __KERNEL_DS; - gdb_regs[GDB_ES] = __KERNEL_DS; - gdb_regs[GDB_PS] = 0; - gdb_regs[GDB_CS] = __KERNEL_CS; - gdb_regs[GDB_PC] = p->thread.ip; - gdb_regs[GDB_SS] = __KERNEL_DS; - gdb_regs[GDB_FS] = 0xFFFF; - gdb_regs[GDB_GS] = 0xFFFF; -#else - gdb_regs32[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); - gdb_regs32[GDB_CS] = __KERNEL_CS; - gdb_regs32[GDB_SS] = __KERNEL_DS; - gdb_regs[GDB_PC] = 0; - gdb_regs[GDB_R8] = 0; - gdb_regs[GDB_R9] = 0; - gdb_regs[GDB_R10] = 0; - gdb_regs[GDB_R11] = 0; - gdb_regs[GDB_R12] = 0; - gdb_regs[GDB_R13] = 0; - gdb_regs[GDB_R14] = 0; - gdb_regs[GDB_R15] = 0; -#endif - gdb_regs[GDB_SP] = p->thread.sp; -} - -static struct hw_breakpoint { - unsigned enabled; - unsigned long addr; - int len; - int type; - struct perf_event * __percpu *pev; -} breakinfo[HBP_NUM]; - -static unsigned long early_dr7; - -static void kgdb_correct_hw_break(void) -{ - int breakno; - - for (breakno = 0; breakno < HBP_NUM; breakno++) { - struct perf_event *bp; - struct arch_hw_breakpoint *info; - int val; - int cpu = raw_smp_processor_id(); - if (!breakinfo[breakno].enabled) - continue; - if (dbg_is_early) { - set_debugreg(breakinfo[breakno].addr, breakno); - early_dr7 |= encode_dr7(breakno, - breakinfo[breakno].len, - breakinfo[breakno].type); - set_debugreg(early_dr7, 7); - continue; - } - bp = *per_cpu_ptr(breakinfo[breakno].pev, cpu); - info = counter_arch_bp(bp); - if (bp->attr.disabled != 1) - continue; - bp->attr.bp_addr = breakinfo[breakno].addr; - bp->attr.bp_len = breakinfo[breakno].len; - bp->attr.bp_type = breakinfo[breakno].type; - info->address = breakinfo[breakno].addr; - info->len = breakinfo[breakno].len; - info->type = breakinfo[breakno].type; - val = arch_install_hw_breakpoint(bp); - if (!val) - bp->attr.disabled = 0; - } - if (!dbg_is_early) - hw_breakpoint_restore(); -} - -static int hw_break_reserve_slot(int breakno) -{ - int cpu; - int cnt = 0; - struct perf_event **pevent; - - if (dbg_is_early) - return 0; - - for_each_online_cpu(cpu) { - cnt++; - pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); - if (dbg_reserve_bp_slot(*pevent)) - goto fail; - } - - return 0; - -fail: - for_each_online_cpu(cpu) { - cnt--; - if (!cnt) - break; - pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); - dbg_release_bp_slot(*pevent); - } - return -1; -} - -static int hw_break_release_slot(int breakno) -{ - struct perf_event **pevent; - int cpu; - - if (dbg_is_early) - return 0; - - for_each_online_cpu(cpu) { - pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); - if (dbg_release_bp_slot(*pevent)) - /* - * The debugger is responsible for handing the retry on - * remove failure. 
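The encode_dr7() calls in kgdb_correct_hw_break() below pack a breakpoint's length and type into the DR7 control register. A simplified sketch of the hardware bit layout only (the real kernel helper lives in the hw_breakpoint code and also handles global/local enable policy, so treat this as illustrative): for debug register n, a local-enable bit at position 2n and a 4-bit control nibble at 16+4n holding the R/W type in its low two bits and the length in its high two bits.

#include <stdio.h>

/* Illustrative only: not the kernel's encode_dr7(). */
static unsigned long dr7_encode(int n, unsigned len, unsigned type)
{
        return (((unsigned long)(len << 2 | type)) << (16 + 4 * n)) | (1UL << (2 * n));
}

int main(void)
{
        /* breakpoint 1: 4-byte length (encoding 3), write access (type 1) */
        printf("dr7 |= %#lx\n", dr7_encode(1, 3, 1));
        return 0;
}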
- */ - return -1; - } - return 0; -} - -static int -kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) -{ - int i; - - for (i = 0; i < HBP_NUM; i++) - if (breakinfo[i].addr == addr && breakinfo[i].enabled) - break; - if (i == HBP_NUM) - return -1; - - if (hw_break_release_slot(i)) { - printk(KERN_ERR "Cannot remove hw breakpoint at %lx\n", addr); - return -1; - } - breakinfo[i].enabled = 0; - - return 0; -} - -static void kgdb_remove_all_hw_break(void) -{ - int i; - int cpu = raw_smp_processor_id(); - struct perf_event *bp; - - for (i = 0; i < HBP_NUM; i++) { - if (!breakinfo[i].enabled) - continue; - bp = *per_cpu_ptr(breakinfo[i].pev, cpu); - if (!bp->attr.disabled) { - arch_uninstall_hw_breakpoint(bp); - bp->attr.disabled = 1; - continue; - } - if (dbg_is_early) - early_dr7 &= ~encode_dr7(i, breakinfo[i].len, - breakinfo[i].type); - else if (hw_break_release_slot(i)) - printk(KERN_ERR "KGDB: hw bpt remove failed %lx\n", - breakinfo[i].addr); - breakinfo[i].enabled = 0; - } -} - -static int -kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) -{ - int i; - - for (i = 0; i < HBP_NUM; i++) - if (!breakinfo[i].enabled) - break; - if (i == HBP_NUM) - return -1; - - switch (bptype) { - case BP_HARDWARE_BREAKPOINT: - len = 1; - breakinfo[i].type = X86_BREAKPOINT_EXECUTE; - break; - case BP_WRITE_WATCHPOINT: - breakinfo[i].type = X86_BREAKPOINT_WRITE; - break; - case BP_ACCESS_WATCHPOINT: - breakinfo[i].type = X86_BREAKPOINT_RW; - break; - default: - return -1; - } - switch (len) { - case 1: - breakinfo[i].len = X86_BREAKPOINT_LEN_1; - break; - case 2: - breakinfo[i].len = X86_BREAKPOINT_LEN_2; - break; - case 4: - breakinfo[i].len = X86_BREAKPOINT_LEN_4; - break; -#ifdef CONFIG_X86_64 - case 8: - breakinfo[i].len = X86_BREAKPOINT_LEN_8; - break; -#endif - default: - return -1; - } - breakinfo[i].addr = addr; - if (hw_break_reserve_slot(i)) { - breakinfo[i].addr = 0; - return -1; - } - breakinfo[i].enabled = 1; - - return 0; -} - -/** - * kgdb_disable_hw_debug - Disable hardware debugging while we in kgdb. - * @regs: Current &struct pt_regs. - * - * This function will be called if the particular architecture must - * disable hardware debugging while it is processing gdb packets or - * handling exception. - */ -static void kgdb_disable_hw_debug(struct pt_regs *regs) -{ - int i; - int cpu = raw_smp_processor_id(); - struct perf_event *bp; - - /* Disable hardware debugging while we are in kgdb: */ - set_debugreg(0UL, 7); - for (i = 0; i < HBP_NUM; i++) { - if (!breakinfo[i].enabled) - continue; - if (dbg_is_early) { - early_dr7 &= ~encode_dr7(i, breakinfo[i].len, - breakinfo[i].type); - continue; - } - bp = *per_cpu_ptr(breakinfo[i].pev, cpu); - if (bp->attr.disabled == 1) - continue; - arch_uninstall_hw_breakpoint(bp); - bp->attr.disabled = 1; - } -} - -#ifdef CONFIG_SMP -/** - * kgdb_roundup_cpus - Get other CPUs into a holding pattern - * @flags: Current IRQ state - * - * On SMP systems, we need to get the attention of the other CPUs - * and get them be in a known state. This should do what is needed - * to get the other CPUs to call kgdb_wait(). Note that on some arches, - * the NMI approach is not used for rounding up all the CPUs. For example, - * in case of MIPS, smp_call_function() is used to roundup CPUs. In - * this case, we have to make sure that interrupts are enabled before - * calling smp_call_function(). The argument to this function is - * the flags that will be used when restoring the interrupts. 
There is - * local_irq_save() call before kgdb_roundup_cpus(). - * - * On non-SMP systems, this is not called. - */ -void kgdb_roundup_cpus(unsigned long flags) -{ - apic->send_IPI_allbutself(APIC_DM_NMI); -} -#endif - -/** - * kgdb_arch_handle_exception - Handle architecture specific GDB packets. - * @vector: The error vector of the exception that happened. - * @signo: The signal number of the exception that happened. - * @err_code: The error code of the exception that happened. - * @remcom_in_buffer: The buffer of the packet we have read. - * @remcom_out_buffer: The buffer of %BUFMAX bytes to write a packet into. - * @regs: The &struct pt_regs of the current process. - * - * This function MUST handle the 'c' and 's' command packets, - * as well packets to set / remove a hardware breakpoint, if used. - * If there are additional packets which the hardware needs to handle, - * they are handled here. The code should return -1 if it wants to - * process more packets, and a %0 or %1 if it wants to exit from the - * kgdb callback. - */ -int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, - char *remcomInBuffer, char *remcomOutBuffer, - struct pt_regs *linux_regs) -{ - unsigned long addr; - char *ptr; - - switch (remcomInBuffer[0]) { - case 'c': - case 's': - /* try to read optional parameter, pc unchanged if no parm */ - ptr = &remcomInBuffer[1]; - if (kgdb_hex2long(&ptr, &addr)) - linux_regs->ip = addr; - case 'D': - case 'k': - /* clear the trace bit */ - linux_regs->flags &= ~X86_EFLAGS_TF; - atomic_set(&kgdb_cpu_doing_single_step, -1); - - /* set the trace bit if we're stepping */ - if (remcomInBuffer[0] == 's') { - linux_regs->flags |= X86_EFLAGS_TF; - atomic_set(&kgdb_cpu_doing_single_step, - raw_smp_processor_id()); - } - - return 0; - } - - /* this means that we do not want to exit from the handler: */ - return -1; -} - -static inline int -single_step_cont(struct pt_regs *regs, struct die_args *args) -{ - /* - * Single step exception from kernel space to user space so - * eat the exception and continue the process: - */ - printk(KERN_ERR "KGDB: trap/step from kernel to user space, " - "resuming...\n"); - kgdb_arch_handle_exception(args->trapnr, args->signr, - args->err, "c", "", regs); - /* - * Reset the BS bit in dr6 (pointed by args->err) to - * denote completion of processing - */ - (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; - - return NOTIFY_STOP; -} - -static int was_in_debug_nmi[NR_CPUS]; - -static int kgdb_nmi_handler(unsigned int cmd, struct pt_regs *regs) -{ - switch (cmd) { - case NMI_LOCAL: - if (atomic_read(&kgdb_active) != -1) { - /* KGDB CPU roundup */ - kgdb_nmicallback(raw_smp_processor_id(), regs); - was_in_debug_nmi[raw_smp_processor_id()] = 1; - touch_nmi_watchdog(); - return NMI_HANDLED; - } - break; - - case NMI_UNKNOWN: - if (was_in_debug_nmi[raw_smp_processor_id()]) { - was_in_debug_nmi[raw_smp_processor_id()] = 0; - return NMI_HANDLED; - } - break; - default: - /* do nothing */ - break; - } - return NMI_DONE; -} - -static int __kgdb_notify(struct die_args *args, unsigned long cmd) -{ - struct pt_regs *regs = args->regs; - - switch (cmd) { - case DIE_DEBUG: - if (atomic_read(&kgdb_cpu_doing_single_step) != -1) { - if (user_mode(regs)) - return single_step_cont(regs, args); - break; - } else if (test_thread_flag(TIF_SINGLESTEP)) - /* This means a user thread is single stepping - * a system call which should be ignored - */ - return NOTIFY_DONE; - /* fall through */ - default: - if (user_mode(regs)) - return NOTIFY_DONE; - } - - if 
(kgdb_handle_exception(args->trapnr, args->signr, cmd, regs)) - return NOTIFY_DONE; - - /* Must touch watchdog before return to normal operation */ - touch_nmi_watchdog(); - return NOTIFY_STOP; -} - -int kgdb_ll_trap(int cmd, const char *str, - struct pt_regs *regs, long err, int trap, int sig) -{ - struct die_args args = { - .regs = regs, - .str = str, - .err = err, - .trapnr = trap, - .signr = sig, - - }; - - if (!kgdb_io_module_registered) - return NOTIFY_DONE; - - return __kgdb_notify(&args, cmd); -} - -static int -kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) -{ - unsigned long flags; - int ret; - - local_irq_save(flags); - ret = __kgdb_notify(ptr, cmd); - local_irq_restore(flags); - - return ret; -} - -static struct notifier_block kgdb_notifier = { - .notifier_call = kgdb_notify, -}; - -/** - * kgdb_arch_init - Perform any architecture specific initalization. - * - * This function will handle the initalization of any architecture - * specific callbacks. - */ -int kgdb_arch_init(void) -{ - int retval; - - retval = register_die_notifier(&kgdb_notifier); - if (retval) - goto out; - - retval = register_nmi_handler(NMI_LOCAL, kgdb_nmi_handler, - 0, "kgdb"); - if (retval) - goto out1; - - retval = register_nmi_handler(NMI_UNKNOWN, kgdb_nmi_handler, - 0, "kgdb"); - - if (retval) - goto out2; - - return retval; - -out2: - unregister_nmi_handler(NMI_LOCAL, "kgdb"); -out1: - unregister_die_notifier(&kgdb_notifier); -out: - return retval; -} - -static void kgdb_hw_overflow_handler(struct perf_event *event, - struct perf_sample_data *data, struct pt_regs *regs) -{ - struct task_struct *tsk = current; - int i; - - for (i = 0; i < 4; i++) - if (breakinfo[i].enabled) - tsk->thread.debugreg6 |= (DR_TRAP0 << i); -} - -void kgdb_arch_late(void) -{ - int i, cpu; - struct perf_event_attr attr; - struct perf_event **pevent; - - /* - * Pre-allocate the hw breakpoint structions in the non-atomic - * portion of kgdb because this operation requires mutexs to - * complete. - */ - hw_breakpoint_init(&attr); - attr.bp_addr = (unsigned long)kgdb_arch_init; - attr.bp_len = HW_BREAKPOINT_LEN_1; - attr.bp_type = HW_BREAKPOINT_W; - attr.disabled = 1; - for (i = 0; i < HBP_NUM; i++) { - if (breakinfo[i].pev) - continue; - breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL, NULL); - if (IS_ERR((void * __force)breakinfo[i].pev)) { - printk(KERN_ERR "kgdb: Could not allocate hw" - "breakpoints\nDisabling the kernel debugger\n"); - breakinfo[i].pev = NULL; - kgdb_arch_exit(); - return; - } - for_each_online_cpu(cpu) { - pevent = per_cpu_ptr(breakinfo[i].pev, cpu); - pevent[0]->hw.sample_period = 1; - pevent[0]->overflow_handler = kgdb_hw_overflow_handler; - if (pevent[0]->destroy != NULL) { - pevent[0]->destroy = NULL; - release_bp_slot(*pevent); - } - } - } -} - -/** - * kgdb_arch_exit - Perform any architecture specific uninitalization. - * - * This function will handle the uninitalization of any architecture - * specific callbacks, for dynamic registration and unregistration. - */ -void kgdb_arch_exit(void) -{ - int i; - for (i = 0; i < 4; i++) { - if (breakinfo[i].pev) { - unregister_wide_hw_breakpoint(breakinfo[i].pev); - breakinfo[i].pev = NULL; - } - } - unregister_nmi_handler(NMI_UNKNOWN, "kgdb"); - unregister_nmi_handler(NMI_LOCAL, "kgdb"); - unregister_die_notifier(&kgdb_notifier); -} - -/** - * - * kgdb_skipexception - Bail out of KGDB when we've been triggered. - * @exception: Exception vector number - * @regs: Current &struct pt_regs. 
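kgdb_arch_init() below uses the standard goto-unwind shape: each registration that succeeds must be undone in reverse order if a later one fails, so the function returns either fully initialized or fully cleaned up. A compact sketch of that pattern with placeholder resources (none of these names are real kernel APIs):

#include <stdio.h>

static int  register_a(void)   { return 0; }
static int  register_b(void)   { return 0; }
static int  register_c(void)   { return -1; }  /* pretend the last step fails */
static void unregister_a(void) { puts("undo a"); }
static void unregister_b(void) { puts("undo b"); }

static int arch_init_sketch(void)
{
        int ret;

        ret = register_a();
        if (ret)
                goto out;
        ret = register_b();
        if (ret)
                goto out1;
        ret = register_c();
        if (ret)
                goto out2;
        return 0;

out2:
        unregister_b();   /* undo in reverse order of registration */
out1:
        unregister_a();
out:
        return ret;
}

int main(void)
{
        printf("init -> %d\n", arch_init_sketch());
        return 0;
}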
- * - * On some architectures we need to skip a breakpoint exception when - * it occurs after a breakpoint has been removed. - * - * Skip an int3 exception when it occurs after a breakpoint has been - * removed. Backtrack eip by 1 since the int3 would have caused it to - * increment by 1. - */ -int kgdb_skipexception(int exception, struct pt_regs *regs) -{ - if (exception == 3 && kgdb_isremovedbreak(regs->ip - 1)) { - regs->ip -= 1; - return 1; - } - return 0; -} - -unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs) -{ - if (exception == 3) - return instruction_pointer(regs) - 1; - return instruction_pointer(regs); -} - -void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip) -{ - regs->ip = ip; -} - -int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) -{ - int err; - char opc[BREAK_INSTR_SIZE]; - - bpt->type = BP_BREAKPOINT; - err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, - BREAK_INSTR_SIZE); - if (err) - return err; - err = probe_kernel_write((char *)bpt->bpt_addr, - arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); -#ifdef CONFIG_DEBUG_RODATA - if (!err) - return err; - /* - * It is safe to call text_poke() because normal kernel execution - * is stopped on all cores, so long as the text_mutex is not locked. - */ - if (mutex_is_locked(&text_mutex)) - return -EBUSY; - text_poke((void *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr, - BREAK_INSTR_SIZE); - err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); - if (err) - return err; - if (memcmp(opc, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE)) - return -EINVAL; - bpt->type = BP_POKE_BREAKPOINT; -#endif /* CONFIG_DEBUG_RODATA */ - return err; -} - -int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) -{ -#ifdef CONFIG_DEBUG_RODATA - int err; - char opc[BREAK_INSTR_SIZE]; - - if (bpt->type != BP_POKE_BREAKPOINT) - goto knl_write; - /* - * It is safe to call text_poke() because normal kernel execution - * is stopped on all cores, so long as the text_mutex is not locked. - */ - if (mutex_is_locked(&text_mutex)) - goto knl_write; - text_poke((void *)bpt->bpt_addr, bpt->saved_instr, BREAK_INSTR_SIZE); - err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); - if (err || memcmp(opc, bpt->saved_instr, BREAK_INSTR_SIZE)) - goto knl_write; - return err; -knl_write: -#endif /* CONFIG_DEBUG_RODATA */ - return probe_kernel_write((char *)bpt->bpt_addr, - (char *)bpt->saved_instr, BREAK_INSTR_SIZE); -} - -struct kgdb_arch arch_kgdb_ops = { - /* Breakpoint instruction: */ - .gdb_bpt_instr = { 0xcc }, - .flags = KGDB_HW_BREAKPOINT, - .set_hw_breakpoint = kgdb_set_hw_break, - .remove_hw_breakpoint = kgdb_remove_hw_break, - .disable_hw_break = kgdb_disable_hw_debug, - .remove_all_hw_break = kgdb_remove_all_hw_break, - .correct_hw_break = kgdb_correct_hw_break, -}; diff --git a/ANDROID_3.4.5/arch/x86/kernel/kprobes-common.h b/ANDROID_3.4.5/arch/x86/kernel/kprobes-common.h deleted file mode 100644 index 3230b68e..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/kprobes-common.h +++ /dev/null @@ -1,102 +0,0 @@ -#ifndef __X86_KERNEL_KPROBES_COMMON_H -#define __X86_KERNEL_KPROBES_COMMON_H - -/* Kprobes and Optprobes common header */ - -#ifdef CONFIG_X86_64 -#define SAVE_REGS_STRING \ - /* Skip cs, ip, orig_ax. 
*/ \ - " subq $24, %rsp\n" \ - " pushq %rdi\n" \ - " pushq %rsi\n" \ - " pushq %rdx\n" \ - " pushq %rcx\n" \ - " pushq %rax\n" \ - " pushq %r8\n" \ - " pushq %r9\n" \ - " pushq %r10\n" \ - " pushq %r11\n" \ - " pushq %rbx\n" \ - " pushq %rbp\n" \ - " pushq %r12\n" \ - " pushq %r13\n" \ - " pushq %r14\n" \ - " pushq %r15\n" -#define RESTORE_REGS_STRING \ - " popq %r15\n" \ - " popq %r14\n" \ - " popq %r13\n" \ - " popq %r12\n" \ - " popq %rbp\n" \ - " popq %rbx\n" \ - " popq %r11\n" \ - " popq %r10\n" \ - " popq %r9\n" \ - " popq %r8\n" \ - " popq %rax\n" \ - " popq %rcx\n" \ - " popq %rdx\n" \ - " popq %rsi\n" \ - " popq %rdi\n" \ - /* Skip orig_ax, ip, cs */ \ - " addq $24, %rsp\n" -#else -#define SAVE_REGS_STRING \ - /* Skip cs, ip, orig_ax and gs. */ \ - " subl $16, %esp\n" \ - " pushl %fs\n" \ - " pushl %es\n" \ - " pushl %ds\n" \ - " pushl %eax\n" \ - " pushl %ebp\n" \ - " pushl %edi\n" \ - " pushl %esi\n" \ - " pushl %edx\n" \ - " pushl %ecx\n" \ - " pushl %ebx\n" -#define RESTORE_REGS_STRING \ - " popl %ebx\n" \ - " popl %ecx\n" \ - " popl %edx\n" \ - " popl %esi\n" \ - " popl %edi\n" \ - " popl %ebp\n" \ - " popl %eax\n" \ - /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\ - " addl $24, %esp\n" -#endif - -/* Ensure if the instruction can be boostable */ -extern int can_boost(kprobe_opcode_t *instruction); -/* Recover instruction if given address is probed */ -extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf, - unsigned long addr); -/* - * Copy an instruction and adjust the displacement if the instruction - * uses the %rip-relative addressing mode. - */ -extern int __copy_instruction(u8 *dest, u8 *src); - -/* Generate a relative-jump/call instruction */ -extern void synthesize_reljump(void *from, void *to); -extern void synthesize_relcall(void *from, void *to); - -#ifdef CONFIG_OPTPROBES -extern int arch_init_optprobes(void); -extern int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter); -extern unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr); -#else /* !CONFIG_OPTPROBES */ -static inline int arch_init_optprobes(void) -{ - return 0; -} -static inline int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) -{ - return 0; -} -static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr) -{ - return addr; -} -#endif -#endif diff --git a/ANDROID_3.4.5/arch/x86/kernel/kprobes-opt.c b/ANDROID_3.4.5/arch/x86/kernel/kprobes-opt.c deleted file mode 100644 index c5e410ee..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/kprobes-opt.c +++ /dev/null @@ -1,512 +0,0 @@ -/* - * Kernel Probes Jump Optimization (Optprobes) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- * - * Copyright (C) IBM Corporation, 2002, 2004 - * Copyright (C) Hitachi Ltd., 2012 - */ -#include <linux/kprobes.h> -#include <linux/ptrace.h> -#include <linux/string.h> -#include <linux/slab.h> -#include <linux/hardirq.h> -#include <linux/preempt.h> -#include <linux/module.h> -#include <linux/kdebug.h> -#include <linux/kallsyms.h> -#include <linux/ftrace.h> - -#include <asm/cacheflush.h> -#include <asm/desc.h> -#include <asm/pgtable.h> -#include <asm/uaccess.h> -#include <asm/alternative.h> -#include <asm/insn.h> -#include <asm/debugreg.h> - -#include "kprobes-common.h" - -unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr) -{ - struct optimized_kprobe *op; - struct kprobe *kp; - long offs; - int i; - - for (i = 0; i < RELATIVEJUMP_SIZE; i++) { - kp = get_kprobe((void *)addr - i); - /* This function only handles jump-optimized kprobe */ - if (kp && kprobe_optimized(kp)) { - op = container_of(kp, struct optimized_kprobe, kp); - /* If op->list is not empty, op is under optimizing */ - if (list_empty(&op->list)) - goto found; - } - } - - return addr; -found: - /* - * If the kprobe can be optimized, original bytes which can be - * overwritten by jump destination address. In this case, original - * bytes must be recovered from op->optinsn.copied_insn buffer. - */ - memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); - if (addr == (unsigned long)kp->addr) { - buf[0] = kp->opcode; - memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); - } else { - offs = addr - (unsigned long)kp->addr - 1; - memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs); - } - - return (unsigned long)buf; -} - -/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */ -static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) -{ -#ifdef CONFIG_X86_64 - *addr++ = 0x48; - *addr++ = 0xbf; -#else - *addr++ = 0xb8; -#endif - *(unsigned long *)addr = val; -} - -static void __used __kprobes kprobes_optinsn_template_holder(void) -{ - asm volatile ( - ".global optprobe_template_entry\n" - "optprobe_template_entry:\n" -#ifdef CONFIG_X86_64 - /* We don't bother saving the ss register */ - " pushq %rsp\n" - " pushfq\n" - SAVE_REGS_STRING - " movq %rsp, %rsi\n" - ".global optprobe_template_val\n" - "optprobe_template_val:\n" - ASM_NOP5 - ASM_NOP5 - ".global optprobe_template_call\n" - "optprobe_template_call:\n" - ASM_NOP5 - /* Move flags to rsp */ - " movq 144(%rsp), %rdx\n" - " movq %rdx, 152(%rsp)\n" - RESTORE_REGS_STRING - /* Skip flags entry */ - " addq $8, %rsp\n" - " popfq\n" -#else /* CONFIG_X86_32 */ - " pushf\n" - SAVE_REGS_STRING - " movl %esp, %edx\n" - ".global optprobe_template_val\n" - "optprobe_template_val:\n" - ASM_NOP5 - ".global optprobe_template_call\n" - "optprobe_template_call:\n" - ASM_NOP5 - RESTORE_REGS_STRING - " addl $4, %esp\n" /* skip cs */ - " popf\n" -#endif - ".global optprobe_template_end\n" - "optprobe_template_end:\n"); -} - -#define TMPL_MOVE_IDX \ - ((long)&optprobe_template_val - (long)&optprobe_template_entry) -#define TMPL_CALL_IDX \ - ((long)&optprobe_template_call - (long)&optprobe_template_entry) -#define TMPL_END_IDX \ - ((long)&optprobe_template_end - (long)&optprobe_template_entry) - -#define INT3_SIZE sizeof(kprobe_opcode_t) - -/* Optimized kprobe call back function: called from optinsn */ -static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) -{ - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - unsigned long flags; - - /* This is 
possible if op is under delayed unoptimizing */ - if (kprobe_disabled(&op->kp)) - return; - - local_irq_save(flags); - if (kprobe_running()) { - kprobes_inc_nmissed_count(&op->kp); - } else { - /* Save skipped registers */ -#ifdef CONFIG_X86_64 - regs->cs = __KERNEL_CS; -#else - regs->cs = __KERNEL_CS | get_kernel_rpl(); - regs->gs = 0; -#endif - regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; - regs->orig_ax = ~0UL; - - __this_cpu_write(current_kprobe, &op->kp); - kcb->kprobe_status = KPROBE_HIT_ACTIVE; - opt_pre_handler(&op->kp, regs); - __this_cpu_write(current_kprobe, NULL); - } - local_irq_restore(flags); -} - -static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) -{ - int len = 0, ret; - - while (len < RELATIVEJUMP_SIZE) { - ret = __copy_instruction(dest + len, src + len); - if (!ret || !can_boost(dest + len)) - return -EINVAL; - len += ret; - } - /* Check whether the address range is reserved */ - if (ftrace_text_reserved(src, src + len - 1) || - alternatives_text_reserved(src, src + len - 1) || - jump_label_text_reserved(src, src + len - 1)) - return -EBUSY; - - return len; -} - -/* Check whether insn is indirect jump */ -static int __kprobes insn_is_indirect_jump(struct insn *insn) -{ - return ((insn->opcode.bytes[0] == 0xff && - (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ - insn->opcode.bytes[0] == 0xea); /* Segment based jump */ -} - -/* Check whether insn jumps into specified address range */ -static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) -{ - unsigned long target = 0; - - switch (insn->opcode.bytes[0]) { - case 0xe0: /* loopne */ - case 0xe1: /* loope */ - case 0xe2: /* loop */ - case 0xe3: /* jcxz */ - case 0xe9: /* near relative jump */ - case 0xeb: /* short relative jump */ - break; - case 0x0f: - if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */ - break; - return 0; - default: - if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */ - break; - return 0; - } - target = (unsigned long)insn->next_byte + insn->immediate.value; - - return (start <= target && target <= start + len); -} - -/* Decode whole function to ensure any instructions don't jump into target */ -static int __kprobes can_optimize(unsigned long paddr) -{ - unsigned long addr, size = 0, offset = 0; - struct insn insn; - kprobe_opcode_t buf[MAX_INSN_SIZE]; - - /* Lookup symbol including addr */ - if (!kallsyms_lookup_size_offset(paddr, &size, &offset)) - return 0; - - /* - * Do not optimize in the entry code due to the unstable - * stack handling. - */ - if ((paddr >= (unsigned long)__entry_text_start) && - (paddr < (unsigned long)__entry_text_end)) - return 0; - - /* Check there is enough space for a relative jump. */ - if (size - offset < RELATIVEJUMP_SIZE) - return 0; - - /* Decode instructions */ - addr = paddr - offset; - while (addr < paddr - offset + size) { /* Decode until function end */ - if (search_exception_tables(addr)) - /* - * Since some fixup code will jumps into this function, - * we can't optimize kprobe in this function. 
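insn_jump_into_range() below computes an x86 relative branch target as the address of the next instruction plus the signed immediate, and can_optimize() refuses the probe if any such target lands inside the bytes that would be displaced. A sketch of that check with invented addresses:

#include <stdio.h>

static int jumps_into_range(unsigned long next_byte, long imm,
                            unsigned long start, unsigned long len)
{
        unsigned long target = next_byte + imm;   /* relative branch destination */

        return start <= target && target <= start + len;
}

int main(void)
{
        /* 2-byte jcc short at 0x1000: next_byte is 0x1002 */
        printf("%d\n", jumps_into_range(0x1002, 4, 0x1001, 4));   /* 0: lands outside */
        printf("%d\n", jumps_into_range(0x1002, 3, 0x1001, 4));   /* 1: lands inside  */
        return 0;
}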
- */ - return 0; - kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, addr)); - insn_get_length(&insn); - /* Another subsystem puts a breakpoint */ - if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) - return 0; - /* Recover address */ - insn.kaddr = (void *)addr; - insn.next_byte = (void *)(addr + insn.length); - /* Check any instructions don't jump into target */ - if (insn_is_indirect_jump(&insn) || - insn_jump_into_range(&insn, paddr + INT3_SIZE, - RELATIVE_ADDR_SIZE)) - return 0; - addr += insn.length; - } - - return 1; -} - -/* Check optimized_kprobe can actually be optimized. */ -int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op) -{ - int i; - struct kprobe *p; - - for (i = 1; i < op->optinsn.size; i++) { - p = get_kprobe(op->kp.addr + i); - if (p && !kprobe_disabled(p)) - return -EEXIST; - } - - return 0; -} - -/* Check the addr is within the optimized instructions. */ -int __kprobes -arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr) -{ - return ((unsigned long)op->kp.addr <= addr && - (unsigned long)op->kp.addr + op->optinsn.size > addr); -} - -/* Free optimized instruction slot */ -static __kprobes -void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) -{ - if (op->optinsn.insn) { - free_optinsn_slot(op->optinsn.insn, dirty); - op->optinsn.insn = NULL; - op->optinsn.size = 0; - } -} - -void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op) -{ - __arch_remove_optimized_kprobe(op, 1); -} - -/* - * Copy replacing target instructions - * Target instructions MUST be relocatable (checked inside) - * This is called when new aggr(opt)probe is allocated or reused. - */ -int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) -{ - u8 *buf; - int ret; - long rel; - - if (!can_optimize((unsigned long)op->kp.addr)) - return -EILSEQ; - - op->optinsn.insn = get_optinsn_slot(); - if (!op->optinsn.insn) - return -ENOMEM; - - /* - * Verify if the address gap is in 2GB range, because this uses - * a relative jump. 
- */ - rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE; - if (abs(rel) > 0x7fffffff) - return -ERANGE; - - buf = (u8 *)op->optinsn.insn; - - /* Copy instructions into the out-of-line buffer */ - ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr); - if (ret < 0) { - __arch_remove_optimized_kprobe(op, 0); - return ret; - } - op->optinsn.size = ret; - - /* Copy arch-dep-instance from template */ - memcpy(buf, &optprobe_template_entry, TMPL_END_IDX); - - /* Set probe information */ - synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op); - - /* Set probe function call */ - synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback); - - /* Set returning jmp instruction at the tail of out-of-line buffer */ - synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size, - (u8 *)op->kp.addr + op->optinsn.size); - - flush_icache_range((unsigned long) buf, - (unsigned long) buf + TMPL_END_IDX + - op->optinsn.size + RELATIVEJUMP_SIZE); - return 0; -} - -#define MAX_OPTIMIZE_PROBES 256 -static struct text_poke_param *jump_poke_params; -static struct jump_poke_buffer { - u8 buf[RELATIVEJUMP_SIZE]; -} *jump_poke_bufs; - -static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm, - u8 *insn_buf, - struct optimized_kprobe *op) -{ - s32 rel = (s32)((long)op->optinsn.insn - - ((long)op->kp.addr + RELATIVEJUMP_SIZE)); - - /* Backup instructions which will be replaced by jump address */ - memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, - RELATIVE_ADDR_SIZE); - - insn_buf[0] = RELATIVEJUMP_OPCODE; - *(s32 *)(&insn_buf[1]) = rel; - - tprm->addr = op->kp.addr; - tprm->opcode = insn_buf; - tprm->len = RELATIVEJUMP_SIZE; -} - -/* - * Replace breakpoints (int3) with relative jumps. - * Caller must call with locking kprobe_mutex and text_mutex. - */ -void __kprobes arch_optimize_kprobes(struct list_head *oplist) -{ - struct optimized_kprobe *op, *tmp; - int c = 0; - - list_for_each_entry_safe(op, tmp, oplist, list) { - WARN_ON(kprobe_disabled(&op->kp)); - /* Setup param */ - setup_optimize_kprobe(&jump_poke_params[c], - jump_poke_bufs[c].buf, op); - list_del_init(&op->list); - if (++c >= MAX_OPTIMIZE_PROBES) - break; - } - - /* - * text_poke_smp doesn't support NMI/MCE code modifying. - * However, since kprobes itself also doesn't support NMI/MCE - * code probing, it's not a problem. - */ - text_poke_smp_batch(jump_poke_params, c); -} - -static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm, - u8 *insn_buf, - struct optimized_kprobe *op) -{ - /* Set int3 to first byte for kprobes */ - insn_buf[0] = BREAKPOINT_INSTRUCTION; - memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); - - tprm->addr = op->kp.addr; - tprm->opcode = insn_buf; - tprm->len = RELATIVEJUMP_SIZE; -} - -/* - * Recover original instructions and breakpoints from relative jumps. - * Caller must call with locking kprobe_mutex. - */ -extern void arch_unoptimize_kprobes(struct list_head *oplist, - struct list_head *done_list) -{ - struct optimized_kprobe *op, *tmp; - int c = 0; - - list_for_each_entry_safe(op, tmp, oplist, list) { - /* Setup param */ - setup_unoptimize_kprobe(&jump_poke_params[c], - jump_poke_bufs[c].buf, op); - list_move(&op->list, done_list); - if (++c >= MAX_OPTIMIZE_PROBES) - break; - } - - /* - * text_poke_smp doesn't support NMI/MCE code modifying. - * However, since kprobes itself also doesn't support NMI/MCE - * code probing, it's not a problem. 
- */ - text_poke_smp_batch(jump_poke_params, c); -} - -/* Replace a relative jump with a breakpoint (int3). */ -void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op) -{ - u8 buf[RELATIVEJUMP_SIZE]; - - /* Set int3 to first byte for kprobes */ - buf[0] = BREAKPOINT_INSTRUCTION; - memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); - text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE); -} - -int __kprobes -setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) -{ - struct optimized_kprobe *op; - - if (p->flags & KPROBE_FLAG_OPTIMIZED) { - /* This kprobe is really able to run optimized path. */ - op = container_of(p, struct optimized_kprobe, kp); - /* Detour through copied instructions */ - regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX; - if (!reenter) - reset_current_kprobe(); - preempt_enable_no_resched(); - return 1; - } - return 0; -} - -int __kprobes arch_init_optprobes(void) -{ - /* Allocate code buffer and parameter array */ - jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) * - MAX_OPTIMIZE_PROBES, GFP_KERNEL); - if (!jump_poke_bufs) - return -ENOMEM; - - jump_poke_params = kmalloc(sizeof(struct text_poke_param) * - MAX_OPTIMIZE_PROBES, GFP_KERNEL); - if (!jump_poke_params) { - kfree(jump_poke_bufs); - jump_poke_bufs = NULL; - return -ENOMEM; - } - - return 0; -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/kprobes.c b/ANDROID_3.4.5/arch/x86/kernel/kprobes.c deleted file mode 100644 index e213fc84..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/kprobes.c +++ /dev/null @@ -1,1063 +0,0 @@ -/* - * Kernel Probes (KProbes) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright (C) IBM Corporation, 2002, 2004 - * - * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel - * Probes initial implementation ( includes contributions from - * Rusty Russell). - * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes - * interface to access function arguments. - * 2004-Oct Jim Keniston <jkenisto@us.ibm.com> and Prasanna S Panchamukhi - * <prasanna@in.ibm.com> adapted for x86_64 from i386. - * 2005-Mar Roland McGrath <roland@redhat.com> - * Fixed to handle %rip-relative addressing mode correctly. - * 2005-May Hien Nguyen <hien@us.ibm.com>, Jim Keniston - * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi - * <prasanna@in.ibm.com> added function-return probes. - * 2005-May Rusty Lynch <rusty.lynch@intel.com> - * Added function return probes functionality - * 2006-Feb Masami Hiramatsu <hiramatu@sdl.hitachi.co.jp> added - * kprobe-booster and kretprobe-booster for i386. 
- * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com> added kprobe-booster - * and kretprobe-booster for x86-64 - * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com>, Arjan van de Ven - * <arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com> - * unified x86 kprobes code. - */ -#include <linux/kprobes.h> -#include <linux/ptrace.h> -#include <linux/string.h> -#include <linux/slab.h> -#include <linux/hardirq.h> -#include <linux/preempt.h> -#include <linux/module.h> -#include <linux/kdebug.h> -#include <linux/kallsyms.h> -#include <linux/ftrace.h> - -#include <asm/cacheflush.h> -#include <asm/desc.h> -#include <asm/pgtable.h> -#include <asm/uaccess.h> -#include <asm/alternative.h> -#include <asm/insn.h> -#include <asm/debugreg.h> - -#include "kprobes-common.h" - -void jprobe_return_end(void); - -DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; -DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); - -#define stack_addr(regs) ((unsigned long *)kernel_stack_pointer(regs)) - -#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ - (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ - (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \ - (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \ - (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \ - << (row % 32)) - /* - * Undefined/reserved opcodes, conditional jump, Opcode Extension - * Groups, and some special opcodes can not boost. - * This is non-const and volatile to keep gcc from statically - * optimizing it out, as variable_test_bit makes gcc think only - * *(unsigned long*) is used. - */ -static volatile u32 twobyte_is_boostable[256 / 32] = { - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ - /* ---------------------------------------------- */ - W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */ - W(0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 10 */ - W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 20 */ - W(0x30, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */ - W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ - W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */ - W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1) | /* 60 */ - W(0x70, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */ - W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 80 */ - W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ - W(0xa0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) | /* a0 */ - W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) , /* b0 */ - W(0xc0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */ - W(0xd0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) , /* d0 */ - W(0xe0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) | /* e0 */ - W(0xf0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0) /* f0 */ - /* ----------------------------------------------- */ - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ -}; -#undef W - -struct kretprobe_blackpoint kretprobe_blacklist[] = { - {"__switch_to", }, /* This function switches only current task, but - doesn't switch kernel stack.*/ - {NULL, NULL} /* Terminator */ -}; - -const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); - -static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) -{ - struct __arch_relative_insn { - u8 op; - s32 raddr; - } __attribute__((packed)) *insn; - - insn = (struct __arch_relative_insn *)from; - insn->raddr = (s32)((long)(to) - 
((long)(from) + 5)); - insn->op = op; -} - -/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ -void __kprobes synthesize_reljump(void *from, void *to) -{ - __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE); -} - -/* Insert a call instruction at address 'from', which calls address 'to'.*/ -void __kprobes synthesize_relcall(void *from, void *to) -{ - __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE); -} - -/* - * Skip the prefixes of the instruction. - */ -static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn) -{ - insn_attr_t attr; - - attr = inat_get_opcode_attribute((insn_byte_t)*insn); - while (inat_is_legacy_prefix(attr)) { - insn++; - attr = inat_get_opcode_attribute((insn_byte_t)*insn); - } -#ifdef CONFIG_X86_64 - if (inat_is_rex_prefix(attr)) - insn++; -#endif - return insn; -} - -/* - * Returns non-zero if opcode is boostable. - * RIP relative instructions are adjusted at copying time in 64 bits mode - */ -int __kprobes can_boost(kprobe_opcode_t *opcodes) -{ - kprobe_opcode_t opcode; - kprobe_opcode_t *orig_opcodes = opcodes; - - if (search_exception_tables((unsigned long)opcodes)) - return 0; /* Page fault may occur on this address. */ - -retry: - if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1) - return 0; - opcode = *(opcodes++); - - /* 2nd-byte opcode */ - if (opcode == 0x0f) { - if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1) - return 0; - return test_bit(*opcodes, - (unsigned long *)twobyte_is_boostable); - } - - switch (opcode & 0xf0) { -#ifdef CONFIG_X86_64 - case 0x40: - goto retry; /* REX prefix is boostable */ -#endif - case 0x60: - if (0x63 < opcode && opcode < 0x67) - goto retry; /* prefixes */ - /* can't boost Address-size override and bound */ - return (opcode != 0x62 && opcode != 0x67); - case 0x70: - return 0; /* can't boost conditional jump */ - case 0xc0: - /* can't boost software-interruptions */ - return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf; - case 0xd0: - /* can boost AA* and XLAT */ - return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7); - case 0xe0: - /* can boost in/out and absolute jmps */ - return ((opcode & 0x04) || opcode == 0xea); - case 0xf0: - if ((opcode & 0x0c) == 0 && opcode != 0xf1) - goto retry; /* lock/rep(ne) prefix */ - /* clear and set flags are boostable */ - return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe)); - default: - /* segment override prefixes are boostable */ - if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e) - goto retry; /* prefixes */ - /* CS override prefix and call are not boostable */ - return (opcode != 0x2e && opcode != 0x9a); - } -} - -static unsigned long -__recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) -{ - struct kprobe *kp; - - kp = get_kprobe((void *)addr); - /* There is no probe, return original address */ - if (!kp) - return addr; - - /* - * Basically, kp->ainsn.insn has an original instruction. - * However, RIP-relative instruction can not do single-stepping - * at different place, __copy_instruction() tweaks the displacement of - * that instruction. In that case, we can't recover the instruction - * from the kp->ainsn.insn. - * - * On the other hand, kp->opcode has a copy of the first byte of - * the probed instruction, which is overwritten by int3. And - * the instruction at kp->addr is not modified by kprobes except - * for the first byte, we can recover the original instruction - * from it and kp->opcode. 
- */ - memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); - buf[0] = kp->opcode; - return (unsigned long)buf; -} - -/* - * Recover the probed instruction at addr for further analysis. - * Caller must lock kprobes by kprobe_mutex, or disable preemption - * for preventing to release referencing kprobes. - */ -unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) -{ - unsigned long __addr; - - __addr = __recover_optprobed_insn(buf, addr); - if (__addr != addr) - return __addr; - - return __recover_probed_insn(buf, addr); -} - -/* Check if paddr is at an instruction boundary */ -static int __kprobes can_probe(unsigned long paddr) -{ - unsigned long addr, __addr, offset = 0; - struct insn insn; - kprobe_opcode_t buf[MAX_INSN_SIZE]; - - if (!kallsyms_lookup_size_offset(paddr, NULL, &offset)) - return 0; - - /* Decode instructions */ - addr = paddr - offset; - while (addr < paddr) { - /* - * Check if the instruction has been modified by another - * kprobe, in which case we replace the breakpoint by the - * original instruction in our buffer. - * Also, jump optimization will change the breakpoint to - * relative-jump. Since the relative-jump itself is - * normally used, we just go through if there is no kprobe. - */ - __addr = recover_probed_instruction(buf, addr); - kernel_insn_init(&insn, (void *)__addr); - insn_get_length(&insn); - - /* - * Another debugging subsystem might insert this breakpoint. - * In that case, we can't recover it. - */ - if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) - return 0; - addr += insn.length; - } - - return (addr == paddr); -} - -/* - * Returns non-zero if opcode modifies the interrupt flag. - */ -static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) -{ - /* Skip prefixes */ - insn = skip_prefixes(insn); - - switch (*insn) { - case 0xfa: /* cli */ - case 0xfb: /* sti */ - case 0xcf: /* iret/iretd */ - case 0x9d: /* popf/popfd */ - return 1; - } - - return 0; -} - -/* - * Copy an instruction and adjust the displacement if the instruction - * uses the %rip-relative addressing mode. - * If it does, Return the address of the 32-bit displacement word. - * If not, return null. - * Only applicable to 64-bit x86. - */ -int __kprobes __copy_instruction(u8 *dest, u8 *src) -{ - struct insn insn; - kprobe_opcode_t buf[MAX_INSN_SIZE]; - - kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, (unsigned long)src)); - insn_get_length(&insn); - /* Another subsystem puts a breakpoint, failed to recover */ - if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) - return 0; - memcpy(dest, insn.kaddr, insn.length); - -#ifdef CONFIG_X86_64 - if (insn_rip_relative(&insn)) { - s64 newdisp; - u8 *disp; - kernel_insn_init(&insn, dest); - insn_get_displacement(&insn); - /* - * The copied instruction uses the %rip-relative addressing - * mode. Adjust the displacement for the difference between - * the original location of this instruction and the location - * of the copy that will actually be run. The tricky bit here - * is making sure that the sign extension happens correctly in - * this calculation, since we need a signed 32-bit result to - * be sign-extended to 64 bits when it's added to the %rip - * value and yield the same 64-bit result that the sign- - * extension of the original signed 32-bit displacement would - * have given. - */ - newdisp = (u8 *) src + (s64) insn.displacement.value - (u8 *) dest; - BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. 
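/*
 * Illustrative sketch by the editor, not part of the deleted file: the
 * comment above describes how __copy_instruction() rewrites a
 * %rip-relative displacement when the instruction will run from a copy.
 * Because the copy keeps the original instruction length, the "next
 * instruction" terms cancel and the fix-up reduces to
 * new_disp = src + old_disp - dest.  Names and addresses below are
 * invented for the example; plain user-space C.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static int32_t relocate_disp32(uint64_t src, uint64_t dest, int32_t old_disp)
{
	/* do the arithmetic in 64 bits, then check it still fits a rel32 field */
	int64_t new_disp = (int64_t)src + old_disp - (int64_t)dest;

	assert(new_disp == (int64_t)(int32_t)new_disp);
	return (int32_t)new_disp;
}

int main(void)
{
	uint64_t src = 0x401000, dest = 0x410000;	/* original and copy */
	int32_t old_disp = 0x1234;

	/* both encodings must reach the same absolute target address */
	printf("via original: %#llx\n", (unsigned long long)(src + old_disp));
	printf("via copy    : %#llx\n",
	       (unsigned long long)(dest + relocate_disp32(src, dest, old_disp)));
	return 0;
}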
*/ - disp = (u8 *) dest + insn_offset_displacement(&insn); - *(s32 *) disp = (s32) newdisp; - } -#endif - return insn.length; -} - -static void __kprobes arch_copy_kprobe(struct kprobe *p) -{ - /* Copy an instruction with recovering if other optprobe modifies it.*/ - __copy_instruction(p->ainsn.insn, p->addr); - - /* - * __copy_instruction can modify the displacement of the instruction, - * but it doesn't affect boostable check. - */ - if (can_boost(p->ainsn.insn)) - p->ainsn.boostable = 0; - else - p->ainsn.boostable = -1; - - /* Also, displacement change doesn't affect the first byte */ - p->opcode = p->ainsn.insn[0]; -} - -int __kprobes arch_prepare_kprobe(struct kprobe *p) -{ - if (alternatives_text_reserved(p->addr, p->addr)) - return -EINVAL; - - if (!can_probe((unsigned long)p->addr)) - return -EILSEQ; - /* insn: must be on special executable page on x86. */ - p->ainsn.insn = get_insn_slot(); - if (!p->ainsn.insn) - return -ENOMEM; - arch_copy_kprobe(p); - return 0; -} - -void __kprobes arch_arm_kprobe(struct kprobe *p) -{ - text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1); -} - -void __kprobes arch_disarm_kprobe(struct kprobe *p) -{ - text_poke(p->addr, &p->opcode, 1); -} - -void __kprobes arch_remove_kprobe(struct kprobe *p) -{ - if (p->ainsn.insn) { - free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1)); - p->ainsn.insn = NULL; - } -} - -static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) -{ - kcb->prev_kprobe.kp = kprobe_running(); - kcb->prev_kprobe.status = kcb->kprobe_status; - kcb->prev_kprobe.old_flags = kcb->kprobe_old_flags; - kcb->prev_kprobe.saved_flags = kcb->kprobe_saved_flags; -} - -static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) -{ - __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); - kcb->kprobe_status = kcb->prev_kprobe.status; - kcb->kprobe_old_flags = kcb->prev_kprobe.old_flags; - kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags; -} - -static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) -{ - __this_cpu_write(current_kprobe, p); - kcb->kprobe_saved_flags = kcb->kprobe_old_flags - = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF)); - if (is_IF_modifier(p->ainsn.insn)) - kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF; -} - -static void __kprobes clear_btf(void) -{ - if (test_thread_flag(TIF_BLOCKSTEP)) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl &= ~DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); - } -} - -static void __kprobes restore_btf(void) -{ - if (test_thread_flag(TIF_BLOCKSTEP)) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl |= DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); - } -} - -void __kprobes -arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) -{ - unsigned long *sara = stack_addr(regs); - - ri->ret_addr = (kprobe_opcode_t *) *sara; - - /* Replace the return addr with trampoline addr */ - *sara = (unsigned long) &kretprobe_trampoline; -} - -static void __kprobes -setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, int reenter) -{ - if (setup_detour_execution(p, regs, reenter)) - return; - -#if !defined(CONFIG_PREEMPT) - if (p->ainsn.boostable == 1 && !p->post_handler) { - /* Boost up -- we can execute copied instructions directly */ - if (!reenter) - reset_current_kprobe(); - /* - * Reentering boosted probe doesn't reset current_kprobe, - * nor set current_kprobe, because it doesn't use single - * stepping. 
- */ - regs->ip = (unsigned long)p->ainsn.insn; - preempt_enable_no_resched(); - return; - } -#endif - if (reenter) { - save_previous_kprobe(kcb); - set_current_kprobe(p, regs, kcb); - kcb->kprobe_status = KPROBE_REENTER; - } else - kcb->kprobe_status = KPROBE_HIT_SS; - /* Prepare real single stepping */ - clear_btf(); - regs->flags |= X86_EFLAGS_TF; - regs->flags &= ~X86_EFLAGS_IF; - /* single step inline if the instruction is an int3 */ - if (p->opcode == BREAKPOINT_INSTRUCTION) - regs->ip = (unsigned long)p->addr; - else - regs->ip = (unsigned long)p->ainsn.insn; -} - -/* - * We have reentered the kprobe_handler(), since another probe was hit while - * within the handler. We save the original kprobes variables and just single - * step on the instruction of the new probe without calling any user handlers. - */ -static int __kprobes -reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) -{ - switch (kcb->kprobe_status) { - case KPROBE_HIT_SSDONE: - case KPROBE_HIT_ACTIVE: - kprobes_inc_nmissed_count(p); - setup_singlestep(p, regs, kcb, 1); - break; - case KPROBE_HIT_SS: - /* A probe has been hit in the codepath leading up to, or just - * after, single-stepping of a probed instruction. This entire - * codepath should strictly reside in .kprobes.text section. - * Raise a BUG or we'll continue in an endless reentering loop - * and eventually a stack overflow. - */ - printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n", - p->addr); - dump_kprobe(p); - BUG(); - default: - /* impossible cases */ - WARN_ON(1); - return 0; - } - - return 1; -} - -/* - * Interrupts are disabled on entry as trap3 is an interrupt gate and they - * remain disabled throughout this function. - */ -static int __kprobes kprobe_handler(struct pt_regs *regs) -{ - kprobe_opcode_t *addr; - struct kprobe *p; - struct kprobe_ctlblk *kcb; - - addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t)); - /* - * We don't want to be preempted for the entire - * duration of kprobe processing. We conditionally - * re-enable preemption at the end of this function, - * and also in reenter_kprobe() and setup_singlestep(). - */ - preempt_disable(); - - kcb = get_kprobe_ctlblk(); - p = get_kprobe(addr); - - if (p) { - if (kprobe_running()) { - if (reenter_kprobe(p, regs, kcb)) - return 1; - } else { - set_current_kprobe(p, regs, kcb); - kcb->kprobe_status = KPROBE_HIT_ACTIVE; - - /* - * If we have no pre-handler or it returned 0, we - * continue with normal processing. If we have a - * pre-handler and it returned non-zero, it prepped - * for calling the break_handler below on re-entry - * for jprobe processing, so get out doing nothing - * more here. - */ - if (!p->pre_handler || !p->pre_handler(p, regs)) - setup_singlestep(p, regs, kcb, 0); - return 1; - } - } else if (*addr != BREAKPOINT_INSTRUCTION) { - /* - * The breakpoint instruction was removed right - * after we hit it. Another cpu has removed - * either a probepoint or a debugger breakpoint - * at this address. In either case, no further - * handling of this interrupt is appropriate. - * Back up over the (now missing) int3 and run - * the original instruction. 
- */ - regs->ip = (unsigned long)addr; - preempt_enable_no_resched(); - return 1; - } else if (kprobe_running()) { - p = __this_cpu_read(current_kprobe); - if (p->break_handler && p->break_handler(p, regs)) { - setup_singlestep(p, regs, kcb, 0); - return 1; - } - } /* else: not a kprobe fault; let the kernel handle it */ - - preempt_enable_no_resched(); - return 0; -} - -/* - * When a retprobed function returns, this code saves registers and - * calls trampoline_handler() runs, which calls the kretprobe's handler. - */ -static void __used __kprobes kretprobe_trampoline_holder(void) -{ - asm volatile ( - ".global kretprobe_trampoline\n" - "kretprobe_trampoline: \n" -#ifdef CONFIG_X86_64 - /* We don't bother saving the ss register */ - " pushq %rsp\n" - " pushfq\n" - SAVE_REGS_STRING - " movq %rsp, %rdi\n" - " call trampoline_handler\n" - /* Replace saved sp with true return address. */ - " movq %rax, 152(%rsp)\n" - RESTORE_REGS_STRING - " popfq\n" -#else - " pushf\n" - SAVE_REGS_STRING - " movl %esp, %eax\n" - " call trampoline_handler\n" - /* Move flags to cs */ - " movl 56(%esp), %edx\n" - " movl %edx, 52(%esp)\n" - /* Replace saved flags with true return address. */ - " movl %eax, 56(%esp)\n" - RESTORE_REGS_STRING - " popf\n" -#endif - " ret\n"); -} - -/* - * Called from kretprobe_trampoline - */ -static __used __kprobes void *trampoline_handler(struct pt_regs *regs) -{ - struct kretprobe_instance *ri = NULL; - struct hlist_head *head, empty_rp; - struct hlist_node *node, *tmp; - unsigned long flags, orig_ret_address = 0; - unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; - kprobe_opcode_t *correct_ret_addr = NULL; - - INIT_HLIST_HEAD(&empty_rp); - kretprobe_hash_lock(current, &head, &flags); - /* fixup registers */ -#ifdef CONFIG_X86_64 - regs->cs = __KERNEL_CS; -#else - regs->cs = __KERNEL_CS | get_kernel_rpl(); - regs->gs = 0; -#endif - regs->ip = trampoline_address; - regs->orig_ax = ~0UL; - - /* - * It is possible to have multiple instances associated with a given - * task either because multiple functions in the call path have - * return probes installed on them, and/or more than one - * return probe was registered for a target function. - * - * We can handle this because: - * - instances are always pushed into the head of the list - * - when multiple return probes are registered for the same - * function, the (chronologically) first instance's ret_addr - * will be the real return address, and all the rest will - * point to kretprobe_trampoline. - */ - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { - if (ri->task != current) - /* another task is sharing our hash bucket */ - continue; - - orig_ret_address = (unsigned long)ri->ret_addr; - - if (orig_ret_address != trampoline_address) - /* - * This is the real return address. 
Any other - * instances associated with this task are for - * other calls deeper on the call stack - */ - break; - } - - kretprobe_assert(ri, orig_ret_address, trampoline_address); - - correct_ret_addr = ri->ret_addr; - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { - if (ri->task != current) - /* another task is sharing our hash bucket */ - continue; - - orig_ret_address = (unsigned long)ri->ret_addr; - if (ri->rp && ri->rp->handler) { - __this_cpu_write(current_kprobe, &ri->rp->kp); - get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; - ri->ret_addr = correct_ret_addr; - ri->rp->handler(ri, regs); - __this_cpu_write(current_kprobe, NULL); - } - - recycle_rp_inst(ri, &empty_rp); - - if (orig_ret_address != trampoline_address) - /* - * This is the real return address. Any other - * instances associated with this task are for - * other calls deeper on the call stack - */ - break; - } - - kretprobe_hash_unlock(current, &flags); - - hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { - hlist_del(&ri->hlist); - kfree(ri); - } - return (void *)orig_ret_address; -} - -/* - * Called after single-stepping. p->addr is the address of the - * instruction whose first byte has been replaced by the "int 3" - * instruction. To avoid the SMP problems that can occur when we - * temporarily put back the original opcode to single-step, we - * single-stepped a copy of the instruction. The address of this - * copy is p->ainsn.insn. - * - * This function prepares to return from the post-single-step - * interrupt. We have to fix up the stack as follows: - * - * 0) Except in the case of absolute or indirect jump or call instructions, - * the new ip is relative to the copied instruction. We need to make - * it relative to the original instruction. - * - * 1) If the single-stepped instruction was pushfl, then the TF and IF - * flags are set in the just-pushed flags, and may need to be cleared. - * - * 2) If the single-stepped instruction was a call, the return address - * that is atop the stack is the address following the copied instruction. - * We need to make it the address following the original instruction. - * - * If this is the first time we've single-stepped the instruction at - * this probepoint, and the instruction is boostable, boost it: add a - * jump instruction after the copied instruction, that jumps to the next - * instruction after the probepoint. - */ -static void __kprobes -resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) -{ - unsigned long *tos = stack_addr(regs); - unsigned long copy_ip = (unsigned long)p->ainsn.insn; - unsigned long orig_ip = (unsigned long)p->addr; - kprobe_opcode_t *insn = p->ainsn.insn; - - /* Skip prefixes */ - insn = skip_prefixes(insn); - - regs->flags &= ~X86_EFLAGS_TF; - switch (*insn) { - case 0x9c: /* pushfl */ - *tos &= ~(X86_EFLAGS_TF | X86_EFLAGS_IF); - *tos |= kcb->kprobe_old_flags; - break; - case 0xc2: /* iret/ret/lret */ - case 0xc3: - case 0xca: - case 0xcb: - case 0xcf: - case 0xea: /* jmp absolute -- ip is correct */ - /* ip is already adjusted, no more changes required */ - p->ainsn.boostable = 1; - goto no_change; - case 0xe8: /* call relative - Fix return addr */ - *tos = orig_ip + (*tos - copy_ip); - break; -#ifdef CONFIG_X86_32 - case 0x9a: /* call absolute -- same as call absolute, indirect */ - *tos = orig_ip + (*tos - copy_ip); - goto no_change; -#endif - case 0xff: - if ((insn[1] & 0x30) == 0x10) { - /* - * call absolute, indirect - * Fix return addr; ip is correct. 
- * But this is not boostable - */ - *tos = orig_ip + (*tos - copy_ip); - goto no_change; - } else if (((insn[1] & 0x31) == 0x20) || - ((insn[1] & 0x31) == 0x21)) { - /* - * jmp near and far, absolute indirect - * ip is correct. And this is boostable - */ - p->ainsn.boostable = 1; - goto no_change; - } - default: - break; - } - - if (p->ainsn.boostable == 0) { - if ((regs->ip > copy_ip) && - (regs->ip - copy_ip) + 5 < MAX_INSN_SIZE) { - /* - * These instructions can be executed directly if it - * jumps back to correct address. - */ - synthesize_reljump((void *)regs->ip, - (void *)orig_ip + (regs->ip - copy_ip)); - p->ainsn.boostable = 1; - } else { - p->ainsn.boostable = -1; - } - } - - regs->ip += orig_ip - copy_ip; - -no_change: - restore_btf(); -} - -/* - * Interrupts are disabled on entry as trap1 is an interrupt gate and they - * remain disabled throughout this function. - */ -static int __kprobes post_kprobe_handler(struct pt_regs *regs) -{ - struct kprobe *cur = kprobe_running(); - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - - if (!cur) - return 0; - - resume_execution(cur, regs, kcb); - regs->flags |= kcb->kprobe_saved_flags; - - if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { - kcb->kprobe_status = KPROBE_HIT_SSDONE; - cur->post_handler(cur, regs, 0); - } - - /* Restore back the original saved kprobes variables and continue. */ - if (kcb->kprobe_status == KPROBE_REENTER) { - restore_previous_kprobe(kcb); - goto out; - } - reset_current_kprobe(); -out: - preempt_enable_no_resched(); - - /* - * if somebody else is singlestepping across a probe point, flags - * will have TF set, in which case, continue the remaining processing - * of do_debug, as if this is not a probe hit. - */ - if (regs->flags & X86_EFLAGS_TF) - return 0; - - return 1; -} - -int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) -{ - struct kprobe *cur = kprobe_running(); - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - - switch (kcb->kprobe_status) { - case KPROBE_HIT_SS: - case KPROBE_REENTER: - /* - * We are here because the instruction being single - * stepped caused a page fault. We reset the current - * kprobe and the ip points back to the probe address - * and allow the page fault handler to continue as a - * normal page fault. - */ - regs->ip = (unsigned long)cur->addr; - regs->flags |= kcb->kprobe_old_flags; - if (kcb->kprobe_status == KPROBE_REENTER) - restore_previous_kprobe(kcb); - else - reset_current_kprobe(); - preempt_enable_no_resched(); - break; - case KPROBE_HIT_ACTIVE: - case KPROBE_HIT_SSDONE: - /* - * We increment the nmissed count for accounting, - * we can also use npre/npostfault count for accounting - * these specific fault cases. - */ - kprobes_inc_nmissed_count(cur); - - /* - * We come here because instructions in the pre/post - * handler caused the page_fault, this could happen - * if handler tries to access user space by - * copy_from_user(), get_user() etc. Let the - * user-specified handler try to fix it first. - */ - if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) - return 1; - - /* - * In case the user-specified fault handler returned - * zero, try to fix up. - */ - if (fixup_exception(regs)) - return 1; - - /* - * fixup routine could not handle it, - * Let do_page_fault() fix it. - */ - break; - default: - break; - } - return 0; -} - -/* - * Wrapper routine for handling exceptions. 
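/*
 * Illustrative sketch by the editor, not taken from the deleted file:
 * the resume_execution() fix-ups above all apply one translation, in
 * which an address produced while single-stepping the out-of-line copy
 * has the copy's base swapped for the original probe address.  The same
 * formula covers the pushed return address of a call and the final
 * regs->ip adjustment.  Names and addresses below are invented.
 */
#include <stdio.h>

/* translate an address from the copied-instruction slot back to the
 * original text:  new = orig_ip + (addr - copy_ip) */
static unsigned long fixup_addr(unsigned long addr,
				unsigned long copy_ip, unsigned long orig_ip)
{
	return orig_ip + (addr - copy_ip);
}

int main(void)
{
	unsigned long orig_ip = 0x401000;	/* probed instruction */
	unsigned long copy_ip = 0x7f1000;	/* out-of-line copy */
	unsigned long insn_len = 5;		/* e.g. a call rel32 */

	/* return address pushed while stepping the copy ... */
	unsigned long pushed = copy_ip + insn_len;

	/* ... must point just past the original instruction instead */
	printf("fixed return address: %#lx (expected %#lx)\n",
	       fixup_addr(pushed, copy_ip, orig_ip), orig_ip + insn_len);
	return 0;
}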
- */ -int __kprobes -kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) -{ - struct die_args *args = data; - int ret = NOTIFY_DONE; - - if (args->regs && user_mode_vm(args->regs)) - return ret; - - switch (val) { - case DIE_INT3: - if (kprobe_handler(args->regs)) - ret = NOTIFY_STOP; - break; - case DIE_DEBUG: - if (post_kprobe_handler(args->regs)) { - /* - * Reset the BS bit in dr6 (pointed by args->err) to - * denote completion of processing - */ - (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; - ret = NOTIFY_STOP; - } - break; - case DIE_GPF: - /* - * To be potentially processing a kprobe fault and to - * trust the result from kprobe_running(), we have - * be non-preemptible. - */ - if (!preemptible() && kprobe_running() && - kprobe_fault_handler(args->regs, args->trapnr)) - ret = NOTIFY_STOP; - break; - default: - break; - } - return ret; -} - -int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) -{ - struct jprobe *jp = container_of(p, struct jprobe, kp); - unsigned long addr; - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - - kcb->jprobe_saved_regs = *regs; - kcb->jprobe_saved_sp = stack_addr(regs); - addr = (unsigned long)(kcb->jprobe_saved_sp); - - /* - * As Linus pointed out, gcc assumes that the callee - * owns the argument space and could overwrite it, e.g. - * tailcall optimization. So, to be absolutely safe - * we also save and restore enough stack bytes to cover - * the argument area. - */ - memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, - MIN_STACK_SIZE(addr)); - regs->flags &= ~X86_EFLAGS_IF; - trace_hardirqs_off(); - regs->ip = (unsigned long)(jp->entry); - return 1; -} - -void __kprobes jprobe_return(void) -{ - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - - asm volatile ( -#ifdef CONFIG_X86_64 - " xchg %%rbx,%%rsp \n" -#else - " xchgl %%ebx,%%esp \n" -#endif - " int3 \n" - " .globl jprobe_return_end\n" - " jprobe_return_end: \n" - " nop \n"::"b" - (kcb->jprobe_saved_sp):"memory"); -} - -int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) -{ - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - u8 *addr = (u8 *) (regs->ip - 1); - struct jprobe *jp = container_of(p, struct jprobe, kp); - - if ((addr > (u8 *) jprobe_return) && - (addr < (u8 *) jprobe_return_end)) { - if (stack_addr(regs) != kcb->jprobe_saved_sp) { - struct pt_regs *saved_regs = &kcb->jprobe_saved_regs; - printk(KERN_ERR - "current sp %p does not match saved sp %p\n", - stack_addr(regs), kcb->jprobe_saved_sp); - printk(KERN_ERR "Saved registers for jprobe %p\n", jp); - show_registers(saved_regs); - printk(KERN_ERR "Current registers\n"); - show_registers(regs); - BUG(); - } - *regs = kcb->jprobe_saved_regs; - memcpy((kprobe_opcode_t *)(kcb->jprobe_saved_sp), - kcb->jprobes_stack, - MIN_STACK_SIZE(kcb->jprobe_saved_sp)); - preempt_enable_no_resched(); - return 1; - } - return 0; -} - -int __init arch_init_kprobes(void) -{ - return arch_init_optprobes(); -} - -int __kprobes arch_trampoline_kprobe(struct kprobe *p) -{ - return 0; -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/kvm.c b/ANDROID_3.4.5/arch/x86/kernel/kvm.c deleted file mode 100644 index e554e5ad..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/kvm.c +++ /dev/null @@ -1,445 +0,0 @@ -/* - * KVM paravirt_ops implementation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) 
any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright (C) 2007, Red Hat, Inc., Ingo Molnar <mingo@redhat.com> - * Copyright IBM Corporation, 2007 - * Authors: Anthony Liguori <aliguori@us.ibm.com> - */ - -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/kvm_para.h> -#include <linux/cpu.h> -#include <linux/mm.h> -#include <linux/highmem.h> -#include <linux/hardirq.h> -#include <linux/notifier.h> -#include <linux/reboot.h> -#include <linux/hash.h> -#include <linux/sched.h> -#include <linux/slab.h> -#include <linux/kprobes.h> -#include <asm/timer.h> -#include <asm/cpu.h> -#include <asm/traps.h> -#include <asm/desc.h> -#include <asm/tlbflush.h> -#include <asm/idle.h> - -static int kvmapf = 1; - -static int parse_no_kvmapf(char *arg) -{ - kvmapf = 0; - return 0; -} - -early_param("no-kvmapf", parse_no_kvmapf); - -static int steal_acc = 1; -static int parse_no_stealacc(char *arg) -{ - steal_acc = 0; - return 0; -} - -early_param("no-steal-acc", parse_no_stealacc); - -static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); -static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64); -static int has_steal_clock = 0; - -/* - * No need for any "IO delay" on KVM - */ -static void kvm_io_delay(void) -{ -} - -#define KVM_TASK_SLEEP_HASHBITS 8 -#define KVM_TASK_SLEEP_HASHSIZE (1<<KVM_TASK_SLEEP_HASHBITS) - -struct kvm_task_sleep_node { - struct hlist_node link; - wait_queue_head_t wq; - u32 token; - int cpu; - bool halted; -}; - -static struct kvm_task_sleep_head { - spinlock_t lock; - struct hlist_head list; -} async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE]; - -static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b, - u32 token) -{ - struct hlist_node *p; - - hlist_for_each(p, &b->list) { - struct kvm_task_sleep_node *n = - hlist_entry(p, typeof(*n), link); - if (n->token == token) - return n; - } - - return NULL; -} - -void kvm_async_pf_task_wait(u32 token) -{ - u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); - struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; - struct kvm_task_sleep_node n, *e; - DEFINE_WAIT(wait); - int cpu, idle; - - cpu = get_cpu(); - idle = idle_cpu(cpu); - put_cpu(); - - spin_lock(&b->lock); - e = _find_apf_task(b, token); - if (e) { - /* dummy entry exist -> wake up was delivered ahead of PF */ - hlist_del(&e->link); - kfree(e); - spin_unlock(&b->lock); - return; - } - - n.token = token; - n.cpu = smp_processor_id(); - n.halted = idle || preempt_count() > 1; - init_waitqueue_head(&n.wq); - hlist_add_head(&n.link, &b->list); - spin_unlock(&b->lock); - - for (;;) { - if (!n.halted) - prepare_to_wait(&n.wq, &wait, TASK_UNINTERRUPTIBLE); - if (hlist_unhashed(&n.link)) - break; - - if (!n.halted) { - local_irq_enable(); - schedule(); - local_irq_disable(); - } else { - /* - * We cannot reschedule. So halt. 
- */ - native_safe_halt(); - local_irq_disable(); - } - } - if (!n.halted) - finish_wait(&n.wq, &wait); - - return; -} -EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait); - -static void apf_task_wake_one(struct kvm_task_sleep_node *n) -{ - hlist_del_init(&n->link); - if (n->halted) - smp_send_reschedule(n->cpu); - else if (waitqueue_active(&n->wq)) - wake_up(&n->wq); -} - -static void apf_task_wake_all(void) -{ - int i; - - for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) { - struct hlist_node *p, *next; - struct kvm_task_sleep_head *b = &async_pf_sleepers[i]; - spin_lock(&b->lock); - hlist_for_each_safe(p, next, &b->list) { - struct kvm_task_sleep_node *n = - hlist_entry(p, typeof(*n), link); - if (n->cpu == smp_processor_id()) - apf_task_wake_one(n); - } - spin_unlock(&b->lock); - } -} - -void kvm_async_pf_task_wake(u32 token) -{ - u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); - struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; - struct kvm_task_sleep_node *n; - - if (token == ~0) { - apf_task_wake_all(); - return; - } - -again: - spin_lock(&b->lock); - n = _find_apf_task(b, token); - if (!n) { - /* - * async PF was not yet handled. - * Add dummy entry for the token. - */ - n = kzalloc(sizeof(*n), GFP_ATOMIC); - if (!n) { - /* - * Allocation failed! Busy wait while other cpu - * handles async PF. - */ - spin_unlock(&b->lock); - cpu_relax(); - goto again; - } - n->token = token; - n->cpu = smp_processor_id(); - init_waitqueue_head(&n->wq); - hlist_add_head(&n->link, &b->list); - } else - apf_task_wake_one(n); - spin_unlock(&b->lock); - return; -} -EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake); - -u32 kvm_read_and_reset_pf_reason(void) -{ - u32 reason = 0; - - if (__get_cpu_var(apf_reason).enabled) { - reason = __get_cpu_var(apf_reason).reason; - __get_cpu_var(apf_reason).reason = 0; - } - - return reason; -} -EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason); - -dotraplinkage void __kprobes -do_async_page_fault(struct pt_regs *regs, unsigned long error_code) -{ - switch (kvm_read_and_reset_pf_reason()) { - default: - do_page_fault(regs, error_code); - break; - case KVM_PV_REASON_PAGE_NOT_PRESENT: - /* page is swapped out by the host. 
*/ - kvm_async_pf_task_wait((u32)read_cr2()); - break; - case KVM_PV_REASON_PAGE_READY: - rcu_irq_enter(); - exit_idle(); - kvm_async_pf_task_wake((u32)read_cr2()); - rcu_irq_exit(); - break; - } -} - -static void __init paravirt_ops_setup(void) -{ - pv_info.name = "KVM"; - pv_info.paravirt_enabled = 1; - - if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) - pv_cpu_ops.io_delay = kvm_io_delay; - -#ifdef CONFIG_X86_IO_APIC - no_timer_check = 1; -#endif -} - -static void kvm_register_steal_time(void) -{ - int cpu = smp_processor_id(); - struct kvm_steal_time *st = &per_cpu(steal_time, cpu); - - if (!has_steal_clock) - return; - - memset(st, 0, sizeof(*st)); - - wrmsrl(MSR_KVM_STEAL_TIME, (__pa(st) | KVM_MSR_ENABLED)); - printk(KERN_INFO "kvm-stealtime: cpu %d, msr %lx\n", - cpu, __pa(st)); -} - -void __cpuinit kvm_guest_cpu_init(void) -{ - if (!kvm_para_available()) - return; - - if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) { - u64 pa = __pa(&__get_cpu_var(apf_reason)); - -#ifdef CONFIG_PREEMPT - pa |= KVM_ASYNC_PF_SEND_ALWAYS; -#endif - wrmsrl(MSR_KVM_ASYNC_PF_EN, pa | KVM_ASYNC_PF_ENABLED); - __get_cpu_var(apf_reason).enabled = 1; - printk(KERN_INFO"KVM setup async PF for cpu %d\n", - smp_processor_id()); - } - - if (has_steal_clock) - kvm_register_steal_time(); -} - -static void kvm_pv_disable_apf(void *unused) -{ - if (!__get_cpu_var(apf_reason).enabled) - return; - - wrmsrl(MSR_KVM_ASYNC_PF_EN, 0); - __get_cpu_var(apf_reason).enabled = 0; - - printk(KERN_INFO"Unregister pv shared memory for cpu %d\n", - smp_processor_id()); -} - -static int kvm_pv_reboot_notify(struct notifier_block *nb, - unsigned long code, void *unused) -{ - if (code == SYS_RESTART) - on_each_cpu(kvm_pv_disable_apf, NULL, 1); - return NOTIFY_DONE; -} - -static struct notifier_block kvm_pv_reboot_nb = { - .notifier_call = kvm_pv_reboot_notify, -}; - -static u64 kvm_steal_clock(int cpu) -{ - u64 steal; - struct kvm_steal_time *src; - int version; - - src = &per_cpu(steal_time, cpu); - do { - version = src->version; - rmb(); - steal = src->steal; - rmb(); - } while ((version & 1) || (version != src->version)); - - return steal; -} - -void kvm_disable_steal_time(void) -{ - if (!has_steal_clock) - return; - - wrmsr(MSR_KVM_STEAL_TIME, 0, 0); -} - -#ifdef CONFIG_SMP -static void __init kvm_smp_prepare_boot_cpu(void) -{ -#ifdef CONFIG_KVM_CLOCK - WARN_ON(kvm_register_clock("primary cpu clock")); -#endif - kvm_guest_cpu_init(); - native_smp_prepare_boot_cpu(); -} - -static void __cpuinit kvm_guest_cpu_online(void *dummy) -{ - kvm_guest_cpu_init(); -} - -static void kvm_guest_cpu_offline(void *dummy) -{ - kvm_disable_steal_time(); - kvm_pv_disable_apf(NULL); - apf_task_wake_all(); -} - -static int __cpuinit kvm_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - int cpu = (unsigned long)hcpu; - switch (action) { - case CPU_ONLINE: - case CPU_DOWN_FAILED: - case CPU_ONLINE_FROZEN: - smp_call_function_single(cpu, kvm_guest_cpu_online, NULL, 0); - break; - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - smp_call_function_single(cpu, kvm_guest_cpu_offline, NULL, 1); - break; - default: - break; - } - return NOTIFY_OK; -} - -static struct notifier_block __cpuinitdata kvm_cpu_notifier = { - .notifier_call = kvm_cpu_notify, -}; -#endif - -static void __init kvm_apf_trap_init(void) -{ - set_intr_gate(14, &async_page_fault); -} - -void __init kvm_guest_init(void) -{ - int i; - - if (!kvm_para_available()) - return; - - paravirt_ops_setup(); - 
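/*
 * Illustrative sketch by the editor, not part of the deleted file:
 * kvm_steal_clock() above reads the shared steal-time record with an
 * even/odd version protocol: the producer bumps the version to an odd
 * value before updating and to an even value afterwards, so a reader
 * retries while the version is odd or changed during the read.  Plain C
 * stand-in with made-up names; the memory barriers are elided here.
 */
#include <stdint.h>
#include <stdio.h>

struct sample {
	volatile uint32_t version;
	volatile uint64_t value;
};

static uint64_t read_consistent(const struct sample *s)
{
	uint32_t v;
	uint64_t val;

	do {
		v = s->version;		/* snapshot the version ...  */
		val = s->value;		/* ... then the payload      */
	} while ((v & 1) || v != s->version);	/* retry on a torn read */

	return val;
}

int main(void)
{
	struct sample s = { .version = 2, .value = 12345 };

	printf("steal time snapshot: %llu\n",
	       (unsigned long long)read_consistent(&s));
	return 0;
}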
register_reboot_notifier(&kvm_pv_reboot_nb); - for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) - spin_lock_init(&async_pf_sleepers[i].lock); - if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF)) - x86_init.irqs.trap_init = kvm_apf_trap_init; - - if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { - has_steal_clock = 1; - pv_time_ops.steal_clock = kvm_steal_clock; - } - -#ifdef CONFIG_SMP - smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; - register_cpu_notifier(&kvm_cpu_notifier); -#else - kvm_guest_cpu_init(); -#endif -} - -static __init int activate_jump_labels(void) -{ - if (has_steal_clock) { - static_key_slow_inc(¶virt_steal_enabled); - if (steal_acc) - static_key_slow_inc(¶virt_steal_rq_enabled); - } - - return 0; -} -arch_initcall(activate_jump_labels); diff --git a/ANDROID_3.4.5/arch/x86/kernel/kvmclock.c b/ANDROID_3.4.5/arch/x86/kernel/kvmclock.c deleted file mode 100644 index f8492da6..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/kvmclock.c +++ /dev/null @@ -1,220 +0,0 @@ -/* KVM paravirtual clock driver. A clocksource implementation - Copyright (C) 2008 Glauber de Oliveira Costa, Red Hat Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -*/ - -#include <linux/clocksource.h> -#include <linux/kvm_para.h> -#include <asm/pvclock.h> -#include <asm/msr.h> -#include <asm/apic.h> -#include <linux/percpu.h> - -#include <asm/x86_init.h> -#include <asm/reboot.h> - -static int kvmclock = 1; -static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; -static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; - -static int parse_no_kvmclock(char *arg) -{ - kvmclock = 0; - return 0; -} -early_param("no-kvmclock", parse_no_kvmclock); - -/* The hypervisor will put information about time periodically here */ -static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock); -static struct pvclock_wall_clock wall_clock; - -/* - * The wallclock is the time of day when we booted. Since then, some time may - * have elapsed since the hypervisor wrote the data. 
So we try to account for - * that with system time - */ -static unsigned long kvm_get_wallclock(void) -{ - struct pvclock_vcpu_time_info *vcpu_time; - struct timespec ts; - int low, high; - - low = (int)__pa_symbol(&wall_clock); - high = ((u64)__pa_symbol(&wall_clock) >> 32); - - native_write_msr(msr_kvm_wall_clock, low, high); - - vcpu_time = &get_cpu_var(hv_clock); - pvclock_read_wallclock(&wall_clock, vcpu_time, &ts); - put_cpu_var(hv_clock); - - return ts.tv_sec; -} - -static int kvm_set_wallclock(unsigned long now) -{ - return -1; -} - -static cycle_t kvm_clock_read(void) -{ - struct pvclock_vcpu_time_info *src; - cycle_t ret; - - preempt_disable_notrace(); - src = &__get_cpu_var(hv_clock); - ret = pvclock_clocksource_read(src); - preempt_enable_notrace(); - return ret; -} - -static cycle_t kvm_clock_get_cycles(struct clocksource *cs) -{ - return kvm_clock_read(); -} - -/* - * If we don't do that, there is the possibility that the guest - * will calibrate under heavy load - thus, getting a lower lpj - - * and execute the delays themselves without load. This is wrong, - * because no delay loop can finish beforehand. - * Any heuristics is subject to fail, because ultimately, a large - * poll of guests can be running and trouble each other. So we preset - * lpj here - */ -static unsigned long kvm_get_tsc_khz(void) -{ - struct pvclock_vcpu_time_info *src; - src = &per_cpu(hv_clock, 0); - return pvclock_tsc_khz(src); -} - -static void kvm_get_preset_lpj(void) -{ - unsigned long khz; - u64 lpj; - - khz = kvm_get_tsc_khz(); - - lpj = ((u64)khz * 1000); - do_div(lpj, HZ); - preset_lpj = lpj; -} - -static struct clocksource kvm_clock = { - .name = "kvm-clock", - .read = kvm_clock_get_cycles, - .rating = 400, - .mask = CLOCKSOURCE_MASK(64), - .flags = CLOCK_SOURCE_IS_CONTINUOUS, -}; - -int kvm_register_clock(char *txt) -{ - int cpu = smp_processor_id(); - int low, high, ret; - - low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1; - high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); - ret = native_write_msr_safe(msr_kvm_system_time, low, high); - printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", - cpu, high, low, txt); - - return ret; -} - -static void kvm_save_sched_clock_state(void) -{ -} - -static void kvm_restore_sched_clock_state(void) -{ - kvm_register_clock("primary cpu clock, resume"); -} - -#ifdef CONFIG_X86_LOCAL_APIC -static void __cpuinit kvm_setup_secondary_clock(void) -{ - /* - * Now that the first cpu already had this clocksource initialized, - * we shouldn't fail. - */ - WARN_ON(kvm_register_clock("secondary cpu clock")); -} -#endif - -/* - * After the clock is registered, the host will keep writing to the - * registered memory location. If the guest happens to shutdown, this memory - * won't be valid. In cases like kexec, in which you install a new kernel, this - * means a random memory location will be kept being written. 
So before any - * kind of shutdown from our side, we unregister the clock by writting anything - * that does not have the 'enable' bit set in the msr - */ -#ifdef CONFIG_KEXEC -static void kvm_crash_shutdown(struct pt_regs *regs) -{ - native_write_msr(msr_kvm_system_time, 0, 0); - kvm_disable_steal_time(); - native_machine_crash_shutdown(regs); -} -#endif - -static void kvm_shutdown(void) -{ - native_write_msr(msr_kvm_system_time, 0, 0); - kvm_disable_steal_time(); - native_machine_shutdown(); -} - -void __init kvmclock_init(void) -{ - if (!kvm_para_available()) - return; - - if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE2)) { - msr_kvm_system_time = MSR_KVM_SYSTEM_TIME_NEW; - msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK_NEW; - } else if (!(kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE))) - return; - - printk(KERN_INFO "kvm-clock: Using msrs %x and %x", - msr_kvm_system_time, msr_kvm_wall_clock); - - if (kvm_register_clock("boot clock")) - return; - pv_time_ops.sched_clock = kvm_clock_read; - x86_platform.calibrate_tsc = kvm_get_tsc_khz; - x86_platform.get_wallclock = kvm_get_wallclock; - x86_platform.set_wallclock = kvm_set_wallclock; -#ifdef CONFIG_X86_LOCAL_APIC - x86_cpuinit.early_percpu_clock_init = - kvm_setup_secondary_clock; -#endif - x86_platform.save_sched_clock_state = kvm_save_sched_clock_state; - x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state; - machine_ops.shutdown = kvm_shutdown; -#ifdef CONFIG_KEXEC - machine_ops.crash_shutdown = kvm_crash_shutdown; -#endif - kvm_get_preset_lpj(); - clocksource_register_hz(&kvm_clock, NSEC_PER_SEC); - pv_info.paravirt_enabled = 1; - pv_info.name = "KVM"; - - if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) - pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/ldt.c b/ANDROID_3.4.5/arch/x86/kernel/ldt.c deleted file mode 100644 index ebc98739..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/ldt.c +++ /dev/null @@ -1,267 +0,0 @@ -/* - * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds - * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> - * Copyright (C) 2002 Andi Kleen - * - * This handles calls from both 32bit and 64bit mode. - */ - -#include <linux/errno.h> -#include <linux/gfp.h> -#include <linux/sched.h> -#include <linux/string.h> -#include <linux/mm.h> -#include <linux/smp.h> -#include <linux/vmalloc.h> -#include <linux/uaccess.h> - -#include <asm/ldt.h> -#include <asm/desc.h> -#include <asm/mmu_context.h> -#include <asm/syscalls.h> - -#ifdef CONFIG_SMP -static void flush_ldt(void *current_mm) -{ - if (current->active_mm == current_mm) - load_LDT(¤t->active_mm->context); -} -#endif - -static int alloc_ldt(mm_context_t *pc, int mincount, int reload) -{ - void *oldldt, *newldt; - int oldsize; - - if (mincount <= pc->size) - return 0; - oldsize = pc->size; - mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) & - (~(PAGE_SIZE / LDT_ENTRY_SIZE - 1)); - if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE) - newldt = vmalloc(mincount * LDT_ENTRY_SIZE); - else - newldt = (void *)__get_free_page(GFP_KERNEL); - - if (!newldt) - return -ENOMEM; - - if (oldsize) - memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE); - oldldt = pc->ldt; - memset(newldt + oldsize * LDT_ENTRY_SIZE, 0, - (mincount - oldsize) * LDT_ENTRY_SIZE); - - paravirt_alloc_ldt(newldt, mincount); - -#ifdef CONFIG_X86_64 - /* CHECKME: Do we really need this ? 
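/*
 * Illustrative sketch by the editor, not part of the deleted file:
 * alloc_ldt() above rounds the requested entry count up to a whole
 * number of pages worth of LDT entries with the usual add-then-mask
 * idiom, which works because entries-per-page is a power of two.  The
 * constants below are assumed for the example.
 */
#include <stdio.h>

#define PAGE_SIZE	4096u
#define LDT_ENTRY_SIZE	8u
#define ENTRIES_PER_PAGE (PAGE_SIZE / LDT_ENTRY_SIZE)	/* 512 */

static unsigned int round_up_entries(unsigned int mincount)
{
	return (mincount + ENTRIES_PER_PAGE - 1) & ~(ENTRIES_PER_PAGE - 1);
}

int main(void)
{
	/* asking for 1 or 513 entries still allocates whole pages */
	printf("%u -> %u\n", 1u, round_up_entries(1));		/* 512  */
	printf("%u -> %u\n", 513u, round_up_entries(513));	/* 1024 */
	return 0;
}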
*/ - wmb(); -#endif - pc->ldt = newldt; - wmb(); - pc->size = mincount; - wmb(); - - if (reload) { -#ifdef CONFIG_SMP - preempt_disable(); - load_LDT(pc); - if (!cpumask_equal(mm_cpumask(current->mm), - cpumask_of(smp_processor_id()))) - smp_call_function(flush_ldt, current->mm, 1); - preempt_enable(); -#else - load_LDT(pc); -#endif - } - if (oldsize) { - paravirt_free_ldt(oldldt, oldsize); - if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(oldldt); - else - put_page(virt_to_page(oldldt)); - } - return 0; -} - -static inline int copy_ldt(mm_context_t *new, mm_context_t *old) -{ - int err = alloc_ldt(new, old->size, 0); - int i; - - if (err < 0) - return err; - - for (i = 0; i < old->size; i++) - write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); - return 0; -} - -/* - * we do not have to muck with descriptors here, that is - * done in switch_mm() as needed. - */ -int init_new_context(struct task_struct *tsk, struct mm_struct *mm) -{ - struct mm_struct *old_mm; - int retval = 0; - - mutex_init(&mm->context.lock); - mm->context.size = 0; - old_mm = current->mm; - if (old_mm && old_mm->context.size > 0) { - mutex_lock(&old_mm->context.lock); - retval = copy_ldt(&mm->context, &old_mm->context); - mutex_unlock(&old_mm->context.lock); - } - return retval; -} - -/* - * No need to lock the MM as we are the last user - * - * 64bit: Don't touch the LDT register - we're already in the next thread. - */ -void destroy_context(struct mm_struct *mm) -{ - if (mm->context.size) { -#ifdef CONFIG_X86_32 - /* CHECKME: Can this ever happen ? */ - if (mm == current->active_mm) - clear_LDT(); -#endif - paravirt_free_ldt(mm->context.ldt, mm->context.size); - if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(mm->context.ldt); - else - put_page(virt_to_page(mm->context.ldt)); - mm->context.size = 0; - } -} - -static int read_ldt(void __user *ptr, unsigned long bytecount) -{ - int err; - unsigned long size; - struct mm_struct *mm = current->mm; - - if (!mm->context.size) - return 0; - if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES) - bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES; - - mutex_lock(&mm->context.lock); - size = mm->context.size * LDT_ENTRY_SIZE; - if (size > bytecount) - size = bytecount; - - err = 0; - if (copy_to_user(ptr, mm->context.ldt, size)) - err = -EFAULT; - mutex_unlock(&mm->context.lock); - if (err < 0) - goto error_return; - if (size != bytecount) { - /* zero-fill the rest */ - if (clear_user(ptr + size, bytecount - size) != 0) { - err = -EFAULT; - goto error_return; - } - } - return bytecount; -error_return: - return err; -} - -static int read_default_ldt(void __user *ptr, unsigned long bytecount) -{ - /* CHECKME: Can we use _one_ random number ? 
*/ -#ifdef CONFIG_X86_32 - unsigned long size = 5 * sizeof(struct desc_struct); -#else - unsigned long size = 128; -#endif - if (bytecount > size) - bytecount = size; - if (clear_user(ptr, bytecount)) - return -EFAULT; - return bytecount; -} - -static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) -{ - struct mm_struct *mm = current->mm; - struct desc_struct ldt; - int error; - struct user_desc ldt_info; - - error = -EINVAL; - if (bytecount != sizeof(ldt_info)) - goto out; - error = -EFAULT; - if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) - goto out; - - error = -EINVAL; - if (ldt_info.entry_number >= LDT_ENTRIES) - goto out; - if (ldt_info.contents == 3) { - if (oldmode) - goto out; - if (ldt_info.seg_not_present == 0) - goto out; - } - - mutex_lock(&mm->context.lock); - if (ldt_info.entry_number >= mm->context.size) { - error = alloc_ldt(¤t->mm->context, - ldt_info.entry_number + 1, 1); - if (error < 0) - goto out_unlock; - } - - /* Allow LDTs to be cleared by the user. */ - if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { - if (oldmode || LDT_empty(&ldt_info)) { - memset(&ldt, 0, sizeof(ldt)); - goto install; - } - } - - fill_ldt(&ldt, &ldt_info); - if (oldmode) - ldt.avl = 0; - - /* Install the new entry ... */ -install: - write_ldt_entry(mm->context.ldt, ldt_info.entry_number, &ldt); - error = 0; - -out_unlock: - mutex_unlock(&mm->context.lock); -out: - return error; -} - -asmlinkage int sys_modify_ldt(int func, void __user *ptr, - unsigned long bytecount) -{ - int ret = -ENOSYS; - - switch (func) { - case 0: - ret = read_ldt(ptr, bytecount); - break; - case 1: - ret = write_ldt(ptr, bytecount, 1); - break; - case 2: - ret = read_default_ldt(ptr, bytecount); - break; - case 0x11: - ret = write_ldt(ptr, bytecount, 0); - break; - } - return ret; -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/machine_kexec_32.c b/ANDROID_3.4.5/arch/x86/kernel/machine_kexec_32.c deleted file mode 100644 index 5b19e4d7..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/machine_kexec_32.c +++ /dev/null @@ -1,272 +0,0 @@ -/* - * handle transition of Linux booting another kernel - * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com> - * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. 
- */ - -#include <linux/mm.h> -#include <linux/kexec.h> -#include <linux/delay.h> -#include <linux/init.h> -#include <linux/numa.h> -#include <linux/ftrace.h> -#include <linux/suspend.h> -#include <linux/gfp.h> -#include <linux/io.h> - -#include <asm/pgtable.h> -#include <asm/pgalloc.h> -#include <asm/tlbflush.h> -#include <asm/mmu_context.h> -#include <asm/apic.h> -#include <asm/cpufeature.h> -#include <asm/desc.h> -#include <asm/cacheflush.h> -#include <asm/debugreg.h> - -static void set_idt(void *newidt, __u16 limit) -{ - struct desc_ptr curidt; - - /* ia32 supports unaliged loads & stores */ - curidt.size = limit; - curidt.address = (unsigned long)newidt; - - load_idt(&curidt); -} - - -static void set_gdt(void *newgdt, __u16 limit) -{ - struct desc_ptr curgdt; - - /* ia32 supports unaligned loads & stores */ - curgdt.size = limit; - curgdt.address = (unsigned long)newgdt; - - load_gdt(&curgdt); -} - -static void load_segments(void) -{ -#define __STR(X) #X -#define STR(X) __STR(X) - - __asm__ __volatile__ ( - "\tljmp $"STR(__KERNEL_CS)",$1f\n" - "\t1:\n" - "\tmovl $"STR(__KERNEL_DS)",%%eax\n" - "\tmovl %%eax,%%ds\n" - "\tmovl %%eax,%%es\n" - "\tmovl %%eax,%%fs\n" - "\tmovl %%eax,%%gs\n" - "\tmovl %%eax,%%ss\n" - : : : "eax", "memory"); -#undef STR -#undef __STR -} - -static void machine_kexec_free_page_tables(struct kimage *image) -{ - free_page((unsigned long)image->arch.pgd); -#ifdef CONFIG_X86_PAE - free_page((unsigned long)image->arch.pmd0); - free_page((unsigned long)image->arch.pmd1); -#endif - free_page((unsigned long)image->arch.pte0); - free_page((unsigned long)image->arch.pte1); -} - -static int machine_kexec_alloc_page_tables(struct kimage *image) -{ - image->arch.pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL); -#ifdef CONFIG_X86_PAE - image->arch.pmd0 = (pmd_t *)get_zeroed_page(GFP_KERNEL); - image->arch.pmd1 = (pmd_t *)get_zeroed_page(GFP_KERNEL); -#endif - image->arch.pte0 = (pte_t *)get_zeroed_page(GFP_KERNEL); - image->arch.pte1 = (pte_t *)get_zeroed_page(GFP_KERNEL); - if (!image->arch.pgd || -#ifdef CONFIG_X86_PAE - !image->arch.pmd0 || !image->arch.pmd1 || -#endif - !image->arch.pte0 || !image->arch.pte1) { - machine_kexec_free_page_tables(image); - return -ENOMEM; - } - return 0; -} - -static void machine_kexec_page_table_set_one( - pgd_t *pgd, pmd_t *pmd, pte_t *pte, - unsigned long vaddr, unsigned long paddr) -{ - pud_t *pud; - - pgd += pgd_index(vaddr); -#ifdef CONFIG_X86_PAE - if (!(pgd_val(*pgd) & _PAGE_PRESENT)) - set_pgd(pgd, __pgd(__pa(pmd) | _PAGE_PRESENT)); -#endif - pud = pud_offset(pgd, vaddr); - pmd = pmd_offset(pud, vaddr); - if (!(pmd_val(*pmd) & _PAGE_PRESENT)) - set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE)); - pte = pte_offset_kernel(pmd, vaddr); - set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC)); -} - -static void machine_kexec_prepare_page_tables(struct kimage *image) -{ - void *control_page; - pmd_t *pmd = NULL; - - control_page = page_address(image->control_code_page); -#ifdef CONFIG_X86_PAE - pmd = image->arch.pmd0; -#endif - machine_kexec_page_table_set_one( - image->arch.pgd, pmd, image->arch.pte0, - (unsigned long)control_page, __pa(control_page)); -#ifdef CONFIG_X86_PAE - pmd = image->arch.pmd1; -#endif - machine_kexec_page_table_set_one( - image->arch.pgd, pmd, image->arch.pte1, - __pa(control_page), __pa(control_page)); -} - -/* - * A architecture hook called to validate the - * proposed image and prepare the control pages - * as needed. 
The pages for KEXEC_CONTROL_PAGE_SIZE - * have been allocated, but the segments have yet - * been copied into the kernel. - * - * Do what every setup is needed on image and the - * reboot code buffer to allow us to avoid allocations - * later. - * - * - Make control page executable. - * - Allocate page tables - * - Setup page tables - */ -int machine_kexec_prepare(struct kimage *image) -{ - int error; - - set_pages_x(image->control_code_page, 1); - error = machine_kexec_alloc_page_tables(image); - if (error) - return error; - machine_kexec_prepare_page_tables(image); - return 0; -} - -/* - * Undo anything leftover by machine_kexec_prepare - * when an image is freed. - */ -void machine_kexec_cleanup(struct kimage *image) -{ - set_pages_nx(image->control_code_page, 1); - machine_kexec_free_page_tables(image); -} - -/* - * Do not allocate memory (or fail in any way) in machine_kexec(). - * We are past the point of no return, committed to rebooting now. - */ -void machine_kexec(struct kimage *image) -{ - unsigned long page_list[PAGES_NR]; - void *control_page; - int save_ftrace_enabled; - asmlinkage unsigned long - (*relocate_kernel_ptr)(unsigned long indirection_page, - unsigned long control_page, - unsigned long start_address, - unsigned int has_pae, - unsigned int preserve_context); - -#ifdef CONFIG_KEXEC_JUMP - if (image->preserve_context) - save_processor_state(); -#endif - - save_ftrace_enabled = __ftrace_enabled_save(); - - /* Interrupts aren't acceptable while we reboot */ - local_irq_disable(); - hw_breakpoint_disable(); - - if (image->preserve_context) { -#ifdef CONFIG_X86_IO_APIC - /* - * We need to put APICs in legacy mode so that we can - * get timer interrupts in second kernel. kexec/kdump - * paths already have calls to disable_IO_APIC() in - * one form or other. kexec jump path also need - * one. - */ - disable_IO_APIC(); -#endif - } - - control_page = page_address(image->control_code_page); - memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); - - relocate_kernel_ptr = control_page; - page_list[PA_CONTROL_PAGE] = __pa(control_page); - page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; - page_list[PA_PGD] = __pa(image->arch.pgd); - - if (image->type == KEXEC_TYPE_DEFAULT) - page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) - << PAGE_SHIFT); - - /* - * The segment registers are funny things, they have both a - * visible and an invisible part. Whenever the visible part is - * set to a specific selector, the invisible part is loaded - * with from a table in memory. At no other time is the - * descriptor table in memory accessed. - * - * I take advantage of this here by force loading the - * segments, before I zap the gdt with an invalid value. - */ - load_segments(); - /* - * The gdt & idt are now invalid. - * If you want to load them you must set up your own idt & gdt. 
- */ - set_gdt(phys_to_virt(0), 0); - set_idt(phys_to_virt(0), 0); - - /* now call it */ - image->start = relocate_kernel_ptr((unsigned long)image->head, - (unsigned long)page_list, - image->start, cpu_has_pae, - image->preserve_context); - -#ifdef CONFIG_KEXEC_JUMP - if (image->preserve_context) - restore_processor_state(); -#endif - - __ftrace_enabled_restore(save_ftrace_enabled); -} - -void arch_crash_save_vmcoreinfo(void) -{ -#ifdef CONFIG_NUMA - VMCOREINFO_SYMBOL(node_data); - VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); -#endif -#ifdef CONFIG_X86_PAE - VMCOREINFO_CONFIG(X86_PAE); -#endif -} - diff --git a/ANDROID_3.4.5/arch/x86/kernel/machine_kexec_64.c b/ANDROID_3.4.5/arch/x86/kernel/machine_kexec_64.c deleted file mode 100644 index b3ea9db3..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/machine_kexec_64.c +++ /dev/null @@ -1,356 +0,0 @@ -/* - * handle transition of Linux booting another kernel - * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com> - * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. - */ - -#include <linux/mm.h> -#include <linux/kexec.h> -#include <linux/string.h> -#include <linux/gfp.h> -#include <linux/reboot.h> -#include <linux/numa.h> -#include <linux/ftrace.h> -#include <linux/io.h> -#include <linux/suspend.h> - -#include <asm/pgtable.h> -#include <asm/tlbflush.h> -#include <asm/mmu_context.h> -#include <asm/debugreg.h> - -static int init_one_level2_page(struct kimage *image, pgd_t *pgd, - unsigned long addr) -{ - pud_t *pud; - pmd_t *pmd; - struct page *page; - int result = -ENOMEM; - - addr &= PMD_MASK; - pgd += pgd_index(addr); - if (!pgd_present(*pgd)) { - page = kimage_alloc_control_pages(image, 0); - if (!page) - goto out; - pud = (pud_t *)page_address(page); - clear_page(pud); - set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); - } - pud = pud_offset(pgd, addr); - if (!pud_present(*pud)) { - page = kimage_alloc_control_pages(image, 0); - if (!page) - goto out; - pmd = (pmd_t *)page_address(page); - clear_page(pmd); - set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); - } - pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) - set_pmd(pmd, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC)); - result = 0; -out: - return result; -} - -static void init_level2_page(pmd_t *level2p, unsigned long addr) -{ - unsigned long end_addr; - - addr &= PAGE_MASK; - end_addr = addr + PUD_SIZE; - while (addr < end_addr) { - set_pmd(level2p++, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC)); - addr += PMD_SIZE; - } -} - -static int init_level3_page(struct kimage *image, pud_t *level3p, - unsigned long addr, unsigned long last_addr) -{ - unsigned long end_addr; - int result; - - result = 0; - addr &= PAGE_MASK; - end_addr = addr + PGDIR_SIZE; - while ((addr < last_addr) && (addr < end_addr)) { - struct page *page; - pmd_t *level2p; - - page = kimage_alloc_control_pages(image, 0); - if (!page) { - result = -ENOMEM; - goto out; - } - level2p = (pmd_t *)page_address(page); - init_level2_page(level2p, addr); - set_pud(level3p++, __pud(__pa(level2p) | _KERNPG_TABLE)); - addr += PUD_SIZE; - } - /* clear the unused entries */ - while (addr < end_addr) { - pud_clear(level3p++); - addr += PUD_SIZE; - } -out: - return result; -} - - -static int init_level4_page(struct kimage *image, pgd_t *level4p, - unsigned long addr, unsigned long last_addr) -{ - unsigned long end_addr; - int result; - - result = 0; - addr &= PAGE_MASK; - end_addr = addr + (PTRS_PER_PGD * PGDIR_SIZE); - while ((addr < last_addr) && (addr < end_addr)) 
{ - struct page *page; - pud_t *level3p; - - page = kimage_alloc_control_pages(image, 0); - if (!page) { - result = -ENOMEM; - goto out; - } - level3p = (pud_t *)page_address(page); - result = init_level3_page(image, level3p, addr, last_addr); - if (result) - goto out; - set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE)); - addr += PGDIR_SIZE; - } - /* clear the unused entries */ - while (addr < end_addr) { - pgd_clear(level4p++); - addr += PGDIR_SIZE; - } -out: - return result; -} - -static void free_transition_pgtable(struct kimage *image) -{ - free_page((unsigned long)image->arch.pud); - free_page((unsigned long)image->arch.pmd); - free_page((unsigned long)image->arch.pte); -} - -static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) -{ - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - unsigned long vaddr, paddr; - int result = -ENOMEM; - - vaddr = (unsigned long)relocate_kernel; - paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE); - pgd += pgd_index(vaddr); - if (!pgd_present(*pgd)) { - pud = (pud_t *)get_zeroed_page(GFP_KERNEL); - if (!pud) - goto err; - image->arch.pud = pud; - set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); - } - pud = pud_offset(pgd, vaddr); - if (!pud_present(*pud)) { - pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL); - if (!pmd) - goto err; - image->arch.pmd = pmd; - set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); - } - pmd = pmd_offset(pud, vaddr); - if (!pmd_present(*pmd)) { - pte = (pte_t *)get_zeroed_page(GFP_KERNEL); - if (!pte) - goto err; - image->arch.pte = pte; - set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); - } - pte = pte_offset_kernel(pmd, vaddr); - set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC)); - return 0; -err: - free_transition_pgtable(image); - return result; -} - - -static int init_pgtable(struct kimage *image, unsigned long start_pgtable) -{ - pgd_t *level4p; - int result; - level4p = (pgd_t *)__va(start_pgtable); - result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); - if (result) - return result; - /* - * image->start may be outside 0 ~ max_pfn, for example when - * jump back to original kernel from kexeced kernel - */ - result = init_one_level2_page(image, level4p, image->start); - if (result) - return result; - return init_transition_pgtable(image, level4p); -} - -static void set_idt(void *newidt, u16 limit) -{ - struct desc_ptr curidt; - - /* x86-64 supports unaliged loads & stores */ - curidt.size = limit; - curidt.address = (unsigned long)newidt; - - __asm__ __volatile__ ( - "lidtq %0\n" - : : "m" (curidt) - ); -}; - - -static void set_gdt(void *newgdt, u16 limit) -{ - struct desc_ptr curgdt; - - /* x86-64 supports unaligned loads & stores */ - curgdt.size = limit; - curgdt.address = (unsigned long)newgdt; - - __asm__ __volatile__ ( - "lgdtq %0\n" - : : "m" (curgdt) - ); -}; - -static void load_segments(void) -{ - __asm__ __volatile__ ( - "\tmovl %0,%%ds\n" - "\tmovl %0,%%es\n" - "\tmovl %0,%%ss\n" - "\tmovl %0,%%fs\n" - "\tmovl %0,%%gs\n" - : : "a" (__KERNEL_DS) : "memory" - ); -} - -int machine_kexec_prepare(struct kimage *image) -{ - unsigned long start_pgtable; - int result; - - /* Calculate the offsets */ - start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; - - /* Setup the identity mapped 64bit page table */ - result = init_pgtable(image, start_pgtable); - if (result) - return result; - - return 0; -} - -void machine_kexec_cleanup(struct kimage *image) -{ - free_transition_pgtable(image); -} - -/* - * Do not allocate memory (or fail in any way) in 
machine_kexec(). - * We are past the point of no return, committed to rebooting now. - */ -void machine_kexec(struct kimage *image) -{ - unsigned long page_list[PAGES_NR]; - void *control_page; - int save_ftrace_enabled; - -#ifdef CONFIG_KEXEC_JUMP - if (image->preserve_context) - save_processor_state(); -#endif - - save_ftrace_enabled = __ftrace_enabled_save(); - - /* Interrupts aren't acceptable while we reboot */ - local_irq_disable(); - hw_breakpoint_disable(); - - if (image->preserve_context) { -#ifdef CONFIG_X86_IO_APIC - /* - * We need to put APICs in legacy mode so that we can - * get timer interrupts in second kernel. kexec/kdump - * paths already have calls to disable_IO_APIC() in - * one form or other. kexec jump path also need - * one. - */ - disable_IO_APIC(); -#endif - } - - control_page = page_address(image->control_code_page) + PAGE_SIZE; - memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); - - page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page); - page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; - page_list[PA_TABLE_PAGE] = - (unsigned long)__pa(page_address(image->control_code_page)); - - if (image->type == KEXEC_TYPE_DEFAULT) - page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) - << PAGE_SHIFT); - - /* - * The segment registers are funny things, they have both a - * visible and an invisible part. Whenever the visible part is - * set to a specific selector, the invisible part is loaded - * with from a table in memory. At no other time is the - * descriptor table in memory accessed. - * - * I take advantage of this here by force loading the - * segments, before I zap the gdt with an invalid value. - */ - load_segments(); - /* - * The gdt & idt are now invalid. - * If you want to load them you must set up your own idt & gdt. 
- */ - set_gdt(phys_to_virt(0), 0); - set_idt(phys_to_virt(0), 0); - - /* now call it */ - image->start = relocate_kernel((unsigned long)image->head, - (unsigned long)page_list, - image->start, - image->preserve_context); - -#ifdef CONFIG_KEXEC_JUMP - if (image->preserve_context) - restore_processor_state(); -#endif - - __ftrace_enabled_restore(save_ftrace_enabled); -} - -void arch_crash_save_vmcoreinfo(void) -{ - VMCOREINFO_SYMBOL(phys_base); - VMCOREINFO_SYMBOL(init_level4_pgt); - -#ifdef CONFIG_NUMA - VMCOREINFO_SYMBOL(node_data); - VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); -#endif -} - diff --git a/ANDROID_3.4.5/arch/x86/kernel/mca_32.c b/ANDROID_3.4.5/arch/x86/kernel/mca_32.c deleted file mode 100644 index 7eb1e2b9..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/mca_32.c +++ /dev/null @@ -1,476 +0,0 @@ -/* - * Written by Martin Kolinek, February 1996 - * - * Changes: - * - * Chris Beauregard July 28th, 1996 - * - Fixed up integrated SCSI detection - * - * Chris Beauregard August 3rd, 1996 - * - Made mca_info local - * - Made integrated registers accessible through standard function calls - * - Added name field - * - More sanity checking - * - * Chris Beauregard August 9th, 1996 - * - Rewrote /proc/mca - * - * Chris Beauregard January 7th, 1997 - * - Added basic NMI-processing - * - Added more information to mca_info structure - * - * David Weinehall October 12th, 1998 - * - Made a lot of cleaning up in the source - * - Added use of save_flags / restore_flags - * - Added the 'driver_loaded' flag in MCA_adapter - * - Added an alternative implemention of ZP Gu's mca_find_unused_adapter - * - * David Weinehall March 24th, 1999 - * - Fixed the output of 'Driver Installed' in /proc/mca/pos - * - Made the Integrated Video & SCSI show up even if they have id 0000 - * - * Alexander Viro November 9th, 1999 - * - Switched to regular procfs methods - * - * Alfred Arnold & David Weinehall August 23rd, 2000 - * - Added support for Planar POS-registers - */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/mca.h> -#include <linux/kprobes.h> -#include <linux/slab.h> -#include <asm/io.h> -#include <linux/proc_fs.h> -#include <linux/mman.h> -#include <linux/mm.h> -#include <linux/pagemap.h> -#include <linux/ioport.h> -#include <asm/uaccess.h> -#include <linux/init.h> - -static unsigned char which_scsi; - -int MCA_bus; -EXPORT_SYMBOL(MCA_bus); - -/* - * Motherboard register spinlock. Untested on SMP at the moment, but - * are there any MCA SMP boxes? - * - * Yes - Alan - */ -static DEFINE_SPINLOCK(mca_lock); - -/* Build the status info for the adapter */ - -static void mca_configure_adapter_status(struct mca_device *mca_dev) -{ - mca_dev->status = MCA_ADAPTER_NONE; - - mca_dev->pos_id = mca_dev->pos[0] - + (mca_dev->pos[1] << 8); - - if (!mca_dev->pos_id && mca_dev->slot < MCA_MAX_SLOT_NR) { - - /* - * id = 0x0000 usually indicates hardware failure, - * however, ZP Gu (zpg@castle.net> reports that his 9556 - * has 0x0000 as id and everything still works. There - * also seem to be an adapter with id = 0x0000; the - * NCR Parallel Bus Memory Card. Until this is confirmed, - * however, this code will stay. - */ - - mca_dev->status = MCA_ADAPTER_ERROR; - - return; - } else if (mca_dev->pos_id != 0xffff) { - - /* - * 0xffff usually indicates that there's no adapter, - * however, some integrated adapters may have 0xffff as - * their id and still be valid. 
Examples are on-board - * VGA of the 55sx, the integrated SCSI of the 56 & 57, - * and possibly also the 95 ULTIMEDIA. - */ - - mca_dev->status = MCA_ADAPTER_NORMAL; - } - - if ((mca_dev->pos_id == 0xffff || - mca_dev->pos_id == 0x0000) && mca_dev->slot >= MCA_MAX_SLOT_NR) { - int j; - - for (j = 2; j < 8; j++) { - if (mca_dev->pos[j] != 0xff) { - mca_dev->status = MCA_ADAPTER_NORMAL; - break; - } - } - } - - if (!(mca_dev->pos[2] & MCA_ENABLED)) { - - /* enabled bit is in POS 2 */ - - mca_dev->status = MCA_ADAPTER_DISABLED; - } -} /* mca_configure_adapter_status */ - -/*--------------------------------------------------------------------*/ - -static struct resource mca_standard_resources[] = { - { .start = 0x60, .end = 0x60, .name = "system control port B (MCA)" }, - { .start = 0x90, .end = 0x90, .name = "arbitration (MCA)" }, - { .start = 0x91, .end = 0x91, .name = "card Select Feedback (MCA)" }, - { .start = 0x92, .end = 0x92, .name = "system Control port A (MCA)" }, - { .start = 0x94, .end = 0x94, .name = "system board setup (MCA)" }, - { .start = 0x96, .end = 0x97, .name = "POS (MCA)" }, - { .start = 0x100, .end = 0x107, .name = "POS (MCA)" } -}; - -#define MCA_STANDARD_RESOURCES ARRAY_SIZE(mca_standard_resources) - -/* - * mca_read_and_store_pos - read the POS registers into a memory buffer - * @pos: a char pointer to 8 bytes, contains the POS register value on - * successful return - * - * Returns 1 if a card actually exists (i.e. the pos isn't - * all 0xff) or 0 otherwise - */ -static int mca_read_and_store_pos(unsigned char *pos) -{ - int j; - int found = 0; - - for (j = 0; j < 8; j++) { - pos[j] = inb_p(MCA_POS_REG(j)); - if (pos[j] != 0xff) { - /* 0xff all across means no device. 0x00 means - * something's broken, but a device is - * probably there. However, if you get 0x00 - * from a motherboard register it won't matter - * what we find. For the record, on the - * 57SLC, the integrated SCSI adapter has - * 0xffff for the adapter ID, but nonzero for - * other registers. 
*/ - - found = 1; - } - } - return found; -} - -static unsigned char mca_pc_read_pos(struct mca_device *mca_dev, int reg) -{ - unsigned char byte; - unsigned long flags; - - if (reg < 0 || reg >= 8) - return 0; - - spin_lock_irqsave(&mca_lock, flags); - if (mca_dev->pos_register) { - /* Disable adapter setup, enable motherboard setup */ - - outb_p(0, MCA_ADAPTER_SETUP_REG); - outb_p(mca_dev->pos_register, MCA_MOTHERBOARD_SETUP_REG); - - byte = inb_p(MCA_POS_REG(reg)); - outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG); - } else { - - /* Make sure motherboard setup is off */ - - outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG); - - /* Read the appropriate register */ - - outb_p(0x8|(mca_dev->slot & 0xf), MCA_ADAPTER_SETUP_REG); - byte = inb_p(MCA_POS_REG(reg)); - outb_p(0, MCA_ADAPTER_SETUP_REG); - } - spin_unlock_irqrestore(&mca_lock, flags); - - mca_dev->pos[reg] = byte; - - return byte; -} - -static void mca_pc_write_pos(struct mca_device *mca_dev, int reg, - unsigned char byte) -{ - unsigned long flags; - - if (reg < 0 || reg >= 8) - return; - - spin_lock_irqsave(&mca_lock, flags); - - /* Make sure motherboard setup is off */ - - outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG); - - /* Read in the appropriate register */ - - outb_p(0x8|(mca_dev->slot&0xf), MCA_ADAPTER_SETUP_REG); - outb_p(byte, MCA_POS_REG(reg)); - outb_p(0, MCA_ADAPTER_SETUP_REG); - - spin_unlock_irqrestore(&mca_lock, flags); - - /* Update the global register list, while we have the byte */ - - mca_dev->pos[reg] = byte; - -} - -/* for the primary MCA bus, we have identity transforms */ -static int mca_dummy_transform_irq(struct mca_device *mca_dev, int irq) -{ - return irq; -} - -static int mca_dummy_transform_ioport(struct mca_device *mca_dev, int port) -{ - return port; -} - -static void *mca_dummy_transform_memory(struct mca_device *mca_dev, void *mem) -{ - return mem; -} - - -static int __init mca_init(void) -{ - unsigned int i, j; - struct mca_device *mca_dev; - unsigned char pos[8]; - short mca_builtin_scsi_ports[] = {0xf7, 0xfd, 0x00}; - struct mca_bus *bus; - - /* - * WARNING: Be careful when making changes here. Putting an adapter - * and the motherboard simultaneously into setup mode may result in - * damage to chips (according to The Indispensable PC Hardware Book - * by Hans-Peter Messmer). Also, we disable system interrupts (so - * that we are not disturbed in the middle of this). 
- */ - - /* Make sure the MCA bus is present */ - - if (mca_system_init()) { - printk(KERN_ERR "MCA bus system initialisation failed\n"); - return -ENODEV; - } - - if (!MCA_bus) - return -ENODEV; - - printk(KERN_INFO "Micro Channel bus detected.\n"); - - /* All MCA systems have at least a primary bus */ - bus = mca_attach_bus(MCA_PRIMARY_BUS); - if (!bus) - goto out_nomem; - bus->default_dma_mask = 0xffffffffLL; - bus->f.mca_write_pos = mca_pc_write_pos; - bus->f.mca_read_pos = mca_pc_read_pos; - bus->f.mca_transform_irq = mca_dummy_transform_irq; - bus->f.mca_transform_ioport = mca_dummy_transform_ioport; - bus->f.mca_transform_memory = mca_dummy_transform_memory; - - /* get the motherboard device */ - mca_dev = kzalloc(sizeof(struct mca_device), GFP_KERNEL); - if (unlikely(!mca_dev)) - goto out_nomem; - - /* - * We do not expect many MCA interrupts during initialization, - * but let us be safe: - */ - spin_lock_irq(&mca_lock); - - /* Make sure adapter setup is off */ - - outb_p(0, MCA_ADAPTER_SETUP_REG); - - /* Read motherboard POS registers */ - - mca_dev->pos_register = 0x7f; - outb_p(mca_dev->pos_register, MCA_MOTHERBOARD_SETUP_REG); - mca_dev->name[0] = 0; - mca_read_and_store_pos(mca_dev->pos); - mca_configure_adapter_status(mca_dev); - /* fake POS and slot for a motherboard */ - mca_dev->pos_id = MCA_MOTHERBOARD_POS; - mca_dev->slot = MCA_MOTHERBOARD; - mca_register_device(MCA_PRIMARY_BUS, mca_dev); - - mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC); - if (unlikely(!mca_dev)) - goto out_unlock_nomem; - - /* Put motherboard into video setup mode, read integrated video - * POS registers, and turn motherboard setup off. - */ - - mca_dev->pos_register = 0xdf; - outb_p(mca_dev->pos_register, MCA_MOTHERBOARD_SETUP_REG); - mca_dev->name[0] = 0; - mca_read_and_store_pos(mca_dev->pos); - mca_configure_adapter_status(mca_dev); - /* fake POS and slot for the integrated video */ - mca_dev->pos_id = MCA_INTEGVIDEO_POS; - mca_dev->slot = MCA_INTEGVIDEO; - mca_register_device(MCA_PRIMARY_BUS, mca_dev); - - /* - * Put motherboard into scsi setup mode, read integrated scsi - * POS registers, and turn motherboard setup off. - * - * It seems there are two possible SCSI registers. Martin says that - * for the 56,57, 0xf7 is the one, but fails on the 76. - * Alfredo (apena@vnet.ibm.com) says - * 0xfd works on his machine. We'll try both of them. I figure it's - * a good bet that only one could be valid at a time. This could - * screw up though if one is used for something else on the other - * machine. - */ - - for (i = 0; (which_scsi = mca_builtin_scsi_ports[i]) != 0; i++) { - outb_p(which_scsi, MCA_MOTHERBOARD_SETUP_REG); - if (mca_read_and_store_pos(pos)) - break; - } - if (which_scsi) { - /* found a scsi card */ - mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC); - if (unlikely(!mca_dev)) - goto out_unlock_nomem; - - for (j = 0; j < 8; j++) - mca_dev->pos[j] = pos[j]; - - mca_configure_adapter_status(mca_dev); - /* fake POS and slot for integrated SCSI controller */ - mca_dev->pos_id = MCA_INTEGSCSI_POS; - mca_dev->slot = MCA_INTEGSCSI; - mca_dev->pos_register = which_scsi; - mca_register_device(MCA_PRIMARY_BUS, mca_dev); - } - - /* Turn off motherboard setup */ - - outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG); - - /* - * Now loop over MCA slots: put each adapter into setup mode, and - * read its POS registers. Then put adapter setup off. 
- */ - - for (i = 0; i < MCA_MAX_SLOT_NR; i++) { - outb_p(0x8|(i&0xf), MCA_ADAPTER_SETUP_REG); - if (!mca_read_and_store_pos(pos)) - continue; - - mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC); - if (unlikely(!mca_dev)) - goto out_unlock_nomem; - - for (j = 0; j < 8; j++) - mca_dev->pos[j] = pos[j]; - - mca_dev->driver_loaded = 0; - mca_dev->slot = i; - mca_dev->pos_register = 0; - mca_configure_adapter_status(mca_dev); - mca_register_device(MCA_PRIMARY_BUS, mca_dev); - } - outb_p(0, MCA_ADAPTER_SETUP_REG); - - /* Enable interrupts and return memory start */ - spin_unlock_irq(&mca_lock); - - for (i = 0; i < MCA_STANDARD_RESOURCES; i++) - request_resource(&ioport_resource, mca_standard_resources + i); - - mca_do_proc_init(); - - return 0; - - out_unlock_nomem: - spin_unlock_irq(&mca_lock); - out_nomem: - printk(KERN_EMERG "Failed memory allocation in MCA setup!\n"); - return -ENOMEM; -} - -subsys_initcall(mca_init); - -/*--------------------------------------------------------------------*/ - -static __kprobes void -mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag) -{ - int slot = mca_dev->slot; - - if (slot == MCA_INTEGSCSI) { - printk(KERN_CRIT "NMI: caused by MCA integrated SCSI adapter (%s)\n", - mca_dev->name); - } else if (slot == MCA_INTEGVIDEO) { - printk(KERN_CRIT "NMI: caused by MCA integrated video adapter (%s)\n", - mca_dev->name); - } else if (slot == MCA_MOTHERBOARD) { - printk(KERN_CRIT "NMI: caused by motherboard (%s)\n", - mca_dev->name); - } - - /* More info available in POS 6 and 7? */ - - if (check_flag) { - unsigned char pos6, pos7; - - pos6 = mca_device_read_pos(mca_dev, 6); - pos7 = mca_device_read_pos(mca_dev, 7); - - printk(KERN_CRIT "NMI: POS 6 = 0x%x, POS 7 = 0x%x\n", pos6, pos7); - } - -} /* mca_handle_nmi_slot */ - -/*--------------------------------------------------------------------*/ - -static int __kprobes mca_handle_nmi_callback(struct device *dev, void *data) -{ - struct mca_device *mca_dev = to_mca_device(dev); - unsigned char pos5; - - pos5 = mca_device_read_pos(mca_dev, 5); - - if (!(pos5 & 0x80)) { - /* - * Bit 7 of POS 5 is reset when this adapter has a hardware - * error. Bit 7 it reset if there's error information - * available in POS 6 and 7. - */ - mca_handle_nmi_device(mca_dev, !(pos5 & 0x40)); - return 1; - } - return 0; -} - -void __kprobes mca_handle_nmi(void) -{ - /* - * First try - scan the various adapters and see if a specific - * adapter was responsible for the error. - */ - bus_for_each_dev(&mca_bus_type, NULL, NULL, mca_handle_nmi_callback); -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/microcode_amd.c b/ANDROID_3.4.5/arch/x86/kernel/microcode_amd.c deleted file mode 100644 index 8a2ce8fd..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/microcode_amd.c +++ /dev/null @@ -1,395 +0,0 @@ -/* - * AMD CPU Microcode Update Driver for Linux - * Copyright (C) 2008-2011 Advanced Micro Devices Inc. - * - * Author: Peter Oruba <peter.oruba@amd.com> - * - * Based on work by: - * Tigran Aivazian <tigran@aivazian.fsnet.co.uk> - * - * Maintainers: - * Andreas Herrmann <andreas.herrmann3@amd.com> - * Borislav Petkov <borislav.petkov@amd.com> - * - * This driver allows to upgrade microcode on F10h AMD - * CPUs and later. - * - * Licensed under the terms of the GNU General Public - * License version 2. See file COPYING for details. 
- */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/firmware.h> -#include <linux/pci_ids.h> -#include <linux/uaccess.h> -#include <linux/vmalloc.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/pci.h> - -#include <asm/microcode.h> -#include <asm/processor.h> -#include <asm/msr.h> - -MODULE_DESCRIPTION("AMD Microcode Update Driver"); -MODULE_AUTHOR("Peter Oruba"); -MODULE_LICENSE("GPL v2"); - -#define UCODE_MAGIC 0x00414d44 -#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 -#define UCODE_UCODE_TYPE 0x00000001 - -struct equiv_cpu_entry { - u32 installed_cpu; - u32 fixed_errata_mask; - u32 fixed_errata_compare; - u16 equiv_cpu; - u16 res; -} __attribute__((packed)); - -struct microcode_header_amd { - u32 data_code; - u32 patch_id; - u16 mc_patch_data_id; - u8 mc_patch_data_len; - u8 init_flag; - u32 mc_patch_data_checksum; - u32 nb_dev_id; - u32 sb_dev_id; - u16 processor_rev_id; - u8 nb_rev_id; - u8 sb_rev_id; - u8 bios_api_rev; - u8 reserved1[3]; - u32 match_reg[8]; -} __attribute__((packed)); - -struct microcode_amd { - struct microcode_header_amd hdr; - unsigned int mpb[0]; -}; - -#define SECTION_HDR_SIZE 8 -#define CONTAINER_HDR_SZ 12 - -static struct equiv_cpu_entry *equiv_cpu_table; - -/* page-sized ucode patch buffer */ -void *patch; - -static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) -{ - struct cpuinfo_x86 *c = &cpu_data(cpu); - - csig->rev = c->microcode; - pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev); - - return 0; -} - -static unsigned int verify_ucode_size(int cpu, u32 patch_size, - unsigned int size) -{ - struct cpuinfo_x86 *c = &cpu_data(cpu); - u32 max_size; - -#define F1XH_MPB_MAX_SIZE 2048 -#define F14H_MPB_MAX_SIZE 1824 -#define F15H_MPB_MAX_SIZE 4096 - - switch (c->x86) { - case 0x14: - max_size = F14H_MPB_MAX_SIZE; - break; - case 0x15: - max_size = F15H_MPB_MAX_SIZE; - break; - default: - max_size = F1XH_MPB_MAX_SIZE; - break; - } - - if (patch_size > min_t(u32, size, max_size)) { - pr_err("patch size mismatch\n"); - return 0; - } - - return patch_size; -} - -static u16 find_equiv_id(void) -{ - unsigned int current_cpu_id, i = 0; - - BUG_ON(equiv_cpu_table == NULL); - - current_cpu_id = cpuid_eax(0x00000001); - - while (equiv_cpu_table[i].installed_cpu != 0) { - if (current_cpu_id == equiv_cpu_table[i].installed_cpu) - return equiv_cpu_table[i].equiv_cpu; - - i++; - } - return 0; -} - -/* - * we signal a good patch is found by returning its size > 0 - */ -static int get_matching_microcode(int cpu, const u8 *ucode_ptr, - unsigned int leftover_size, int rev, - unsigned int *current_size) -{ - struct microcode_header_amd *mc_hdr; - unsigned int actual_size; - u16 equiv_cpu_id; - - /* size of the current patch we're staring at */ - *current_size = *(u32 *)(ucode_ptr + 4) + SECTION_HDR_SIZE; - - equiv_cpu_id = find_equiv_id(); - if (!equiv_cpu_id) - return 0; - - /* - * let's look at the patch header itself now - */ - mc_hdr = (struct microcode_header_amd *)(ucode_ptr + SECTION_HDR_SIZE); - - if (mc_hdr->processor_rev_id != equiv_cpu_id) - return 0; - - /* ucode might be chipset specific -- currently we don't support this */ - if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) { - pr_err("CPU%d: chipset specific code not yet supported\n", - cpu); - return 0; - } - - if (mc_hdr->patch_id <= rev) - return 0; - - /* - * now that the header looks sane, verify its size - */ - actual_size = verify_ucode_size(cpu, *current_size, leftover_size); - if (!actual_size) - return 0; - - /* clear the patch buffer */ - 
memset(patch, 0, PAGE_SIZE); - - /* all looks ok, get the binary patch */ - get_ucode_data(patch, ucode_ptr + SECTION_HDR_SIZE, actual_size); - - return actual_size; -} - -static int apply_microcode_amd(int cpu) -{ - u32 rev, dummy; - int cpu_num = raw_smp_processor_id(); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; - struct microcode_amd *mc_amd = uci->mc; - struct cpuinfo_x86 *c = &cpu_data(cpu); - - /* We should bind the task to the CPU */ - BUG_ON(cpu_num != cpu); - - if (mc_amd == NULL) - return 0; - - wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); - /* get patch id after patching */ - rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - - /* check current patch id and patch's id for match */ - if (rev != mc_amd->hdr.patch_id) { - pr_err("CPU%d: update failed for patch_level=0x%08x\n", - cpu, mc_amd->hdr.patch_id); - return -1; - } - - pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev); - uci->cpu_sig.rev = rev; - c->microcode = rev; - - return 0; -} - -static int install_equiv_cpu_table(const u8 *buf) -{ - unsigned int *ibuf = (unsigned int *)buf; - unsigned int type = ibuf[1]; - unsigned int size = ibuf[2]; - - if (type != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { - pr_err("empty section/" - "invalid type field in container file section header\n"); - return -EINVAL; - } - - equiv_cpu_table = vmalloc(size); - if (!equiv_cpu_table) { - pr_err("failed to allocate equivalent CPU table\n"); - return -ENOMEM; - } - - get_ucode_data(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size); - - /* add header length */ - return size + CONTAINER_HDR_SZ; -} - -static void free_equiv_cpu_table(void) -{ - vfree(equiv_cpu_table); - equiv_cpu_table = NULL; -} - -static enum ucode_state -generic_load_microcode(int cpu, const u8 *data, size_t size) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - struct microcode_header_amd *mc_hdr = NULL; - unsigned int mc_size, leftover, current_size = 0; - int offset; - const u8 *ucode_ptr = data; - void *new_mc = NULL; - unsigned int new_rev = uci->cpu_sig.rev; - enum ucode_state state = UCODE_ERROR; - - offset = install_equiv_cpu_table(ucode_ptr); - if (offset < 0) { - pr_err("failed to create equivalent cpu table\n"); - goto out; - } - ucode_ptr += offset; - leftover = size - offset; - - if (*(u32 *)ucode_ptr != UCODE_UCODE_TYPE) { - pr_err("invalid type field in container file section header\n"); - goto free_table; - } - - while (leftover) { - mc_size = get_matching_microcode(cpu, ucode_ptr, leftover, - new_rev, ¤t_size); - if (mc_size) { - mc_hdr = patch; - new_mc = patch; - new_rev = mc_hdr->patch_id; - goto out_ok; - } - - ucode_ptr += current_size; - leftover -= current_size; - } - - if (!new_mc) { - state = UCODE_NFOUND; - goto free_table; - } - -out_ok: - uci->mc = new_mc; - state = UCODE_OK; - pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n", - cpu, uci->cpu_sig.rev, new_rev); - -free_table: - free_equiv_cpu_table(); - -out: - return state; -} - -/* - * AMD microcode firmware naming convention, up to family 15h they are in - * the legacy file: - * - * amd-ucode/microcode_amd.bin - * - * This legacy file is always smaller than 2K in size. - * - * Starting at family 15h they are in family specific firmware files: - * - * amd-ucode/microcode_amd_fam15h.bin - * amd-ucode/microcode_amd_fam16h.bin - * ... - * - * These might be larger than 2K. 
- */ -static enum ucode_state request_microcode_amd(int cpu, struct device *device) -{ - char fw_name[36] = "amd-ucode/microcode_amd.bin"; - const struct firmware *fw; - enum ucode_state ret = UCODE_NFOUND; - struct cpuinfo_x86 *c = &cpu_data(cpu); - - if (c->x86 >= 0x15) - snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86); - - if (request_firmware(&fw, (const char *)fw_name, device)) { - pr_err("failed to load file %s\n", fw_name); - goto out; - } - - ret = UCODE_ERROR; - if (*(u32 *)fw->data != UCODE_MAGIC) { - pr_err("invalid magic value (0x%08x)\n", *(u32 *)fw->data); - goto fw_release; - } - - ret = generic_load_microcode(cpu, fw->data, fw->size); - -fw_release: - release_firmware(fw); - -out: - return ret; -} - -static enum ucode_state -request_microcode_user(int cpu, const void __user *buf, size_t size) -{ - return UCODE_ERROR; -} - -static void microcode_fini_cpu_amd(int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - uci->mc = NULL; -} - -static struct microcode_ops microcode_amd_ops = { - .request_microcode_user = request_microcode_user, - .request_microcode_fw = request_microcode_amd, - .collect_cpu_info = collect_cpu_info_amd, - .apply_microcode = apply_microcode_amd, - .microcode_fini_cpu = microcode_fini_cpu_amd, -}; - -struct microcode_ops * __init init_amd_microcode(void) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - - if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { - pr_warning("AMD CPU family 0x%x not supported\n", c->x86); - return NULL; - } - - patch = (void *)get_zeroed_page(GFP_KERNEL); - if (!patch) - return NULL; - - return µcode_amd_ops; -} - -void __exit exit_amd_microcode(void) -{ - free_page((unsigned long)patch); -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/microcode_core.c b/ANDROID_3.4.5/arch/x86/kernel/microcode_core.c deleted file mode 100644 index c9bda6d6..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/microcode_core.c +++ /dev/null @@ -1,605 +0,0 @@ -/* - * Intel CPU Microcode Update Driver for Linux - * - * Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk> - * 2006 Shaohua Li <shaohua.li@intel.com> - * - * This driver allows to upgrade microcode on Intel processors - * belonging to IA-32 family - PentiumPro, Pentium II, - * Pentium III, Xeon, Pentium 4, etc. - * - * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture - * Software Developer's Manual - * Order Number 253668 or free download from: - * - * http://developer.intel.com/Assets/PDF/manual/253668.pdf - * - * For more information, go to http://www.urbanmyth.org/microcode - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * 1.0 16 Feb 2000, Tigran Aivazian <tigran@sco.com> - * Initial release. - * 1.01 18 Feb 2000, Tigran Aivazian <tigran@sco.com> - * Added read() support + cleanups. - * 1.02 21 Feb 2000, Tigran Aivazian <tigran@sco.com> - * Added 'device trimming' support. open(O_WRONLY) zeroes - * and frees the saved copy of applied microcode. - * 1.03 29 Feb 2000, Tigran Aivazian <tigran@sco.com> - * Made to use devfs (/dev/cpu/microcode) + cleanups. - * 1.04 06 Jun 2000, Simon Trimmer <simon@veritas.com> - * Added misc device support (now uses both devfs and misc). - * Added MICROCODE_IOCFREE ioctl to clear memory. 
- * 1.05 09 Jun 2000, Simon Trimmer <simon@veritas.com> - * Messages for error cases (non Intel & no suitable microcode). - * 1.06 03 Aug 2000, Tigran Aivazian <tigran@veritas.com> - * Removed ->release(). Removed exclusive open and status bitmap. - * Added microcode_rwsem to serialize read()/write()/ioctl(). - * Removed global kernel lock usage. - * 1.07 07 Sep 2000, Tigran Aivazian <tigran@veritas.com> - * Write 0 to 0x8B msr and then cpuid before reading revision, - * so that it works even if there were no update done by the - * BIOS. Otherwise, reading from 0x8B gives junk (which happened - * to be 0 on my machine which is why it worked even when I - * disabled update by the BIOS) - * Thanks to Eric W. Biederman <ebiederman@lnxi.com> for the fix. - * 1.08 11 Dec 2000, Richard Schaal <richard.schaal@intel.com> and - * Tigran Aivazian <tigran@veritas.com> - * Intel Pentium 4 processor support and bugfixes. - * 1.09 30 Oct 2001, Tigran Aivazian <tigran@veritas.com> - * Bugfix for HT (Hyper-Threading) enabled processors - * whereby processor resources are shared by all logical processors - * in a single CPU package. - * 1.10 28 Feb 2002 Asit K Mallick <asit.k.mallick@intel.com> and - * Tigran Aivazian <tigran@veritas.com>, - * Serialize updates as required on HT processors due to - * speculative nature of implementation. - * 1.11 22 Mar 2002 Tigran Aivazian <tigran@veritas.com> - * Fix the panic when writing zero-length microcode chunk. - * 1.12 29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>, - * Jun Nakajima <jun.nakajima@intel.com> - * Support for the microcode updates in the new format. - * 1.13 10 Oct 2003 Tigran Aivazian <tigran@veritas.com> - * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl - * because we no longer hold a copy of applied microcode - * in kernel memory. - * 1.14 25 Jun 2004 Tigran Aivazian <tigran@veritas.com> - * Fix sigmatch() macro to handle old CPUs with pf == 0. - * Thanks to Stuart Swales for pointing out this bug. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/platform_device.h> -#include <linux/miscdevice.h> -#include <linux/capability.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/mutex.h> -#include <linux/cpu.h> -#include <linux/fs.h> -#include <linux/mm.h> -#include <linux/syscore_ops.h> - -#include <asm/microcode.h> -#include <asm/processor.h> -#include <asm/cpu_device_id.h> - -MODULE_DESCRIPTION("Microcode Update Driver"); -MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); -MODULE_LICENSE("GPL"); - -#define MICROCODE_VERSION "2.00" - -static struct microcode_ops *microcode_ops; - -/* - * Synchronization. - * - * All non cpu-hotplug-callback call sites use: - * - * - microcode_mutex to synchronize with each other; - * - get/put_online_cpus() to synchronize with - * the cpu-hotplug-callback call sites. - * - * We guarantee that only a single cpu is being - * updated at any particular moment of time. 
- */ -static DEFINE_MUTEX(microcode_mutex); - -struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; -EXPORT_SYMBOL_GPL(ucode_cpu_info); - -/* - * Operations that are run on a target cpu: - */ - -struct cpu_info_ctx { - struct cpu_signature *cpu_sig; - int err; -}; - -static void collect_cpu_info_local(void *arg) -{ - struct cpu_info_ctx *ctx = arg; - - ctx->err = microcode_ops->collect_cpu_info(smp_processor_id(), - ctx->cpu_sig); -} - -static int collect_cpu_info_on_target(int cpu, struct cpu_signature *cpu_sig) -{ - struct cpu_info_ctx ctx = { .cpu_sig = cpu_sig, .err = 0 }; - int ret; - - ret = smp_call_function_single(cpu, collect_cpu_info_local, &ctx, 1); - if (!ret) - ret = ctx.err; - - return ret; -} - -static int collect_cpu_info(int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - int ret; - - memset(uci, 0, sizeof(*uci)); - - ret = collect_cpu_info_on_target(cpu, &uci->cpu_sig); - if (!ret) - uci->valid = 1; - - return ret; -} - -struct apply_microcode_ctx { - int err; -}; - -static void apply_microcode_local(void *arg) -{ - struct apply_microcode_ctx *ctx = arg; - - ctx->err = microcode_ops->apply_microcode(smp_processor_id()); -} - -static int apply_microcode_on_target(int cpu) -{ - struct apply_microcode_ctx ctx = { .err = 0 }; - int ret; - - ret = smp_call_function_single(cpu, apply_microcode_local, &ctx, 1); - if (!ret) - ret = ctx.err; - - return ret; -} - -#ifdef CONFIG_MICROCODE_OLD_INTERFACE -static int do_microcode_update(const void __user *buf, size_t size) -{ - int error = 0; - int cpu; - - for_each_online_cpu(cpu) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - enum ucode_state ustate; - - if (!uci->valid) - continue; - - ustate = microcode_ops->request_microcode_user(cpu, buf, size); - if (ustate == UCODE_ERROR) { - error = -1; - break; - } else if (ustate == UCODE_OK) - apply_microcode_on_target(cpu); - } - - return error; -} - -static int microcode_open(struct inode *inode, struct file *file) -{ - return capable(CAP_SYS_RAWIO) ? 
nonseekable_open(inode, file) : -EPERM; -} - -static ssize_t microcode_write(struct file *file, const char __user *buf, - size_t len, loff_t *ppos) -{ - ssize_t ret = -EINVAL; - - if ((len >> PAGE_SHIFT) > totalram_pages) { - pr_err("too much data (max %ld pages)\n", totalram_pages); - return ret; - } - - get_online_cpus(); - mutex_lock(µcode_mutex); - - if (do_microcode_update(buf, len) == 0) - ret = (ssize_t)len; - - mutex_unlock(µcode_mutex); - put_online_cpus(); - - return ret; -} - -static const struct file_operations microcode_fops = { - .owner = THIS_MODULE, - .write = microcode_write, - .open = microcode_open, - .llseek = no_llseek, -}; - -static struct miscdevice microcode_dev = { - .minor = MICROCODE_MINOR, - .name = "microcode", - .nodename = "cpu/microcode", - .fops = µcode_fops, -}; - -static int __init microcode_dev_init(void) -{ - int error; - - error = misc_register(µcode_dev); - if (error) { - pr_err("can't misc_register on minor=%d\n", MICROCODE_MINOR); - return error; - } - - return 0; -} - -static void __exit microcode_dev_exit(void) -{ - misc_deregister(µcode_dev); -} - -MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); -MODULE_ALIAS("devname:cpu/microcode"); -#else -#define microcode_dev_init() 0 -#define microcode_dev_exit() do { } while (0) -#endif - -/* fake device for request_firmware */ -static struct platform_device *microcode_pdev; - -static int reload_for_cpu(int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - int err = 0; - - mutex_lock(µcode_mutex); - if (uci->valid) { - enum ucode_state ustate; - - ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev); - if (ustate == UCODE_OK) - apply_microcode_on_target(cpu); - else - if (ustate == UCODE_ERROR) - err = -EINVAL; - } - mutex_unlock(µcode_mutex); - - return err; -} - -static ssize_t reload_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t size) -{ - unsigned long val; - int cpu = dev->id; - int ret = 0; - char *end; - - val = simple_strtoul(buf, &end, 0); - if (end == buf) - return -EINVAL; - - if (val == 1) { - get_online_cpus(); - if (cpu_online(cpu)) - ret = reload_for_cpu(cpu); - put_online_cpus(); - } - - if (!ret) - ret = size; - - return ret; -} - -static ssize_t version_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - - return sprintf(buf, "0x%x\n", uci->cpu_sig.rev); -} - -static ssize_t pf_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - - return sprintf(buf, "0x%x\n", uci->cpu_sig.pf); -} - -static DEVICE_ATTR(reload, 0200, NULL, reload_store); -static DEVICE_ATTR(version, 0400, version_show, NULL); -static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL); - -static struct attribute *mc_default_attrs[] = { - &dev_attr_reload.attr, - &dev_attr_version.attr, - &dev_attr_processor_flags.attr, - NULL -}; - -static struct attribute_group mc_attr_group = { - .attrs = mc_default_attrs, - .name = "microcode", -}; - -static void microcode_fini_cpu(int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - microcode_ops->microcode_fini_cpu(cpu); - uci->valid = 0; -} - -static enum ucode_state microcode_resume_cpu(int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - if (!uci->mc) - return UCODE_NFOUND; - - pr_debug("CPU%d updated upon resume\n", cpu); - apply_microcode_on_target(cpu); - - return UCODE_OK; -} - -static enum ucode_state microcode_init_cpu(int cpu) -{ - 
enum ucode_state ustate; - - if (collect_cpu_info(cpu)) - return UCODE_ERROR; - - /* --dimm. Trigger a delayed update? */ - if (system_state != SYSTEM_RUNNING) - return UCODE_NFOUND; - - ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev); - - if (ustate == UCODE_OK) { - pr_debug("CPU%d updated upon init\n", cpu); - apply_microcode_on_target(cpu); - } - - return ustate; -} - -static enum ucode_state microcode_update_cpu(int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - enum ucode_state ustate; - - if (uci->valid) - ustate = microcode_resume_cpu(cpu); - else - ustate = microcode_init_cpu(cpu); - - return ustate; -} - -static int mc_device_add(struct device *dev, struct subsys_interface *sif) -{ - int err, cpu = dev->id; - - if (!cpu_online(cpu)) - return 0; - - pr_debug("CPU%d added\n", cpu); - - err = sysfs_create_group(&dev->kobj, &mc_attr_group); - if (err) - return err; - - if (microcode_init_cpu(cpu) == UCODE_ERROR) - return -EINVAL; - - return err; -} - -static int mc_device_remove(struct device *dev, struct subsys_interface *sif) -{ - int cpu = dev->id; - - if (!cpu_online(cpu)) - return 0; - - pr_debug("CPU%d removed\n", cpu); - microcode_fini_cpu(cpu); - sysfs_remove_group(&dev->kobj, &mc_attr_group); - return 0; -} - -static struct subsys_interface mc_cpu_interface = { - .name = "microcode", - .subsys = &cpu_subsys, - .add_dev = mc_device_add, - .remove_dev = mc_device_remove, -}; - -/** - * mc_bp_resume - Update boot CPU microcode during resume. - */ -static void mc_bp_resume(void) -{ - int cpu = smp_processor_id(); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - if (uci->valid && uci->mc) - microcode_ops->apply_microcode(cpu); -} - -static struct syscore_ops mc_syscore_ops = { - .resume = mc_bp_resume, -}; - -static __cpuinit int -mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - struct device *dev; - - dev = get_cpu_device(cpu); - switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - microcode_update_cpu(cpu); - case CPU_DOWN_FAILED: - case CPU_DOWN_FAILED_FROZEN: - pr_debug("CPU%d added\n", cpu); - if (sysfs_create_group(&dev->kobj, &mc_attr_group)) - pr_err("Failed to create group for CPU%d\n", cpu); - break; - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - /* Suspend is in progress, only remove the interface */ - sysfs_remove_group(&dev->kobj, &mc_attr_group); - pr_debug("CPU%d removed\n", cpu); - break; - - /* - * When a CPU goes offline, don't free up or invalidate the copy of - * the microcode in kernel memory, so that we can reuse it when the - * CPU comes back online without unnecessarily requesting the userspace - * for it again. 
- */ - case CPU_UP_CANCELED_FROZEN: - /* The CPU refused to come up during a system resume */ - microcode_fini_cpu(cpu); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block __refdata mc_cpu_notifier = { - .notifier_call = mc_cpu_callback, -}; - -#ifdef MODULE -/* Autoload on Intel and AMD systems */ -static const struct x86_cpu_id microcode_id[] = { -#ifdef CONFIG_MICROCODE_INTEL - { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, }, -#endif -#ifdef CONFIG_MICROCODE_AMD - { X86_VENDOR_AMD, X86_FAMILY_ANY, X86_MODEL_ANY, }, -#endif - {} -}; -MODULE_DEVICE_TABLE(x86cpu, microcode_id); -#endif - -static int __init microcode_init(void) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - int error; - - if (c->x86_vendor == X86_VENDOR_INTEL) - microcode_ops = init_intel_microcode(); - else if (c->x86_vendor == X86_VENDOR_AMD) - microcode_ops = init_amd_microcode(); - else - pr_err("no support for this CPU vendor\n"); - - if (!microcode_ops) - return -ENODEV; - - microcode_pdev = platform_device_register_simple("microcode", -1, - NULL, 0); - if (IS_ERR(microcode_pdev)) - return PTR_ERR(microcode_pdev); - - get_online_cpus(); - mutex_lock(µcode_mutex); - - error = subsys_interface_register(&mc_cpu_interface); - - mutex_unlock(µcode_mutex); - put_online_cpus(); - - if (error) - goto out_pdev; - - error = microcode_dev_init(); - if (error) - goto out_driver; - - register_syscore_ops(&mc_syscore_ops); - register_hotcpu_notifier(&mc_cpu_notifier); - - pr_info("Microcode Update Driver: v" MICROCODE_VERSION - " <tigran@aivazian.fsnet.co.uk>, Peter Oruba\n"); - - return 0; - -out_driver: - get_online_cpus(); - mutex_lock(µcode_mutex); - - subsys_interface_unregister(&mc_cpu_interface); - - mutex_unlock(µcode_mutex); - put_online_cpus(); - -out_pdev: - platform_device_unregister(microcode_pdev); - return error; - -} -module_init(microcode_init); - -static void __exit microcode_exit(void) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - - microcode_dev_exit(); - - unregister_hotcpu_notifier(&mc_cpu_notifier); - unregister_syscore_ops(&mc_syscore_ops); - - get_online_cpus(); - mutex_lock(µcode_mutex); - - subsys_interface_unregister(&mc_cpu_interface); - - mutex_unlock(µcode_mutex); - put_online_cpus(); - - platform_device_unregister(microcode_pdev); - - microcode_ops = NULL; - - if (c->x86_vendor == X86_VENDOR_AMD) - exit_amd_microcode(); - - pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); -} -module_exit(microcode_exit); diff --git a/ANDROID_3.4.5/arch/x86/kernel/microcode_intel.c b/ANDROID_3.4.5/arch/x86/kernel/microcode_intel.c deleted file mode 100644 index 0327e2b3..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/microcode_intel.c +++ /dev/null @@ -1,470 +0,0 @@ -/* - * Intel CPU Microcode Update Driver for Linux - * - * Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk> - * 2006 Shaohua Li <shaohua.li@intel.com> - * - * This driver allows to upgrade microcode on Intel processors - * belonging to IA-32 family - PentiumPro, Pentium II, - * Pentium III, Xeon, Pentium 4, etc. - * - * Reference: Section 8.11 of Volume 3a, IA-32 Intel? 
Architecture - * Software Developer's Manual - * Order Number 253668 or free download from: - * - * http://developer.intel.com/Assets/PDF/manual/253668.pdf - * - * For more information, go to http://www.urbanmyth.org/microcode - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * 1.0 16 Feb 2000, Tigran Aivazian <tigran@sco.com> - * Initial release. - * 1.01 18 Feb 2000, Tigran Aivazian <tigran@sco.com> - * Added read() support + cleanups. - * 1.02 21 Feb 2000, Tigran Aivazian <tigran@sco.com> - * Added 'device trimming' support. open(O_WRONLY) zeroes - * and frees the saved copy of applied microcode. - * 1.03 29 Feb 2000, Tigran Aivazian <tigran@sco.com> - * Made to use devfs (/dev/cpu/microcode) + cleanups. - * 1.04 06 Jun 2000, Simon Trimmer <simon@veritas.com> - * Added misc device support (now uses both devfs and misc). - * Added MICROCODE_IOCFREE ioctl to clear memory. - * 1.05 09 Jun 2000, Simon Trimmer <simon@veritas.com> - * Messages for error cases (non Intel & no suitable microcode). - * 1.06 03 Aug 2000, Tigran Aivazian <tigran@veritas.com> - * Removed ->release(). Removed exclusive open and status bitmap. - * Added microcode_rwsem to serialize read()/write()/ioctl(). - * Removed global kernel lock usage. - * 1.07 07 Sep 2000, Tigran Aivazian <tigran@veritas.com> - * Write 0 to 0x8B msr and then cpuid before reading revision, - * so that it works even if there were no update done by the - * BIOS. Otherwise, reading from 0x8B gives junk (which happened - * to be 0 on my machine which is why it worked even when I - * disabled update by the BIOS) - * Thanks to Eric W. Biederman <ebiederman@lnxi.com> for the fix. - * 1.08 11 Dec 2000, Richard Schaal <richard.schaal@intel.com> and - * Tigran Aivazian <tigran@veritas.com> - * Intel Pentium 4 processor support and bugfixes. - * 1.09 30 Oct 2001, Tigran Aivazian <tigran@veritas.com> - * Bugfix for HT (Hyper-Threading) enabled processors - * whereby processor resources are shared by all logical processors - * in a single CPU package. - * 1.10 28 Feb 2002 Asit K Mallick <asit.k.mallick@intel.com> and - * Tigran Aivazian <tigran@veritas.com>, - * Serialize updates as required on HT processors due to - * speculative nature of implementation. - * 1.11 22 Mar 2002 Tigran Aivazian <tigran@veritas.com> - * Fix the panic when writing zero-length microcode chunk. - * 1.12 29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>, - * Jun Nakajima <jun.nakajima@intel.com> - * Support for the microcode updates in the new format. - * 1.13 10 Oct 2003 Tigran Aivazian <tigran@veritas.com> - * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl - * because we no longer hold a copy of applied microcode - * in kernel memory. - * 1.14 25 Jun 2004 Tigran Aivazian <tigran@veritas.com> - * Fix sigmatch() macro to handle old CPUs with pf == 0. - * Thanks to Stuart Swales for pointing out this bug. 
- */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/firmware.h> -#include <linux/uaccess.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/vmalloc.h> - -#include <asm/microcode.h> -#include <asm/processor.h> -#include <asm/msr.h> - -MODULE_DESCRIPTION("Microcode Update Driver"); -MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); -MODULE_LICENSE("GPL"); - -struct microcode_header_intel { - unsigned int hdrver; - unsigned int rev; - unsigned int date; - unsigned int sig; - unsigned int cksum; - unsigned int ldrver; - unsigned int pf; - unsigned int datasize; - unsigned int totalsize; - unsigned int reserved[3]; -}; - -struct microcode_intel { - struct microcode_header_intel hdr; - unsigned int bits[0]; -}; - -/* microcode format is extended from prescott processors */ -struct extended_signature { - unsigned int sig; - unsigned int pf; - unsigned int cksum; -}; - -struct extended_sigtable { - unsigned int count; - unsigned int cksum; - unsigned int reserved[3]; - struct extended_signature sigs[0]; -}; - -#define DEFAULT_UCODE_DATASIZE (2000) -#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) -#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) -#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) -#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) -#define DWSIZE (sizeof(u32)) - -#define get_totalsize(mc) \ - (((struct microcode_intel *)mc)->hdr.totalsize ? \ - ((struct microcode_intel *)mc)->hdr.totalsize : \ - DEFAULT_UCODE_TOTALSIZE) - -#define get_datasize(mc) \ - (((struct microcode_intel *)mc)->hdr.datasize ? \ - ((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) - -#define sigmatch(s1, s2, p1, p2) \ - (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0)))) - -#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) - -static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) -{ - struct cpuinfo_x86 *c = &cpu_data(cpu_num); - unsigned int val[2]; - - memset(csig, 0, sizeof(*csig)); - - csig->sig = cpuid_eax(0x00000001); - - if ((c->x86_model >= 5) || (c->x86 > 6)) { - /* get processor flags from MSR 0x17 */ - rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); - csig->pf = 1 << ((val[1] >> 18) & 7); - } - - csig->rev = c->microcode; - pr_info("CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n", - cpu_num, csig->sig, csig->pf, csig->rev); - - return 0; -} - -static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf) -{ - return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1; -} - -static inline int -update_match_revision(struct microcode_header_intel *mc_header, int rev) -{ - return (mc_header->rev <= rev) ? 0 : 1; -} - -static int microcode_sanity_check(void *mc) -{ - unsigned long total_size, data_size, ext_table_size; - struct microcode_header_intel *mc_header = mc; - struct extended_sigtable *ext_header = NULL; - int sum, orig_sum, ext_sigcount = 0, i; - struct extended_signature *ext_sig; - - total_size = get_totalsize(mc_header); - data_size = get_datasize(mc_header); - - if (data_size + MC_HEADER_SIZE > total_size) { - pr_err("error! Bad data size in microcode data file\n"); - return -EINVAL; - } - - if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { - pr_err("error! 
Unknown microcode update format\n"); - return -EINVAL; - } - ext_table_size = total_size - (MC_HEADER_SIZE + data_size); - if (ext_table_size) { - if ((ext_table_size < EXT_HEADER_SIZE) - || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { - pr_err("error! Small exttable size in microcode data file\n"); - return -EINVAL; - } - ext_header = mc + MC_HEADER_SIZE + data_size; - if (ext_table_size != exttable_size(ext_header)) { - pr_err("error! Bad exttable size in microcode data file\n"); - return -EFAULT; - } - ext_sigcount = ext_header->count; - } - - /* check extended table checksum */ - if (ext_table_size) { - int ext_table_sum = 0; - int *ext_tablep = (int *)ext_header; - - i = ext_table_size / DWSIZE; - while (i--) - ext_table_sum += ext_tablep[i]; - if (ext_table_sum) { - pr_warning("aborting, bad extended signature table checksum\n"); - return -EINVAL; - } - } - - /* calculate the checksum */ - orig_sum = 0; - i = (MC_HEADER_SIZE + data_size) / DWSIZE; - while (i--) - orig_sum += ((int *)mc)[i]; - if (orig_sum) { - pr_err("aborting, bad checksum\n"); - return -EINVAL; - } - if (!ext_table_size) - return 0; - /* check extended signature checksum */ - for (i = 0; i < ext_sigcount; i++) { - ext_sig = (void *)ext_header + EXT_HEADER_SIZE + - EXT_SIGNATURE_SIZE * i; - sum = orig_sum - - (mc_header->sig + mc_header->pf + mc_header->cksum) - + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); - if (sum) { - pr_err("aborting, bad checksum\n"); - return -EINVAL; - } - } - return 0; -} - -/* - * return 0 - no update found - * return 1 - found update - */ -static int -get_matching_microcode(struct cpu_signature *cpu_sig, void *mc, int rev) -{ - struct microcode_header_intel *mc_header = mc; - struct extended_sigtable *ext_header; - unsigned long total_size = get_totalsize(mc_header); - int ext_sigcount, i; - struct extended_signature *ext_sig; - - if (!update_match_revision(mc_header, rev)) - return 0; - - if (update_match_cpu(cpu_sig, mc_header->sig, mc_header->pf)) - return 1; - - /* Look for ext. 
headers: */ - if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE) - return 0; - - ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE; - ext_sigcount = ext_header->count; - ext_sig = (void *)ext_header + EXT_HEADER_SIZE; - - for (i = 0; i < ext_sigcount; i++) { - if (update_match_cpu(cpu_sig, ext_sig->sig, ext_sig->pf)) - return 1; - ext_sig++; - } - return 0; -} - -static int apply_microcode(int cpu) -{ - struct microcode_intel *mc_intel; - struct ucode_cpu_info *uci; - unsigned int val[2]; - int cpu_num = raw_smp_processor_id(); - struct cpuinfo_x86 *c = &cpu_data(cpu_num); - - uci = ucode_cpu_info + cpu; - mc_intel = uci->mc; - - /* We should bind the task to the CPU */ - BUG_ON(cpu_num != cpu); - - if (mc_intel == NULL) - return 0; - - /* write microcode via MSR 0x79 */ - wrmsr(MSR_IA32_UCODE_WRITE, - (unsigned long) mc_intel->bits, - (unsigned long) mc_intel->bits >> 16 >> 16); - wrmsr(MSR_IA32_UCODE_REV, 0, 0); - - /* As documented in the SDM: Do a CPUID 1 here */ - sync_core(); - - /* get the current revision from MSR 0x8B */ - rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - - if (val[1] != mc_intel->hdr.rev) { - pr_err("CPU%d update to revision 0x%x failed\n", - cpu_num, mc_intel->hdr.rev); - return -1; - } - pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x\n", - cpu_num, val[1], - mc_intel->hdr.date & 0xffff, - mc_intel->hdr.date >> 24, - (mc_intel->hdr.date >> 16) & 0xff); - - uci->cpu_sig.rev = val[1]; - c->microcode = val[1]; - - return 0; -} - -static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, - int (*get_ucode_data)(void *, const void *, size_t)) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - u8 *ucode_ptr = data, *new_mc = NULL, *mc = NULL; - int new_rev = uci->cpu_sig.rev; - unsigned int leftover = size; - enum ucode_state state = UCODE_OK; - unsigned int curr_mc_size = 0; - - while (leftover) { - struct microcode_header_intel mc_header; - unsigned int mc_size; - - if (get_ucode_data(&mc_header, ucode_ptr, sizeof(mc_header))) - break; - - mc_size = get_totalsize(&mc_header); - if (!mc_size || mc_size > leftover) { - pr_err("error! 
Bad data in microcode data file\n"); - break; - } - - /* For performance reasons, reuse mc area when possible */ - if (!mc || mc_size > curr_mc_size) { - vfree(mc); - mc = vmalloc(mc_size); - if (!mc) - break; - curr_mc_size = mc_size; - } - - if (get_ucode_data(mc, ucode_ptr, mc_size) || - microcode_sanity_check(mc) < 0) { - break; - } - - if (get_matching_microcode(&uci->cpu_sig, mc, new_rev)) { - vfree(new_mc); - new_rev = mc_header.rev; - new_mc = mc; - mc = NULL; /* trigger new vmalloc */ - } - - ucode_ptr += mc_size; - leftover -= mc_size; - } - - vfree(mc); - - if (leftover) { - vfree(new_mc); - state = UCODE_ERROR; - goto out; - } - - if (!new_mc) { - state = UCODE_NFOUND; - goto out; - } - - vfree(uci->mc); - uci->mc = (struct microcode_intel *)new_mc; - - pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", - cpu, new_rev, uci->cpu_sig.rev); -out: - return state; -} - -static int get_ucode_fw(void *to, const void *from, size_t n) -{ - memcpy(to, from, n); - return 0; -} - -static enum ucode_state request_microcode_fw(int cpu, struct device *device) -{ - char name[30]; - struct cpuinfo_x86 *c = &cpu_data(cpu); - const struct firmware *firmware; - enum ucode_state ret; - - sprintf(name, "intel-ucode/%02x-%02x-%02x", - c->x86, c->x86_model, c->x86_mask); - - if (request_firmware(&firmware, name, device)) { - pr_debug("data file %s load failed\n", name); - return UCODE_NFOUND; - } - - ret = generic_load_microcode(cpu, (void *)firmware->data, - firmware->size, &get_ucode_fw); - - release_firmware(firmware); - - return ret; -} - -static int get_ucode_user(void *to, const void *from, size_t n) -{ - return copy_from_user(to, from, n); -} - -static enum ucode_state -request_microcode_user(int cpu, const void __user *buf, size_t size) -{ - return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user); -} - -static void microcode_fini_cpu(int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - vfree(uci->mc); - uci->mc = NULL; -} - -static struct microcode_ops microcode_intel_ops = { - .request_microcode_user = request_microcode_user, - .request_microcode_fw = request_microcode_fw, - .collect_cpu_info = collect_cpu_info, - .apply_microcode = apply_microcode, - .microcode_fini_cpu = microcode_fini_cpu, -}; - -struct microcode_ops * __init init_intel_microcode(void) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - - if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || - cpu_has(c, X86_FEATURE_IA64)) { - pr_err("Intel CPU family 0x%x not supported\n", c->x86); - return NULL; - } - - return µcode_intel_ops; -} - diff --git a/ANDROID_3.4.5/arch/x86/kernel/mmconf-fam10h_64.c b/ANDROID_3.4.5/arch/x86/kernel/mmconf-fam10h_64.c deleted file mode 100644 index ac861b83..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/mmconf-fam10h_64.c +++ /dev/null @@ -1,237 +0,0 @@ -/* - * AMD Family 10h mmconfig enablement - */ - -#include <linux/types.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/pci.h> -#include <linux/dmi.h> -#include <linux/range.h> - -#include <asm/pci-direct.h> -#include <linux/sort.h> -#include <asm/io.h> -#include <asm/msr.h> -#include <asm/acpi.h> -#include <asm/mmconfig.h> -#include <asm/pci_x86.h> - -struct pci_hostbridge_probe { - u32 bus; - u32 slot; - u32 vendor; - u32 device; -}; - -static u64 __cpuinitdata fam10h_pci_mmconf_base; - -static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = { - { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 }, - { 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 }, -}; - -static int __cpuinit 
cmp_range(const void *x1, const void *x2) -{ - const struct range *r1 = x1; - const struct range *r2 = x2; - int start1, start2; - - start1 = r1->start >> 32; - start2 = r2->start >> 32; - - return start1 - start2; -} - -#define MMCONF_UNIT (1ULL << FAM10H_MMIO_CONF_BASE_SHIFT) -#define MMCONF_MASK (~(MMCONF_UNIT - 1)) -#define MMCONF_SIZE (MMCONF_UNIT << 8) -/* need to avoid (0xfd<<32), (0xfe<<32), and (0xff<<32), ht used space */ -#define FAM10H_PCI_MMCONF_BASE (0xfcULL<<32) -#define BASE_VALID(b) ((b) + MMCONF_SIZE <= (0xfdULL<<32) || (b) >= (1ULL<<40)) -static void __cpuinit get_fam10h_pci_mmconf_base(void) -{ - int i; - unsigned bus; - unsigned slot; - int found; - - u64 val; - u32 address; - u64 tom2; - u64 base = FAM10H_PCI_MMCONF_BASE; - - int hi_mmio_num; - struct range range[8]; - - /* only try to get setting from BSP */ - if (fam10h_pci_mmconf_base) - return; - - if (!early_pci_allowed()) - return; - - found = 0; - for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { - u32 id; - u16 device; - u16 vendor; - - bus = pci_probes[i].bus; - slot = pci_probes[i].slot; - id = read_pci_config(bus, slot, 0, PCI_VENDOR_ID); - - vendor = id & 0xffff; - device = (id>>16) & 0xffff; - if (pci_probes[i].vendor == vendor && - pci_probes[i].device == device) { - found = 1; - break; - } - } - - if (!found) - return; - - /* SYS_CFG */ - address = MSR_K8_SYSCFG; - rdmsrl(address, val); - - /* TOP_MEM2 is not enabled? */ - if (!(val & (1<<21))) { - tom2 = 1ULL << 32; - } else { - /* TOP_MEM2 */ - address = MSR_K8_TOP_MEM2; - rdmsrl(address, val); - tom2 = max(val & 0xffffff800000ULL, 1ULL << 32); - } - - if (base <= tom2) - base = (tom2 + 2 * MMCONF_UNIT - 1) & MMCONF_MASK; - - /* - * need to check if the range is in the high mmio range that is - * above 4G - */ - hi_mmio_num = 0; - for (i = 0; i < 8; i++) { - u32 reg; - u64 start; - u64 end; - reg = read_pci_config(bus, slot, 1, 0x80 + (i << 3)); - if (!(reg & 3)) - continue; - - start = (u64)(reg & 0xffffff00) << 8; /* 39:16 on 31:8*/ - reg = read_pci_config(bus, slot, 1, 0x84 + (i << 3)); - end = ((u64)(reg & 0xffffff00) << 8) | 0xffff; /* 39:16 on 31:8*/ - - if (end < tom2) - continue; - - range[hi_mmio_num].start = start; - range[hi_mmio_num].end = end; - hi_mmio_num++; - } - - if (!hi_mmio_num) - goto out; - - /* sort the range */ - sort(range, hi_mmio_num, sizeof(struct range), cmp_range, NULL); - - if (range[hi_mmio_num - 1].end < base) - goto out; - if (range[0].start > base + MMCONF_SIZE) - goto out; - - /* need to find one window */ - base = (range[0].start & MMCONF_MASK) - MMCONF_UNIT; - if ((base > tom2) && BASE_VALID(base)) - goto out; - base = (range[hi_mmio_num - 1].end + MMCONF_UNIT) & MMCONF_MASK; - if (BASE_VALID(base)) - goto out; - /* need to find window between ranges */ - for (i = 1; i < hi_mmio_num; i++) { - base = (range[i - 1].end + MMCONF_UNIT) & MMCONF_MASK; - val = range[i].start & MMCONF_MASK; - if (val >= base + MMCONF_SIZE && BASE_VALID(base)) - goto out; - } - return; - -out: - fam10h_pci_mmconf_base = base; -} - -void __cpuinit fam10h_check_enable_mmcfg(void) -{ - u64 val; - u32 address; - - if (!(pci_probe & PCI_CHECK_ENABLE_AMD_MMCONF)) - return; - - address = MSR_FAM10H_MMIO_CONF_BASE; - rdmsrl(address, val); - - /* try to make sure that AP's setting is identical to BSP setting */ - if (val & FAM10H_MMIO_CONF_ENABLE) { - unsigned busnbits; - busnbits = (val >> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) & - FAM10H_MMIO_CONF_BUSRANGE_MASK; - - /* only trust the one handle 256 buses, if acpi=off */ - if (!acpi_pci_disabled || 
busnbits >= 8) { - u64 base = val & MMCONF_MASK; - - if (!fam10h_pci_mmconf_base) { - fam10h_pci_mmconf_base = base; - return; - } else if (fam10h_pci_mmconf_base == base) - return; - } - } - - /* - * if it is not enabled, try to enable it and assume only one segment - * with 256 buses - */ - get_fam10h_pci_mmconf_base(); - if (!fam10h_pci_mmconf_base) { - pci_probe &= ~PCI_CHECK_ENABLE_AMD_MMCONF; - return; - } - - printk(KERN_INFO "Enable MMCONFIG on AMD Family 10h\n"); - val &= ~((FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT) | - (FAM10H_MMIO_CONF_BUSRANGE_MASK<<FAM10H_MMIO_CONF_BUSRANGE_SHIFT)); - val |= fam10h_pci_mmconf_base | (8 << FAM10H_MMIO_CONF_BUSRANGE_SHIFT) | - FAM10H_MMIO_CONF_ENABLE; - wrmsrl(address, val); -} - -static int __init set_check_enable_amd_mmconf(const struct dmi_system_id *d) -{ - pci_probe |= PCI_CHECK_ENABLE_AMD_MMCONF; - return 0; -} - -static const struct dmi_system_id __initconst mmconf_dmi_table[] = { - { - .callback = set_check_enable_amd_mmconf, - .ident = "Sun Microsystems Machine", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Sun Microsystems"), - }, - }, - {} -}; - -/* Called from a __cpuinit function, but only on the BSP. */ -void __ref check_enable_amd_mmconf_dmi(void) -{ - dmi_check_system(mmconf_dmi_table); -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/module.c b/ANDROID_3.4.5/arch/x86/kernel/module.c deleted file mode 100644 index f21fd94a..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/module.c +++ /dev/null @@ -1,205 +0,0 @@ -/* Kernel module help for x86. - Copyright (C) 2001 Rusty Russell. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -#include <linux/moduleloader.h> -#include <linux/elf.h> -#include <linux/vmalloc.h> -#include <linux/fs.h> -#include <linux/string.h> -#include <linux/kernel.h> -#include <linux/bug.h> -#include <linux/mm.h> -#include <linux/gfp.h> -#include <linux/jump_label.h> - -#include <asm/page.h> -#include <asm/pgtable.h> - -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(fmt...) -#endif - -void *module_alloc(unsigned long size) -{ - if (PAGE_ALIGN(size) > MODULES_LEN) - return NULL; - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC, - -1, __builtin_return_address(0)); -} - -#ifdef CONFIG_X86_32 -int apply_relocate(Elf32_Shdr *sechdrs, - const char *strtab, - unsigned int symindex, - unsigned int relsec, - struct module *me) -{ - unsigned int i; - Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr; - Elf32_Sym *sym; - uint32_t *location; - - DEBUGP("Applying relocate section %u to %u\n", relsec, - sechdrs[relsec].sh_info); - for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { - /* This is where to make the change */ - location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr - + rel[i].r_offset; - /* This is the symbol it is referring to. 
Note that all - undefined symbols have been resolved. */ - sym = (Elf32_Sym *)sechdrs[symindex].sh_addr - + ELF32_R_SYM(rel[i].r_info); - - switch (ELF32_R_TYPE(rel[i].r_info)) { - case R_386_32: - /* We add the value into the location given */ - *location += sym->st_value; - break; - case R_386_PC32: - /* Add the value, subtract its postition */ - *location += sym->st_value - (uint32_t)location; - break; - default: - printk(KERN_ERR "module %s: Unknown relocation: %u\n", - me->name, ELF32_R_TYPE(rel[i].r_info)); - return -ENOEXEC; - } - } - return 0; -} -#else /*X86_64*/ -int apply_relocate_add(Elf64_Shdr *sechdrs, - const char *strtab, - unsigned int symindex, - unsigned int relsec, - struct module *me) -{ - unsigned int i; - Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr; - Elf64_Sym *sym; - void *loc; - u64 val; - - DEBUGP("Applying relocate section %u to %u\n", relsec, - sechdrs[relsec].sh_info); - for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { - /* This is where to make the change */ - loc = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr - + rel[i].r_offset; - - /* This is the symbol it is referring to. Note that all - undefined symbols have been resolved. */ - sym = (Elf64_Sym *)sechdrs[symindex].sh_addr - + ELF64_R_SYM(rel[i].r_info); - - DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", - (int)ELF64_R_TYPE(rel[i].r_info), - sym->st_value, rel[i].r_addend, (u64)loc); - - val = sym->st_value + rel[i].r_addend; - - switch (ELF64_R_TYPE(rel[i].r_info)) { - case R_X86_64_NONE: - break; - case R_X86_64_64: - *(u64 *)loc = val; - break; - case R_X86_64_32: - *(u32 *)loc = val; - if (val != *(u32 *)loc) - goto overflow; - break; - case R_X86_64_32S: - *(s32 *)loc = val; - if ((s64)val != *(s32 *)loc) - goto overflow; - break; - case R_X86_64_PC32: - val -= (u64)loc; - *(u32 *)loc = val; -#if 0 - if ((s64)val != *(s32 *)loc) - goto overflow; -#endif - break; - default: - printk(KERN_ERR "module %s: Unknown rela relocation: %llu\n", - me->name, ELF64_R_TYPE(rel[i].r_info)); - return -ENOEXEC; - } - } - return 0; - -overflow: - printk(KERN_ERR "overflow in relocation type %d val %Lx\n", - (int)ELF64_R_TYPE(rel[i].r_info), val); - printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n", - me->name); - return -ENOEXEC; -} -#endif - -int module_finalize(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - struct module *me) -{ - const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, - *para = NULL; - char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - - for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { - if (!strcmp(".text", secstrings + s->sh_name)) - text = s; - if (!strcmp(".altinstructions", secstrings + s->sh_name)) - alt = s; - if (!strcmp(".smp_locks", secstrings + s->sh_name)) - locks = s; - if (!strcmp(".parainstructions", secstrings + s->sh_name)) - para = s; - } - - if (alt) { - /* patch .altinstructions */ - void *aseg = (void *)alt->sh_addr; - apply_alternatives(aseg, aseg + alt->sh_size); - } - if (locks && text) { - void *lseg = (void *)locks->sh_addr; - void *tseg = (void *)text->sh_addr; - alternatives_smp_module_add(me, me->name, - lseg, lseg + locks->sh_size, - tseg, tseg + text->sh_size); - } - - if (para) { - void *pseg = (void *)para->sh_addr; - apply_paravirt(pseg, pseg + para->sh_size); - } - - /* make jump label nops */ - jump_label_apply_nops(me); - - return 0; -} - -void module_arch_cleanup(struct module *mod) -{ - alternatives_smp_module_del(mod); -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/mpparse.c 
b/ANDROID_3.4.5/arch/x86/kernel/mpparse.c deleted file mode 100644 index ca470e4c..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/mpparse.c +++ /dev/null @@ -1,921 +0,0 @@ -/* - * Intel Multiprocessor Specification 1.1 and 1.4 - * compliant MP-table parsing routines. - * - * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> - * (c) 1998, 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com> - * (c) 2008 Alexey Starikovskiy <astarikovskiy@suse.de> - */ - -#include <linux/mm.h> -#include <linux/init.h> -#include <linux/delay.h> -#include <linux/bootmem.h> -#include <linux/memblock.h> -#include <linux/kernel_stat.h> -#include <linux/mc146818rtc.h> -#include <linux/bitops.h> -#include <linux/acpi.h> -#include <linux/module.h> -#include <linux/smp.h> -#include <linux/pci.h> - -#include <asm/mtrr.h> -#include <asm/mpspec.h> -#include <asm/pgalloc.h> -#include <asm/io_apic.h> -#include <asm/proto.h> -#include <asm/bios_ebda.h> -#include <asm/e820.h> -#include <asm/trampoline.h> -#include <asm/setup.h> -#include <asm/smp.h> - -#include <asm/apic.h> -/* - * Checksum an MP configuration block. - */ - -static int __init mpf_checksum(unsigned char *mp, int len) -{ - int sum = 0; - - while (len--) - sum += *mp++; - - return sum & 0xFF; -} - -int __init default_mpc_apic_id(struct mpc_cpu *m) -{ - return m->apicid; -} - -static void __init MP_processor_info(struct mpc_cpu *m) -{ - int apicid; - char *bootup_cpu = ""; - - if (!(m->cpuflag & CPU_ENABLED)) { - disabled_cpus++; - return; - } - - apicid = x86_init.mpparse.mpc_apic_id(m); - - if (m->cpuflag & CPU_BOOTPROCESSOR) { - bootup_cpu = " (Bootup-CPU)"; - boot_cpu_physical_apicid = m->apicid; - } - - printk(KERN_INFO "Processor #%d%s\n", m->apicid, bootup_cpu); - generic_processor_info(apicid, m->apicver); -} - -#ifdef CONFIG_X86_IO_APIC -void __init default_mpc_oem_bus_info(struct mpc_bus *m, char *str) -{ - memcpy(str, m->bustype, 6); - str[6] = 0; - apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->busid, str); -} - -static void __init MP_bus_info(struct mpc_bus *m) -{ - char str[7]; - - x86_init.mpparse.mpc_oem_bus_info(m, str); - -#if MAX_MP_BUSSES < 256 - if (m->busid >= MAX_MP_BUSSES) { - printk(KERN_WARNING "MP table busid value (%d) for bustype %s " - " is too large, max. 
supported is %d\n", - m->busid, str, MAX_MP_BUSSES - 1); - return; - } -#endif - - set_bit(m->busid, mp_bus_not_pci); - if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) { -#if defined(CONFIG_EISA) || defined(CONFIG_MCA) - mp_bus_id_to_type[m->busid] = MP_BUS_ISA; -#endif - } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { - if (x86_init.mpparse.mpc_oem_pci_bus) - x86_init.mpparse.mpc_oem_pci_bus(m); - - clear_bit(m->busid, mp_bus_not_pci); -#if defined(CONFIG_EISA) || defined(CONFIG_MCA) - mp_bus_id_to_type[m->busid] = MP_BUS_PCI; - } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { - mp_bus_id_to_type[m->busid] = MP_BUS_EISA; - } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA) - 1) == 0) { - mp_bus_id_to_type[m->busid] = MP_BUS_MCA; -#endif - } else - printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); -} - -static void __init MP_ioapic_info(struct mpc_ioapic *m) -{ - if (m->flags & MPC_APIC_USABLE) - mp_register_ioapic(m->apicid, m->apicaddr, gsi_top); -} - -static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq) -{ - apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," - " IRQ %02x, APIC ID %x, APIC INT %02x\n", - mp_irq->irqtype, mp_irq->irqflag & 3, - (mp_irq->irqflag >> 2) & 3, mp_irq->srcbus, - mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq); -} - -#else /* CONFIG_X86_IO_APIC */ -static inline void __init MP_bus_info(struct mpc_bus *m) {} -static inline void __init MP_ioapic_info(struct mpc_ioapic *m) {} -#endif /* CONFIG_X86_IO_APIC */ - -static void __init MP_lintsrc_info(struct mpc_lintsrc *m) -{ - apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x," - " IRQ %02x, APIC ID %x, APIC LINT %02x\n", - m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbusid, - m->srcbusirq, m->destapic, m->destapiclint); -} - -/* - * Read/parse the MPC - */ -static int __init smp_check_mpc(struct mpc_table *mpc, char *oem, char *str) -{ - - if (memcmp(mpc->signature, MPC_SIGNATURE, 4)) { - printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n", - mpc->signature[0], mpc->signature[1], - mpc->signature[2], mpc->signature[3]); - return 0; - } - if (mpf_checksum((unsigned char *)mpc, mpc->length)) { - printk(KERN_ERR "MPTABLE: checksum error!\n"); - return 0; - } - if (mpc->spec != 0x01 && mpc->spec != 0x04) { - printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n", - mpc->spec); - return 0; - } - if (!mpc->lapic) { - printk(KERN_ERR "MPTABLE: null local APIC address!\n"); - return 0; - } - memcpy(oem, mpc->oem, 8); - oem[8] = 0; - printk(KERN_INFO "MPTABLE: OEM ID: %s\n", oem); - - memcpy(str, mpc->productid, 12); - str[12] = 0; - - printk(KERN_INFO "MPTABLE: Product ID: %s\n", str); - - printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->lapic); - - return 1; -} - -static void skip_entry(unsigned char **ptr, int *count, int size) -{ - *ptr += size; - *count += size; -} - -static void __init smp_dump_mptable(struct mpc_table *mpc, unsigned char *mpt) -{ - printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n" - "type %x\n", *mpt); - print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16, - 1, mpc, mpc->length, 1); -} - -void __init default_smp_read_mpc_oem(struct mpc_table *mpc) { } - -static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) -{ - char str[16]; - char oem[10]; - - int count = sizeof(*mpc); - unsigned char *mpt = ((unsigned char *)mpc) + count; - - if (!smp_check_mpc(mpc, oem, str)) - return 0; - -#ifdef CONFIG_X86_32 - generic_mps_oem_check(mpc, 
oem, str); -#endif - /* Initialize the lapic mapping */ - if (!acpi_lapic) - register_lapic_address(mpc->lapic); - - if (early) - return 1; - - if (mpc->oemptr) - x86_init.mpparse.smp_read_mpc_oem(mpc); - - /* - * Now process the configuration blocks. - */ - x86_init.mpparse.mpc_record(0); - - while (count < mpc->length) { - switch (*mpt) { - case MP_PROCESSOR: - /* ACPI may have already provided this data */ - if (!acpi_lapic) - MP_processor_info((struct mpc_cpu *)mpt); - skip_entry(&mpt, &count, sizeof(struct mpc_cpu)); - break; - case MP_BUS: - MP_bus_info((struct mpc_bus *)mpt); - skip_entry(&mpt, &count, sizeof(struct mpc_bus)); - break; - case MP_IOAPIC: - MP_ioapic_info((struct mpc_ioapic *)mpt); - skip_entry(&mpt, &count, sizeof(struct mpc_ioapic)); - break; - case MP_INTSRC: - mp_save_irq((struct mpc_intsrc *)mpt); - skip_entry(&mpt, &count, sizeof(struct mpc_intsrc)); - break; - case MP_LINTSRC: - MP_lintsrc_info((struct mpc_lintsrc *)mpt); - skip_entry(&mpt, &count, sizeof(struct mpc_lintsrc)); - break; - default: - /* wrong mptable */ - smp_dump_mptable(mpc, mpt); - count = mpc->length; - break; - } - x86_init.mpparse.mpc_record(1); - } - - if (!num_processors) - printk(KERN_ERR "MPTABLE: no processors registered!\n"); - return num_processors; -} - -#ifdef CONFIG_X86_IO_APIC - -static int __init ELCR_trigger(unsigned int irq) -{ - unsigned int port; - - port = 0x4d0 + (irq >> 3); - return (inb(port) >> (irq & 7)) & 1; -} - -static void __init construct_default_ioirq_mptable(int mpc_default_type) -{ - struct mpc_intsrc intsrc; - int i; - int ELCR_fallback = 0; - - intsrc.type = MP_INTSRC; - intsrc.irqflag = 0; /* conforming */ - intsrc.srcbus = 0; - intsrc.dstapic = mpc_ioapic_id(0); - - intsrc.irqtype = mp_INT; - - /* - * If true, we have an ISA/PCI system with no IRQ entries - * in the MP table. To prevent the PCI interrupts from being set up - * incorrectly, we try to use the ELCR. The sanity check to see if - * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can - * never be level sensitive, so we simply see if the ELCR agrees. - * If it does, we assume it's valid. - */ - if (mpc_default_type == 5) { - printk(KERN_INFO "ISA/PCI bus type with no IRQ information... " - "falling back to ELCR\n"); - - if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || - ELCR_trigger(13)) - printk(KERN_ERR "ELCR contains invalid data... " - "not using ELCR\n"); - else { - printk(KERN_INFO - "Using ELCR to identify PCI interrupts\n"); - ELCR_fallback = 1; - } - } - - for (i = 0; i < 16; i++) { - switch (mpc_default_type) { - case 2: - if (i == 0 || i == 13) - continue; /* IRQ0 & IRQ13 not connected */ - /* fall through */ - default: - if (i == 2) - continue; /* IRQ2 is never connected */ - } - - if (ELCR_fallback) { - /* - * If the ELCR indicates a level-sensitive interrupt, we - * copy that information over to the MP table in the - * irqflag field (level sensitive, active high polarity). - */ - if (ELCR_trigger(i)) - intsrc.irqflag = 13; - else - intsrc.irqflag = 0; - } - - intsrc.srcbusirq = i; - intsrc.dstirq = i ? 
i : 2; /* IRQ0 to INTIN2 */ - mp_save_irq(&intsrc); - } - - intsrc.irqtype = mp_ExtINT; - intsrc.srcbusirq = 0; - intsrc.dstirq = 0; /* 8259A to INTIN0 */ - mp_save_irq(&intsrc); -} - - -static void __init construct_ioapic_table(int mpc_default_type) -{ - struct mpc_ioapic ioapic; - struct mpc_bus bus; - - bus.type = MP_BUS; - bus.busid = 0; - switch (mpc_default_type) { - default: - printk(KERN_ERR "???\nUnknown standard configuration %d\n", - mpc_default_type); - /* fall through */ - case 1: - case 5: - memcpy(bus.bustype, "ISA ", 6); - break; - case 2: - case 6: - case 3: - memcpy(bus.bustype, "EISA ", 6); - break; - case 4: - case 7: - memcpy(bus.bustype, "MCA ", 6); - } - MP_bus_info(&bus); - if (mpc_default_type > 4) { - bus.busid = 1; - memcpy(bus.bustype, "PCI ", 6); - MP_bus_info(&bus); - } - - ioapic.type = MP_IOAPIC; - ioapic.apicid = 2; - ioapic.apicver = mpc_default_type > 4 ? 0x10 : 0x01; - ioapic.flags = MPC_APIC_USABLE; - ioapic.apicaddr = IO_APIC_DEFAULT_PHYS_BASE; - MP_ioapic_info(&ioapic); - - /* - * We set up most of the low 16 IO-APIC pins according to MPS rules. - */ - construct_default_ioirq_mptable(mpc_default_type); -} -#else -static inline void __init construct_ioapic_table(int mpc_default_type) { } -#endif - -static inline void __init construct_default_ISA_mptable(int mpc_default_type) -{ - struct mpc_cpu processor; - struct mpc_lintsrc lintsrc; - int linttypes[2] = { mp_ExtINT, mp_NMI }; - int i; - - /* - * local APIC has default address - */ - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - - /* - * 2 CPUs, numbered 0 & 1. - */ - processor.type = MP_PROCESSOR; - /* Either an integrated APIC or a discrete 82489DX. */ - processor.apicver = mpc_default_type > 4 ? 0x10 : 0x01; - processor.cpuflag = CPU_ENABLED; - processor.cpufeature = (boot_cpu_data.x86 << 8) | - (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; - processor.featureflag = boot_cpu_data.x86_capability[0]; - processor.reserved[0] = 0; - processor.reserved[1] = 0; - for (i = 0; i < 2; i++) { - processor.apicid = i; - MP_processor_info(&processor); - } - - construct_ioapic_table(mpc_default_type); - - lintsrc.type = MP_LINTSRC; - lintsrc.irqflag = 0; /* conforming */ - lintsrc.srcbusid = 0; - lintsrc.srcbusirq = 0; - lintsrc.destapic = MP_APIC_ALL; - for (i = 0; i < 2; i++) { - lintsrc.irqtype = linttypes[i]; - lintsrc.destapiclint = i; - MP_lintsrc_info(&lintsrc); - } -} - -static struct mpf_intel *mpf_found; - -static unsigned long __init get_mpc_size(unsigned long physptr) -{ - struct mpc_table *mpc; - unsigned long size; - - mpc = early_ioremap(physptr, PAGE_SIZE); - size = mpc->length; - early_iounmap(mpc, PAGE_SIZE); - apic_printk(APIC_VERBOSE, " mpc: %lx-%lx\n", physptr, physptr + size); - - return size; -} - -static int __init check_physptr(struct mpf_intel *mpf, unsigned int early) -{ - struct mpc_table *mpc; - unsigned long size; - - size = get_mpc_size(mpf->physptr); - mpc = early_ioremap(mpf->physptr, size); - /* - * Read the physical hardware table. Anything here will - * override the defaults. - */ - if (!smp_read_mpc(mpc, early)) { -#ifdef CONFIG_X86_LOCAL_APIC - smp_found_config = 0; -#endif - printk(KERN_ERR "BIOS bug, MP table errors detected!...\n" - "... disabling SMP support. (tell your hw vendor)\n"); - early_iounmap(mpc, size); - return -1; - } - early_iounmap(mpc, size); - - if (early) - return -1; - -#ifdef CONFIG_X86_IO_APIC - /* - * If there are no explicit MP IRQ entries, then we are - * broken. 
We set up most of the low 16 IO-APIC pins to - * ISA defaults and hope it will work. - */ - if (!mp_irq_entries) { - struct mpc_bus bus; - - printk(KERN_ERR "BIOS bug, no explicit IRQ entries, " - "using default mptable. (tell your hw vendor)\n"); - - bus.type = MP_BUS; - bus.busid = 0; - memcpy(bus.bustype, "ISA ", 6); - MP_bus_info(&bus); - - construct_default_ioirq_mptable(0); - } -#endif - - return 0; -} - -/* - * Scan the memory blocks for an SMP configuration block. - */ -void __init default_get_smp_config(unsigned int early) -{ - struct mpf_intel *mpf = mpf_found; - - if (!mpf) - return; - - if (acpi_lapic && early) - return; - - /* - * MPS doesn't support hyperthreading, aka only have - * thread 0 apic id in MPS table - */ - if (acpi_lapic && acpi_ioapic) - return; - - printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", - mpf->specification); -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) - if (mpf->feature2 & (1 << 7)) { - printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); - pic_mode = 1; - } else { - printk(KERN_INFO " Virtual Wire compatibility mode.\n"); - pic_mode = 0; - } -#endif - /* - * Now see if we need to read further. - */ - if (mpf->feature1 != 0) { - if (early) { - /* - * local APIC has default address - */ - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - return; - } - - printk(KERN_INFO "Default MP configuration #%d\n", - mpf->feature1); - construct_default_ISA_mptable(mpf->feature1); - - } else if (mpf->physptr) { - if (check_physptr(mpf, early)) - return; - } else - BUG(); - - if (!early) - printk(KERN_INFO "Processors: %d\n", num_processors); - /* - * Only use the first configuration found. - */ -} - -static void __init smp_reserve_memory(struct mpf_intel *mpf) -{ - memblock_reserve(mpf->physptr, get_mpc_size(mpf->physptr)); -} - -static int __init smp_scan_config(unsigned long base, unsigned long length) -{ - unsigned int *bp = phys_to_virt(base); - struct mpf_intel *mpf; - unsigned long mem; - - apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", - bp, length); - BUILD_BUG_ON(sizeof(*mpf) != 16); - - while (length > 0) { - mpf = (struct mpf_intel *)bp; - if ((*bp == SMP_MAGIC_IDENT) && - (mpf->length == 1) && - !mpf_checksum((unsigned char *)bp, 16) && - ((mpf->specification == 1) - || (mpf->specification == 4))) { -#ifdef CONFIG_X86_LOCAL_APIC - smp_found_config = 1; -#endif - mpf_found = mpf; - - printk(KERN_INFO "found SMP MP-table at [%p] %llx\n", - mpf, (u64)virt_to_phys(mpf)); - - mem = virt_to_phys(mpf); - memblock_reserve(mem, sizeof(*mpf)); - if (mpf->physptr) - smp_reserve_memory(mpf); - - return 1; - } - bp += 4; - length -= 16; - } - return 0; -} - -void __init default_find_smp_config(void) -{ - unsigned int address; - - /* - * FIXME: Linux assumes you have 640K of base ram.. - * this continues the error... - * - * 1) Scan the bottom 1K for a signature - * 2) Scan the top 1K of base RAM - * 3) Scan the 64K of bios - */ - if (smp_scan_config(0x0, 0x400) || - smp_scan_config(639 * 0x400, 0x400) || - smp_scan_config(0xF0000, 0x10000)) - return; - /* - * If it is an SMP machine we should know now, unless the - * configuration is in an EISA/MCA bus machine with an - * extended bios data area. - * - * there is a real-mode segmented pointer pointing to the - * 4K EBDA area at 0x40E, calculate and scan it here. - * - * NOTE! 
There are Linux loaders that will corrupt the EBDA - * area, and as such this kind of SMP config may be less - * trustworthy, simply because the SMP table may have been - * stomped on during early boot. These loaders are buggy and - * should be fixed. - * - * MP1.4 SPEC states to only scan first 1K of 4K EBDA. - */ - - address = get_bios_ebda(); - if (address) - smp_scan_config(address, 0x400); -} - -#ifdef CONFIG_X86_IO_APIC -static u8 __initdata irq_used[MAX_IRQ_SOURCES]; - -static int __init get_MP_intsrc_index(struct mpc_intsrc *m) -{ - int i; - - if (m->irqtype != mp_INT) - return 0; - - if (m->irqflag != 0x0f) - return 0; - - /* not legacy */ - - for (i = 0; i < mp_irq_entries; i++) { - if (mp_irqs[i].irqtype != mp_INT) - continue; - - if (mp_irqs[i].irqflag != 0x0f) - continue; - - if (mp_irqs[i].srcbus != m->srcbus) - continue; - if (mp_irqs[i].srcbusirq != m->srcbusirq) - continue; - if (irq_used[i]) { - /* already claimed */ - return -2; - } - irq_used[i] = 1; - return i; - } - - /* not found */ - return -1; -} - -#define SPARE_SLOT_NUM 20 - -static struct mpc_intsrc __initdata *m_spare[SPARE_SLOT_NUM]; - -static void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) -{ - int i; - - apic_printk(APIC_VERBOSE, "OLD "); - print_mp_irq_info(m); - - i = get_MP_intsrc_index(m); - if (i > 0) { - memcpy(m, &mp_irqs[i], sizeof(*m)); - apic_printk(APIC_VERBOSE, "NEW "); - print_mp_irq_info(&mp_irqs[i]); - return; - } - if (!i) { - /* legacy, do nothing */ - return; - } - if (*nr_m_spare < SPARE_SLOT_NUM) { - /* - * not found (-1), or duplicated (-2) are invalid entries, - * we need to use the slot later - */ - m_spare[*nr_m_spare] = m; - *nr_m_spare += 1; - } -} - -static int __init -check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count) -{ - if (!mpc_new_phys || count <= mpc_new_length) { - WARN(1, "update_mptable: No spare slots (length: %x)\n", count); - return -1; - } - - return 0; -} -#else /* CONFIG_X86_IO_APIC */ -static -inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {} -#endif /* CONFIG_X86_IO_APIC */ - -static int __init replace_intsrc_all(struct mpc_table *mpc, - unsigned long mpc_new_phys, - unsigned long mpc_new_length) -{ -#ifdef CONFIG_X86_IO_APIC - int i; -#endif - int count = sizeof(*mpc); - int nr_m_spare = 0; - unsigned char *mpt = ((unsigned char *)mpc) + count; - - printk(KERN_INFO "mpc_length %x\n", mpc->length); - while (count < mpc->length) { - switch (*mpt) { - case MP_PROCESSOR: - skip_entry(&mpt, &count, sizeof(struct mpc_cpu)); - break; - case MP_BUS: - skip_entry(&mpt, &count, sizeof(struct mpc_bus)); - break; - case MP_IOAPIC: - skip_entry(&mpt, &count, sizeof(struct mpc_ioapic)); - break; - case MP_INTSRC: - check_irq_src((struct mpc_intsrc *)mpt, &nr_m_spare); - skip_entry(&mpt, &count, sizeof(struct mpc_intsrc)); - break; - case MP_LINTSRC: - skip_entry(&mpt, &count, sizeof(struct mpc_lintsrc)); - break; - default: - /* wrong mptable */ - smp_dump_mptable(mpc, mpt); - goto out; - } - } - -#ifdef CONFIG_X86_IO_APIC - for (i = 0; i < mp_irq_entries; i++) { - if (irq_used[i]) - continue; - - if (mp_irqs[i].irqtype != mp_INT) - continue; - - if (mp_irqs[i].irqflag != 0x0f) - continue; - - if (nr_m_spare > 0) { - apic_printk(APIC_VERBOSE, "*NEW* found\n"); - nr_m_spare--; - memcpy(m_spare[nr_m_spare], &mp_irqs[i], sizeof(mp_irqs[i])); - m_spare[nr_m_spare] = NULL; - } else { - struct mpc_intsrc *m = (struct mpc_intsrc *)mpt; - count += sizeof(struct mpc_intsrc); - if (check_slot(mpc_new_phys, 
mpc_new_length, count) < 0) - goto out; - memcpy(m, &mp_irqs[i], sizeof(*m)); - mpc->length = count; - mpt += sizeof(struct mpc_intsrc); - } - print_mp_irq_info(&mp_irqs[i]); - } -#endif -out: - /* update checksum */ - mpc->checksum = 0; - mpc->checksum -= mpf_checksum((unsigned char *)mpc, mpc->length); - - return 0; -} - -int enable_update_mptable; - -static int __init update_mptable_setup(char *str) -{ - enable_update_mptable = 1; -#ifdef CONFIG_PCI - pci_routeirq = 1; -#endif - return 0; -} -early_param("update_mptable", update_mptable_setup); - -static unsigned long __initdata mpc_new_phys; -static unsigned long mpc_new_length __initdata = 4096; - -/* alloc_mptable or alloc_mptable=4k */ -static int __initdata alloc_mptable; -static int __init parse_alloc_mptable_opt(char *p) -{ - enable_update_mptable = 1; -#ifdef CONFIG_PCI - pci_routeirq = 1; -#endif - alloc_mptable = 1; - if (!p) - return 0; - mpc_new_length = memparse(p, &p); - return 0; -} -early_param("alloc_mptable", parse_alloc_mptable_opt); - -void __init early_reserve_e820_mpc_new(void) -{ - if (enable_update_mptable && alloc_mptable) - mpc_new_phys = early_reserve_e820(mpc_new_length, 4); -} - -static int __init update_mp_table(void) -{ - char str[16]; - char oem[10]; - struct mpf_intel *mpf; - struct mpc_table *mpc, *mpc_new; - - if (!enable_update_mptable) - return 0; - - mpf = mpf_found; - if (!mpf) - return 0; - - /* - * Now see if we need to go further. - */ - if (mpf->feature1 != 0) - return 0; - - if (!mpf->physptr) - return 0; - - mpc = phys_to_virt(mpf->physptr); - - if (!smp_check_mpc(mpc, oem, str)) - return 0; - - printk(KERN_INFO "mpf: %llx\n", (u64)virt_to_phys(mpf)); - printk(KERN_INFO "physptr: %x\n", mpf->physptr); - - if (mpc_new_phys && mpc->length > mpc_new_length) { - mpc_new_phys = 0; - printk(KERN_INFO "mpc_new_length is %ld, please use alloc_mptable=8k\n", - mpc_new_length); - } - - if (!mpc_new_phys) { - unsigned char old, new; - /* check if we can change the position */ - mpc->checksum = 0; - old = mpf_checksum((unsigned char *)mpc, mpc->length); - mpc->checksum = 0xff; - new = mpf_checksum((unsigned char *)mpc, mpc->length); - if (old == new) { - printk(KERN_INFO "mpc is readonly, please try alloc_mptable instead\n"); - return 0; - } - printk(KERN_INFO "use in-position replacing\n"); - } else { - mpf->physptr = mpc_new_phys; - mpc_new = phys_to_virt(mpc_new_phys); - memcpy(mpc_new, mpc, mpc->length); - mpc = mpc_new; - /* check if we can modify that */ - if (mpc_new_phys - mpf->physptr) { - struct mpf_intel *mpf_new; - /* steal 16 bytes from [0, 1k) */ - printk(KERN_INFO "mpf new: %x\n", 0x400 - 16); - mpf_new = phys_to_virt(0x400 - 16); - memcpy(mpf_new, mpf, 16); - mpf = mpf_new; - mpf->physptr = mpc_new_phys; - } - mpf->checksum = 0; - mpf->checksum -= mpf_checksum((unsigned char *)mpf, 16); - printk(KERN_INFO "physptr new: %x\n", mpf->physptr); - } - - /* - * only replace the one with mp_INT and - * MP_IRQ_TRIGGER_LEVEL|MP_IRQ_POLARITY_LOW, - * already in mp_irqs , stored by ... and mp_config_acpi_gsi, - * may need pci=routeirq for all coverage - */ - replace_intsrc_all(mpc, mpc_new_phys, mpc_new_length); - - return 0; -} - -late_initcall(update_mp_table); diff --git a/ANDROID_3.4.5/arch/x86/kernel/msr.c b/ANDROID_3.4.5/arch/x86/kernel/msr.c deleted file mode 100644 index eb113693..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/msr.c +++ /dev/null @@ -1,296 +0,0 @@ -/* ----------------------------------------------------------------------- * - * - * Copyright 2000-2008 H. 
Peter Anvin - All Rights Reserved - * Copyright 2009 Intel Corporation; author: H. Peter Anvin - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, - * USA; either version 2 of the License, or (at your option) any later - * version; incorporated herein by reference. - * - * ----------------------------------------------------------------------- */ - -/* - * x86 MSR access device - * - * This device is accessed by lseek() to the appropriate register number - * and then read/write in chunks of 8 bytes. A larger size means multiple - * reads or writes of the same register. - * - * This driver uses /dev/cpu/%d/msr where %d is the minor number, and on - * an SMP box will direct the access to CPU %d. - */ - -#include <linux/module.h> - -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/fcntl.h> -#include <linux/init.h> -#include <linux/poll.h> -#include <linux/smp.h> -#include <linux/major.h> -#include <linux/fs.h> -#include <linux/device.h> -#include <linux/cpu.h> -#include <linux/notifier.h> -#include <linux/uaccess.h> -#include <linux/gfp.h> - -#include <asm/processor.h> -#include <asm/msr.h> - -static struct class *msr_class; - -static loff_t msr_seek(struct file *file, loff_t offset, int orig) -{ - loff_t ret; - struct inode *inode = file->f_mapping->host; - - mutex_lock(&inode->i_mutex); - switch (orig) { - case 0: - file->f_pos = offset; - ret = file->f_pos; - break; - case 1: - file->f_pos += offset; - ret = file->f_pos; - break; - default: - ret = -EINVAL; - } - mutex_unlock(&inode->i_mutex); - return ret; -} - -static ssize_t msr_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - u32 __user *tmp = (u32 __user *) buf; - u32 data[2]; - u32 reg = *ppos; - int cpu = iminor(file->f_path.dentry->d_inode); - int err = 0; - ssize_t bytes = 0; - - if (count % 8) - return -EINVAL; /* Invalid chunk size */ - - for (; count; count -= 8) { - err = rdmsr_safe_on_cpu(cpu, reg, &data[0], &data[1]); - if (err) - break; - if (copy_to_user(tmp, &data, 8)) { - err = -EFAULT; - break; - } - tmp += 2; - bytes += 8; - } - - return bytes ? bytes : err; -} - -static ssize_t msr_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - const u32 __user *tmp = (const u32 __user *)buf; - u32 data[2]; - u32 reg = *ppos; - int cpu = iminor(file->f_path.dentry->d_inode); - int err = 0; - ssize_t bytes = 0; - - if (count % 8) - return -EINVAL; /* Invalid chunk size */ - - for (; count; count -= 8) { - if (copy_from_user(&data, tmp, 8)) { - err = -EFAULT; - break; - } - err = wrmsr_safe_on_cpu(cpu, reg, data[0], data[1]); - if (err) - break; - tmp += 2; - bytes += 8; - } - - return bytes ? 
bytes : err; -} - -static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg) -{ - u32 __user *uregs = (u32 __user *)arg; - u32 regs[8]; - int cpu = iminor(file->f_path.dentry->d_inode); - int err; - - switch (ioc) { - case X86_IOC_RDMSR_REGS: - if (!(file->f_mode & FMODE_READ)) { - err = -EBADF; - break; - } - if (copy_from_user(®s, uregs, sizeof regs)) { - err = -EFAULT; - break; - } - err = rdmsr_safe_regs_on_cpu(cpu, regs); - if (err) - break; - if (copy_to_user(uregs, ®s, sizeof regs)) - err = -EFAULT; - break; - - case X86_IOC_WRMSR_REGS: - if (!(file->f_mode & FMODE_WRITE)) { - err = -EBADF; - break; - } - if (copy_from_user(®s, uregs, sizeof regs)) { - err = -EFAULT; - break; - } - err = wrmsr_safe_regs_on_cpu(cpu, regs); - if (err) - break; - if (copy_to_user(uregs, ®s, sizeof regs)) - err = -EFAULT; - break; - - default: - err = -ENOTTY; - break; - } - - return err; -} - -static int msr_open(struct inode *inode, struct file *file) -{ - unsigned int cpu; - struct cpuinfo_x86 *c; - - cpu = iminor(file->f_path.dentry->d_inode); - if (cpu >= nr_cpu_ids || !cpu_online(cpu)) - return -ENXIO; /* No such CPU */ - - c = &cpu_data(cpu); - if (!cpu_has(c, X86_FEATURE_MSR)) - return -EIO; /* MSR not supported */ - - return 0; -} - -/* - * File operations we support - */ -static const struct file_operations msr_fops = { - .owner = THIS_MODULE, - .llseek = msr_seek, - .read = msr_read, - .write = msr_write, - .open = msr_open, - .unlocked_ioctl = msr_ioctl, - .compat_ioctl = msr_ioctl, -}; - -static int __cpuinit msr_device_create(int cpu) -{ - struct device *dev; - - dev = device_create(msr_class, NULL, MKDEV(MSR_MAJOR, cpu), NULL, - "msr%d", cpu); - return IS_ERR(dev) ? PTR_ERR(dev) : 0; -} - -static void msr_device_destroy(int cpu) -{ - device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu)); -} - -static int __cpuinit msr_class_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - int err = 0; - - switch (action) { - case CPU_UP_PREPARE: - err = msr_device_create(cpu); - break; - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - case CPU_DEAD: - msr_device_destroy(cpu); - break; - } - return notifier_from_errno(err); -} - -static struct notifier_block __refdata msr_class_cpu_notifier = { - .notifier_call = msr_class_cpu_callback, -}; - -static char *msr_devnode(struct device *dev, umode_t *mode) -{ - return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt)); -} - -static int __init msr_init(void) -{ - int i, err = 0; - i = 0; - - if (__register_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr", &msr_fops)) { - printk(KERN_ERR "msr: unable to get major %d for msr\n", - MSR_MAJOR); - err = -EBUSY; - goto out; - } - msr_class = class_create(THIS_MODULE, "msr"); - if (IS_ERR(msr_class)) { - err = PTR_ERR(msr_class); - goto out_chrdev; - } - msr_class->devnode = msr_devnode; - for_each_online_cpu(i) { - err = msr_device_create(i); - if (err != 0) - goto out_class; - } - register_hotcpu_notifier(&msr_class_cpu_notifier); - - err = 0; - goto out; - -out_class: - i = 0; - for_each_online_cpu(i) - msr_device_destroy(i); - class_destroy(msr_class); -out_chrdev: - __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); -out: - return err; -} - -static void __exit msr_exit(void) -{ - int cpu = 0; - for_each_online_cpu(cpu) - msr_device_destroy(cpu); - class_destroy(msr_class); - __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); - unregister_hotcpu_notifier(&msr_class_cpu_notifier); -} - -module_init(msr_init); 
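The access model spelled out at the top of this file — lseek() to the MSR index, then read or write in 8-byte chunks on /dev/cpu/%d/msr — is easiest to see from user space. The program below is an illustrative sketch, not part of the driver: it assumes a kernel built with CONFIG_X86_MSR, root privileges, and an existing /dev/cpu/0/msr node, and it defaults to MSR 0x8b (IA32_UCODE_REV, the same register the microcode code reads) when no index is given on the command line. One pread() at offset reg corresponds to one rdmsr_safe_on_cpu() call inside msr_read() above.

/* read_msr.c - illustrative user-space sketch, not part of this driver */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	uint32_t reg = (argc > 1) ? strtoul(argv[1], NULL, 0) : 0x8b;
	uint64_t val;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/cpu/0/msr");
		return 1;
	}
	/* the file offset selects the MSR; each transfer must be 8 bytes */
	if (pread(fd, &val, sizeof(val), reg) != sizeof(val)) {
		perror("pread");
		close(fd);
		return 1;
	}
	printf("MSR 0x%x = 0x%016llx\n", (unsigned)reg, (unsigned long long)val);
	close(fd);
	return 0;
}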
-module_exit(msr_exit) - -MODULE_AUTHOR("H. Peter Anvin <hpa@zytor.com>"); -MODULE_DESCRIPTION("x86 generic MSR driver"); -MODULE_LICENSE("GPL"); diff --git a/ANDROID_3.4.5/arch/x86/kernel/nmi.c b/ANDROID_3.4.5/arch/x86/kernel/nmi.c deleted file mode 100644 index 32856fa4..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/nmi.c +++ /dev/null @@ -1,539 +0,0 @@ -/* - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs - * Copyright (C) 2011 Don Zickus Red Hat, Inc. - * - * Pentium III FXSR, SSE support - * Gareth Hughes <gareth@valinux.com>, May 2000 - */ - -/* - * Handle hardware traps and faults. - */ -#include <linux/spinlock.h> -#include <linux/kprobes.h> -#include <linux/kdebug.h> -#include <linux/nmi.h> -#include <linux/delay.h> -#include <linux/hardirq.h> -#include <linux/slab.h> -#include <linux/export.h> - -#include <linux/mca.h> - -#if defined(CONFIG_EDAC) -#include <linux/edac.h> -#endif - -#include <linux/atomic.h> -#include <asm/traps.h> -#include <asm/mach_traps.h> -#include <asm/nmi.h> -#include <asm/x86_init.h> - -#define NMI_MAX_NAMELEN 16 -struct nmiaction { - struct list_head list; - nmi_handler_t handler; - unsigned int flags; - char *name; -}; - -struct nmi_desc { - spinlock_t lock; - struct list_head head; -}; - -static struct nmi_desc nmi_desc[NMI_MAX] = -{ - { - .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[0].lock), - .head = LIST_HEAD_INIT(nmi_desc[0].head), - }, - { - .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock), - .head = LIST_HEAD_INIT(nmi_desc[1].head), - }, - -}; - -struct nmi_stats { - unsigned int normal; - unsigned int unknown; - unsigned int external; - unsigned int swallow; -}; - -static DEFINE_PER_CPU(struct nmi_stats, nmi_stats); - -static int ignore_nmis; - -int unknown_nmi_panic; -/* - * Prevent NMI reason port (0x61) being accessed simultaneously, can - * only be used in NMI handler. - */ -static DEFINE_RAW_SPINLOCK(nmi_reason_lock); - -static int __init setup_unknown_nmi_panic(char *str) -{ - unknown_nmi_panic = 1; - return 1; -} -__setup("unknown_nmi_panic", setup_unknown_nmi_panic); - -#define nmi_to_desc(type) (&nmi_desc[type]) - -static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) -{ - struct nmi_desc *desc = nmi_to_desc(type); - struct nmiaction *a; - int handled=0; - - rcu_read_lock(); - - /* - * NMIs are edge-triggered, which means if you have enough - * of them concurrently, you can lose some because only one - * can be latched at any given time. Walk the whole list - * to handle those situations. - */ - list_for_each_entry_rcu(a, &desc->head, list) - handled += a->handler(type, regs); - - rcu_read_unlock(); - - /* return total number of NMI events handled */ - return handled; -} - -static int __setup_nmi(unsigned int type, struct nmiaction *action) -{ - struct nmi_desc *desc = nmi_to_desc(type); - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - - /* - * most handlers of type NMI_UNKNOWN never return because - * they just assume the NMI is theirs. 
Just a sanity check - * to manage expectations - */ - WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head)); - - /* - * some handlers need to be executed first otherwise a fake - * event confuses some handlers (kdump uses this flag) - */ - if (action->flags & NMI_FLAG_FIRST) - list_add_rcu(&action->list, &desc->head); - else - list_add_tail_rcu(&action->list, &desc->head); - - spin_unlock_irqrestore(&desc->lock, flags); - return 0; -} - -static struct nmiaction *__free_nmi(unsigned int type, const char *name) -{ - struct nmi_desc *desc = nmi_to_desc(type); - struct nmiaction *n; - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - - list_for_each_entry_rcu(n, &desc->head, list) { - /* - * the name passed in to describe the nmi handler - * is used as the lookup key - */ - if (!strcmp(n->name, name)) { - WARN(in_nmi(), - "Trying to free NMI (%s) from NMI context!\n", n->name); - list_del_rcu(&n->list); - break; - } - } - - spin_unlock_irqrestore(&desc->lock, flags); - synchronize_rcu(); - return (n); -} - -int register_nmi_handler(unsigned int type, nmi_handler_t handler, - unsigned long nmiflags, const char *devname) -{ - struct nmiaction *action; - int retval = -ENOMEM; - - if (!handler) - return -EINVAL; - - action = kzalloc(sizeof(struct nmiaction), GFP_KERNEL); - if (!action) - goto fail_action; - - action->handler = handler; - action->flags = nmiflags; - action->name = kstrndup(devname, NMI_MAX_NAMELEN, GFP_KERNEL); - if (!action->name) - goto fail_action_name; - - retval = __setup_nmi(type, action); - - if (retval) - goto fail_setup_nmi; - - return retval; - -fail_setup_nmi: - kfree(action->name); -fail_action_name: - kfree(action); -fail_action: - - return retval; -} -EXPORT_SYMBOL_GPL(register_nmi_handler); - -void unregister_nmi_handler(unsigned int type, const char *name) -{ - struct nmiaction *a; - - a = __free_nmi(type, name); - if (a) { - kfree(a->name); - kfree(a); - } -} - -EXPORT_SYMBOL_GPL(unregister_nmi_handler); - -static notrace __kprobes void -pci_serr_error(unsigned char reason, struct pt_regs *regs) -{ - pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", - reason, smp_processor_id()); - - /* - * On some machines, PCI SERR line is used to report memory - * errors. EDAC makes use of it. - */ -#if defined(CONFIG_EDAC) - if (edac_handler_set()) { - edac_atomic_assert_error(); - return; - } -#endif - - if (panic_on_unrecovered_nmi) - panic("NMI: Not continuing"); - - pr_emerg("Dazed and confused, but trying to continue\n"); - - /* Clear and disable the PCI SERR error line. */ - reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR; - outb(reason, NMI_REASON_PORT); -} - -static notrace __kprobes void -io_check_error(unsigned char reason, struct pt_regs *regs) -{ - unsigned long i; - - pr_emerg( - "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n", - reason, smp_processor_id()); - show_registers(regs); - - if (panic_on_io_nmi) - panic("NMI IOCK error: Not continuing"); - - /* Re-enable the IOCK line, wait for a few seconds */ - reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK; - outb(reason, NMI_REASON_PORT); - - i = 20000; - while (--i) { - touch_nmi_watchdog(); - udelay(100); - } - - reason &= ~NMI_REASON_CLEAR_IOCHK; - outb(reason, NMI_REASON_PORT); -} - -static notrace __kprobes void -unknown_nmi_error(unsigned char reason, struct pt_regs *regs) -{ - int handled; - - /* - * Use 'false' as back-to-back NMIs are dealt with one level up. 
- * Of course this makes having multiple 'unknown' handlers useless - * as only the first one is ever run (unless it can actually determine - * if it caused the NMI) - */ - handled = nmi_handle(NMI_UNKNOWN, regs, false); - if (handled) { - __this_cpu_add(nmi_stats.unknown, handled); - return; - } - - __this_cpu_add(nmi_stats.unknown, 1); - -#ifdef CONFIG_MCA - /* - * Might actually be able to figure out what the guilty party - * is: - */ - if (MCA_bus) { - mca_handle_nmi(); - return; - } -#endif - pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", - reason, smp_processor_id()); - - pr_emerg("Do you have a strange power saving mode enabled?\n"); - if (unknown_nmi_panic || panic_on_unrecovered_nmi) - panic("NMI: Not continuing"); - - pr_emerg("Dazed and confused, but trying to continue\n"); -} - -static DEFINE_PER_CPU(bool, swallow_nmi); -static DEFINE_PER_CPU(unsigned long, last_nmi_rip); - -static notrace __kprobes void default_do_nmi(struct pt_regs *regs) -{ - unsigned char reason = 0; - int handled; - bool b2b = false; - - /* - * CPU-specific NMI must be processed before non-CPU-specific - * NMI, otherwise we may lose it, because the CPU-specific - * NMI can not be detected/processed on other CPUs. - */ - - /* - * Back-to-back NMIs are interesting because they can either - * be two NMI or more than two NMIs (any thing over two is dropped - * due to NMI being edge-triggered). If this is the second half - * of the back-to-back NMI, assume we dropped things and process - * more handlers. Otherwise reset the 'swallow' NMI behaviour - */ - if (regs->ip == __this_cpu_read(last_nmi_rip)) - b2b = true; - else - __this_cpu_write(swallow_nmi, false); - - __this_cpu_write(last_nmi_rip, regs->ip); - - handled = nmi_handle(NMI_LOCAL, regs, b2b); - __this_cpu_add(nmi_stats.normal, handled); - if (handled) { - /* - * There are cases when a NMI handler handles multiple - * events in the current NMI. One of these events may - * be queued for in the next NMI. Because the event is - * already handled, the next NMI will result in an unknown - * NMI. Instead lets flag this for a potential NMI to - * swallow. - */ - if (handled > 1) - __this_cpu_write(swallow_nmi, true); - return; - } - - /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */ - raw_spin_lock(&nmi_reason_lock); - reason = x86_platform.get_nmi_reason(); - - if (reason & NMI_REASON_MASK) { - if (reason & NMI_REASON_SERR) - pci_serr_error(reason, regs); - else if (reason & NMI_REASON_IOCHK) - io_check_error(reason, regs); -#ifdef CONFIG_X86_32 - /* - * Reassert NMI in case it became active - * meanwhile as it's edge-triggered: - */ - reassert_nmi(); -#endif - __this_cpu_add(nmi_stats.external, 1); - raw_spin_unlock(&nmi_reason_lock); - return; - } - raw_spin_unlock(&nmi_reason_lock); - - /* - * Only one NMI can be latched at a time. To handle - * this we may process multiple nmi handlers at once to - * cover the case where an NMI is dropped. The downside - * to this approach is we may process an NMI prematurely, - * while its real NMI is sitting latched. This will cause - * an unknown NMI on the next run of the NMI processing. - * - * We tried to flag that condition above, by setting the - * swallow_nmi flag when we process more than one event. - * This condition is also only present on the second half - * of a back-to-back NMI, so we flag that condition too. - * - * If both are true, we assume we already processed this - * NMI previously and we swallow it. Otherwise we reset - * the logic. 
- * - * There are scenarios where we may accidentally swallow - * a 'real' unknown NMI. For example, while processing - * a perf NMI another perf NMI comes in along with a - * 'real' unknown NMI. These two NMIs get combined into - * one (as descibed above). When the next NMI gets - * processed, it will be flagged by perf as handled, but - * noone will know that there was a 'real' unknown NMI sent - * also. As a result it gets swallowed. Or if the first - * perf NMI returns two events handled then the second - * NMI will get eaten by the logic below, again losing a - * 'real' unknown NMI. But this is the best we can do - * for now. - */ - if (b2b && __this_cpu_read(swallow_nmi)) - __this_cpu_add(nmi_stats.swallow, 1); - else - unknown_nmi_error(reason, regs); -} - -/* - * NMIs can hit breakpoints which will cause it to lose its - * NMI context with the CPU when the breakpoint does an iret. - */ -#ifdef CONFIG_X86_32 -/* - * For i386, NMIs use the same stack as the kernel, and we can - * add a workaround to the iret problem in C. Simply have 3 states - * the NMI can be in. - * - * 1) not running - * 2) executing - * 3) latched - * - * When no NMI is in progress, it is in the "not running" state. - * When an NMI comes in, it goes into the "executing" state. - * Normally, if another NMI is triggered, it does not interrupt - * the running NMI and the HW will simply latch it so that when - * the first NMI finishes, it will restart the second NMI. - * (Note, the latch is binary, thus multiple NMIs triggering, - * when one is running, are ignored. Only one NMI is restarted.) - * - * If an NMI hits a breakpoint that executes an iret, another - * NMI can preempt it. We do not want to allow this new NMI - * to run, but we want to execute it when the first one finishes. - * We set the state to "latched", and the first NMI will perform - * an cmpxchg on the state, and if it doesn't successfully - * reset the state to "not running" it will restart the next - * NMI. - */ -enum nmi_states { - NMI_NOT_RUNNING, - NMI_EXECUTING, - NMI_LATCHED, -}; -static DEFINE_PER_CPU(enum nmi_states, nmi_state); - -#define nmi_nesting_preprocess(regs) \ - do { \ - if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) { \ - __get_cpu_var(nmi_state) = NMI_LATCHED; \ - return; \ - } \ - nmi_restart: \ - __get_cpu_var(nmi_state) = NMI_EXECUTING; \ - } while (0) - -#define nmi_nesting_postprocess() \ - do { \ - if (cmpxchg(&__get_cpu_var(nmi_state), \ - NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING) \ - goto nmi_restart; \ - } while (0) -#else /* x86_64 */ -/* - * In x86_64 things are a bit more difficult. This has the same problem - * where an NMI hitting a breakpoint that calls iret will remove the - * NMI context, allowing a nested NMI to enter. What makes this more - * difficult is that both NMIs and breakpoints have their own stack. - * When a new NMI or breakpoint is executed, the stack is set to a fixed - * point. If an NMI is nested, it will have its stack set at that same - * fixed address that the first NMI had, and will start corrupting the - * stack. This is handled in entry_64.S, but the same problem exists with - * the breakpoint stack. - * - * If a breakpoint is being processed, and the debug stack is being used, - * if an NMI comes in and also hits a breakpoint, the stack pointer - * will be set to the same fixed address as the breakpoint that was - * interrupted, causing that stack to be corrupted. 
To handle this case, - * check if the stack that was interrupted is the debug stack, and if - * so, change the IDT so that new breakpoints will use the current stack - * and not switch to the fixed address. On return of the NMI, switch back - * to the original IDT. - */ -static DEFINE_PER_CPU(int, update_debug_stack); - -static inline void nmi_nesting_preprocess(struct pt_regs *regs) -{ - /* - * If we interrupted a breakpoint, it is possible that - * the nmi handler will have breakpoints too. We need to - * change the IDT such that breakpoints that happen here - * continue to use the NMI stack. - */ - if (unlikely(is_debug_stack(regs->sp))) { - debug_stack_set_zero(); - this_cpu_write(update_debug_stack, 1); - } -} - -static inline void nmi_nesting_postprocess(void) -{ - if (unlikely(this_cpu_read(update_debug_stack))) { - debug_stack_reset(); - this_cpu_write(update_debug_stack, 0); - } -} -#endif - -dotraplinkage notrace __kprobes void -do_nmi(struct pt_regs *regs, long error_code) -{ - nmi_nesting_preprocess(regs); - - nmi_enter(); - - inc_irq_stat(__nmi_count); - - if (!ignore_nmis) - default_do_nmi(regs); - - nmi_exit(); - - /* On i386, may loop back to preprocess */ - nmi_nesting_postprocess(); -} - -void stop_nmi(void) -{ - ignore_nmis++; -} - -void restart_nmi(void) -{ - ignore_nmis--; -} - -/* reset the back-to-back NMI logic */ -void local_touch_nmi(void) -{ - __this_cpu_write(last_nmi_rip, 0); -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/nmi_selftest.c b/ANDROID_3.4.5/arch/x86/kernel/nmi_selftest.c deleted file mode 100644 index 2c39dcd5..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/nmi_selftest.c +++ /dev/null @@ -1,181 +0,0 @@ -/* - * arch/x86/kernel/nmi-selftest.c - * - * Testsuite for NMI: IPIs - * - * Started by Don Zickus: - * (using lib/locking-selftest.c as a guide) - * - * Copyright (C) 2011 Red Hat, Inc., Don Zickus <dzickus@redhat.com> - */ - -#include <linux/smp.h> -#include <linux/cpumask.h> -#include <linux/delay.h> -#include <linux/init.h> - -#include <asm/apic.h> -#include <asm/nmi.h> - -#define SUCCESS 0 -#define FAILURE 1 -#define TIMEOUT 2 - -static int __initdata nmi_fail; - -/* check to see if NMI IPIs work on this machine */ -static DECLARE_BITMAP(nmi_ipi_mask, NR_CPUS) __initdata; - -static int __initdata testcase_total; -static int __initdata testcase_successes; -static int __initdata expected_testcase_failures; -static int __initdata unexpected_testcase_failures; -static int __initdata unexpected_testcase_unknowns; - -static int __init nmi_unk_cb(unsigned int val, struct pt_regs *regs) -{ - unexpected_testcase_unknowns++; - return NMI_HANDLED; -} - -static void __init init_nmi_testsuite(void) -{ - /* trap all the unknown NMIs we may generate */ - register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk"); -} - -static void __init cleanup_nmi_testsuite(void) -{ - unregister_nmi_handler(NMI_UNKNOWN, "nmi_selftest_unk"); -} - -static int __init test_nmi_ipi_callback(unsigned int val, struct pt_regs *regs) -{ - int cpu = raw_smp_processor_id(); - - if (cpumask_test_and_clear_cpu(cpu, to_cpumask(nmi_ipi_mask))) - return NMI_HANDLED; - - return NMI_DONE; -} - -static void __init test_nmi_ipi(struct cpumask *mask) -{ - unsigned long timeout; - - if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback, - NMI_FLAG_FIRST, "nmi_selftest")) { - nmi_fail = FAILURE; - return; - } - - /* sync above data before sending NMI */ - wmb(); - - apic->send_IPI_mask(mask, NMI_VECTOR); - - /* Don't wait longer than a second */ - timeout = USEC_PER_SEC; - while 
(!cpumask_empty(mask) && timeout--) - udelay(1); - - /* What happens if we timeout, do we still unregister?? */ - unregister_nmi_handler(NMI_LOCAL, "nmi_selftest"); - - if (!timeout) - nmi_fail = TIMEOUT; - return; -} - -static void __init remote_ipi(void) -{ - cpumask_copy(to_cpumask(nmi_ipi_mask), cpu_online_mask); - cpumask_clear_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask)); - if (!cpumask_empty(to_cpumask(nmi_ipi_mask))) - test_nmi_ipi(to_cpumask(nmi_ipi_mask)); -} - -static void __init local_ipi(void) -{ - cpumask_clear(to_cpumask(nmi_ipi_mask)); - cpumask_set_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask)); - test_nmi_ipi(to_cpumask(nmi_ipi_mask)); -} - -static void __init reset_nmi(void) -{ - nmi_fail = 0; -} - -static void __init dotest(void (*testcase_fn)(void), int expected) -{ - testcase_fn(); - /* - * Filter out expected failures: - */ - if (nmi_fail != expected) { - unexpected_testcase_failures++; - - if (nmi_fail == FAILURE) - printk("FAILED |"); - else if (nmi_fail == TIMEOUT) - printk("TIMEOUT|"); - else - printk("ERROR |"); - dump_stack(); - } else { - testcase_successes++; - printk(" ok |"); - } - testcase_total++; - - reset_nmi(); -} - -static inline void __init print_testname(const char *testname) -{ - printk("%12s:", testname); -} - -void __init nmi_selftest(void) -{ - init_nmi_testsuite(); - - /* - * Run the testsuite: - */ - printk("----------------\n"); - printk("| NMI testsuite:\n"); - printk("--------------------\n"); - - print_testname("remote IPI"); - dotest(remote_ipi, SUCCESS); - printk("\n"); - print_testname("local IPI"); - dotest(local_ipi, SUCCESS); - printk("\n"); - - cleanup_nmi_testsuite(); - - if (unexpected_testcase_failures) { - printk("--------------------\n"); - printk("BUG: %3d unexpected failures (out of %3d) - debugging disabled! |\n", - unexpected_testcase_failures, testcase_total); - printk("-----------------------------------------------------------------\n"); - } else if (expected_testcase_failures && testcase_successes) { - printk("--------------------\n"); - printk("%3d out of %3d testcases failed, as expected. |\n", - expected_testcase_failures, testcase_total); - printk("----------------------------------------------------\n"); - } else if (expected_testcase_failures && !testcase_successes) { - printk("--------------------\n"); - printk("All %3d testcases failed, as expected. |\n", - expected_testcase_failures); - printk("----------------------------------------\n"); - } else { - printk("--------------------\n"); - printk("Good, all %3d testcases passed! |\n", - testcase_successes); - printk("---------------------------------\n"); - } -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/paravirt-spinlocks.c b/ANDROID_3.4.5/arch/x86/kernel/paravirt-spinlocks.c deleted file mode 100644 index 676b8c77..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/paravirt-spinlocks.c +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Split spinlock implementation out into its own file, so it can be - * compiled in a FTRACE-compatible way. 
- */ -#include <linux/spinlock.h> -#include <linux/module.h> - -#include <asm/paravirt.h> - -static inline void -default_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) -{ - arch_spin_lock(lock); -} - -struct pv_lock_ops pv_lock_ops = { -#ifdef CONFIG_SMP - .spin_is_locked = __ticket_spin_is_locked, - .spin_is_contended = __ticket_spin_is_contended, - - .spin_lock = __ticket_spin_lock, - .spin_lock_flags = default_spin_lock_flags, - .spin_trylock = __ticket_spin_trylock, - .spin_unlock = __ticket_spin_unlock, -#endif -}; -EXPORT_SYMBOL(pv_lock_ops); - diff --git a/ANDROID_3.4.5/arch/x86/kernel/paravirt.c b/ANDROID_3.4.5/arch/x86/kernel/paravirt.c deleted file mode 100644 index ab137605..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/paravirt.c +++ /dev/null @@ -1,490 +0,0 @@ -/* Paravirtualization interfaces - Copyright (C) 2006 Rusty Russell IBM Corporation - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - - 2007 - x86_64 support added by Glauber de Oliveira Costa, Red Hat Inc -*/ - -#include <linux/errno.h> -#include <linux/module.h> -#include <linux/efi.h> -#include <linux/bcd.h> -#include <linux/highmem.h> - -#include <asm/bug.h> -#include <asm/paravirt.h> -#include <asm/debugreg.h> -#include <asm/desc.h> -#include <asm/setup.h> -#include <asm/pgtable.h> -#include <asm/time.h> -#include <asm/pgalloc.h> -#include <asm/irq.h> -#include <asm/delay.h> -#include <asm/fixmap.h> -#include <asm/apic.h> -#include <asm/tlbflush.h> -#include <asm/timer.h> -#include <asm/special_insns.h> - -/* nop stub */ -void _paravirt_nop(void) -{ -} - -/* identity function, which can be inlined */ -u32 _paravirt_ident_32(u32 x) -{ - return x; -} - -u64 _paravirt_ident_64(u64 x) -{ - return x; -} - -void __init default_banner(void) -{ - printk(KERN_INFO "Booting paravirtualized kernel on %s\n", - pv_info.name); -} - -/* Simple instruction patching code. */ -#define DEF_NATIVE(ops, name, code) \ - extern const char start_##ops##_##name[], end_##ops##_##name[]; \ - asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") - -/* Undefined instruction for dealing with missing ops pointers. 
*/ -static const unsigned char ud2a[] = { 0x0f, 0x0b }; - -unsigned paravirt_patch_nop(void) -{ - return 0; -} - -unsigned paravirt_patch_ignore(unsigned len) -{ - return len; -} - -struct branch { - unsigned char opcode; - u32 delta; -} __attribute__((packed)); - -unsigned paravirt_patch_call(void *insnbuf, - const void *target, u16 tgt_clobbers, - unsigned long addr, u16 site_clobbers, - unsigned len) -{ - struct branch *b = insnbuf; - unsigned long delta = (unsigned long)target - (addr+5); - - if (tgt_clobbers & ~site_clobbers) - return len; /* target would clobber too much for this site */ - if (len < 5) - return len; /* call too long for patch site */ - - b->opcode = 0xe8; /* call */ - b->delta = delta; - BUILD_BUG_ON(sizeof(*b) != 5); - - return 5; -} - -unsigned paravirt_patch_jmp(void *insnbuf, const void *target, - unsigned long addr, unsigned len) -{ - struct branch *b = insnbuf; - unsigned long delta = (unsigned long)target - (addr+5); - - if (len < 5) - return len; /* call too long for patch site */ - - b->opcode = 0xe9; /* jmp */ - b->delta = delta; - - return 5; -} - -/* Neat trick to map patch type back to the call within the - * corresponding structure. */ -static void *get_call_destination(u8 type) -{ - struct paravirt_patch_template tmpl = { - .pv_init_ops = pv_init_ops, - .pv_time_ops = pv_time_ops, - .pv_cpu_ops = pv_cpu_ops, - .pv_irq_ops = pv_irq_ops, - .pv_apic_ops = pv_apic_ops, - .pv_mmu_ops = pv_mmu_ops, -#ifdef CONFIG_PARAVIRT_SPINLOCKS - .pv_lock_ops = pv_lock_ops, -#endif - }; - return *((void **)&tmpl + type); -} - -unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, - unsigned long addr, unsigned len) -{ - void *opfunc = get_call_destination(type); - unsigned ret; - - if (opfunc == NULL) - /* If there's no function, patch it with a ud2a (BUG) */ - ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a)); - else if (opfunc == _paravirt_nop) - /* If the operation is a nop, then nop the callsite */ - ret = paravirt_patch_nop(); - - /* identity functions just return their single argument */ - else if (opfunc == _paravirt_ident_32) - ret = paravirt_patch_ident_32(insnbuf, len); - else if (opfunc == _paravirt_ident_64) - ret = paravirt_patch_ident_64(insnbuf, len); - - else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || - type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || - type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) || - type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64)) - /* If operation requires a jmp, then jmp */ - ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len); - else - /* Otherwise call the function; assume target could - clobber any caller-save reg */ - ret = paravirt_patch_call(insnbuf, opfunc, CLBR_ANY, - addr, clobbers, len); - - return ret; -} - -unsigned paravirt_patch_insns(void *insnbuf, unsigned len, - const char *start, const char *end) -{ - unsigned insn_len = end - start; - - if (insn_len > len || start == NULL) - insn_len = len; - else - memcpy(insnbuf, start, insn_len); - - return insn_len; -} - -static void native_flush_tlb(void) -{ - __native_flush_tlb(); -} - -/* - * Global pages have to be flushed a bit differently. Not a real - * performance problem because this does not happen often. 
- */ -static void native_flush_tlb_global(void) -{ - __native_flush_tlb_global(); -} - -static void native_flush_tlb_single(unsigned long addr) -{ - __native_flush_tlb_single(addr); -} - -struct static_key paravirt_steal_enabled; -struct static_key paravirt_steal_rq_enabled; - -static u64 native_steal_clock(int cpu) -{ - return 0; -} - -/* These are in entry.S */ -extern void native_iret(void); -extern void native_irq_enable_sysexit(void); -extern void native_usergs_sysret32(void); -extern void native_usergs_sysret64(void); - -static struct resource reserve_ioports = { - .start = 0, - .end = IO_SPACE_LIMIT, - .name = "paravirt-ioport", - .flags = IORESOURCE_IO | IORESOURCE_BUSY, -}; - -/* - * Reserve the whole legacy IO space to prevent any legacy drivers - * from wasting time probing for their hardware. This is a fairly - * brute-force approach to disabling all non-virtual drivers. - * - * Note that this must be called very early to have any effect. - */ -int paravirt_disable_iospace(void) -{ - return request_resource(&ioport_resource, &reserve_ioports); -} - -static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE; - -static inline void enter_lazy(enum paravirt_lazy_mode mode) -{ - BUG_ON(percpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); - - percpu_write(paravirt_lazy_mode, mode); -} - -static void leave_lazy(enum paravirt_lazy_mode mode) -{ - BUG_ON(percpu_read(paravirt_lazy_mode) != mode); - - percpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE); -} - -void paravirt_enter_lazy_mmu(void) -{ - enter_lazy(PARAVIRT_LAZY_MMU); -} - -void paravirt_leave_lazy_mmu(void) -{ - leave_lazy(PARAVIRT_LAZY_MMU); -} - -void paravirt_start_context_switch(struct task_struct *prev) -{ - BUG_ON(preemptible()); - - if (percpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) { - arch_leave_lazy_mmu_mode(); - set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES); - } - enter_lazy(PARAVIRT_LAZY_CPU); -} - -void paravirt_end_context_switch(struct task_struct *next) -{ - BUG_ON(preemptible()); - - leave_lazy(PARAVIRT_LAZY_CPU); - - if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES)) - arch_enter_lazy_mmu_mode(); -} - -enum paravirt_lazy_mode paravirt_get_lazy_mode(void) -{ - if (in_interrupt()) - return PARAVIRT_LAZY_NONE; - - return percpu_read(paravirt_lazy_mode); -} - -void arch_flush_lazy_mmu_mode(void) -{ - preempt_disable(); - - if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { - arch_leave_lazy_mmu_mode(); - arch_enter_lazy_mmu_mode(); - } - - preempt_enable(); -} - -struct pv_info pv_info = { - .name = "bare hardware", - .paravirt_enabled = 0, - .kernel_rpl = 0, - .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ - -#ifdef CONFIG_X86_64 - .extra_user_64bit_cs = __USER_CS, -#endif -}; - -struct pv_init_ops pv_init_ops = { - .patch = native_patch, -}; - -struct pv_time_ops pv_time_ops = { - .sched_clock = native_sched_clock, - .steal_clock = native_steal_clock, -}; - -struct pv_irq_ops pv_irq_ops = { - .save_fl = __PV_IS_CALLEE_SAVE(native_save_fl), - .restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl), - .irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable), - .irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable), - .safe_halt = native_safe_halt, - .halt = native_halt, -#ifdef CONFIG_X86_64 - .adjust_exception_frame = paravirt_nop, -#endif -}; - -struct pv_cpu_ops pv_cpu_ops = { - .cpuid = native_cpuid, - .get_debugreg = native_get_debugreg, - .set_debugreg = native_set_debugreg, - .clts = native_clts, - 
.read_cr0 = native_read_cr0, - .write_cr0 = native_write_cr0, - .read_cr4 = native_read_cr4, - .read_cr4_safe = native_read_cr4_safe, - .write_cr4 = native_write_cr4, -#ifdef CONFIG_X86_64 - .read_cr8 = native_read_cr8, - .write_cr8 = native_write_cr8, -#endif - .wbinvd = native_wbinvd, - .read_msr = native_read_msr_safe, - .rdmsr_regs = native_rdmsr_safe_regs, - .write_msr = native_write_msr_safe, - .wrmsr_regs = native_wrmsr_safe_regs, - .read_tsc = native_read_tsc, - .read_pmc = native_read_pmc, - .read_tscp = native_read_tscp, - .load_tr_desc = native_load_tr_desc, - .set_ldt = native_set_ldt, - .load_gdt = native_load_gdt, - .load_idt = native_load_idt, - .store_gdt = native_store_gdt, - .store_idt = native_store_idt, - .store_tr = native_store_tr, - .load_tls = native_load_tls, -#ifdef CONFIG_X86_64 - .load_gs_index = native_load_gs_index, -#endif - .write_ldt_entry = native_write_ldt_entry, - .write_gdt_entry = native_write_gdt_entry, - .write_idt_entry = native_write_idt_entry, - - .alloc_ldt = paravirt_nop, - .free_ldt = paravirt_nop, - - .load_sp0 = native_load_sp0, - -#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) - .irq_enable_sysexit = native_irq_enable_sysexit, -#endif -#ifdef CONFIG_X86_64 -#ifdef CONFIG_IA32_EMULATION - .usergs_sysret32 = native_usergs_sysret32, -#endif - .usergs_sysret64 = native_usergs_sysret64, -#endif - .iret = native_iret, - .swapgs = native_swapgs, - - .set_iopl_mask = native_set_iopl_mask, - .io_delay = native_io_delay, - - .start_context_switch = paravirt_nop, - .end_context_switch = paravirt_nop, -}; - -struct pv_apic_ops pv_apic_ops = { -#ifdef CONFIG_X86_LOCAL_APIC - .startup_ipi_hook = paravirt_nop, -#endif -}; - -#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE) -/* 32-bit pagetable entries */ -#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_32) -#else -/* 64-bit pagetable entries */ -#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64) -#endif - -struct pv_mmu_ops pv_mmu_ops = { - - .read_cr2 = native_read_cr2, - .write_cr2 = native_write_cr2, - .read_cr3 = native_read_cr3, - .write_cr3 = native_write_cr3, - - .flush_tlb_user = native_flush_tlb, - .flush_tlb_kernel = native_flush_tlb_global, - .flush_tlb_single = native_flush_tlb_single, - .flush_tlb_others = native_flush_tlb_others, - - .pgd_alloc = __paravirt_pgd_alloc, - .pgd_free = paravirt_nop, - - .alloc_pte = paravirt_nop, - .alloc_pmd = paravirt_nop, - .alloc_pud = paravirt_nop, - .release_pte = paravirt_nop, - .release_pmd = paravirt_nop, - .release_pud = paravirt_nop, - - .set_pte = native_set_pte, - .set_pte_at = native_set_pte_at, - .set_pmd = native_set_pmd, - .set_pmd_at = native_set_pmd_at, - .pte_update = paravirt_nop, - .pte_update_defer = paravirt_nop, - .pmd_update = paravirt_nop, - .pmd_update_defer = paravirt_nop, - - .ptep_modify_prot_start = __ptep_modify_prot_start, - .ptep_modify_prot_commit = __ptep_modify_prot_commit, - -#if PAGETABLE_LEVELS >= 3 -#ifdef CONFIG_X86_PAE - .set_pte_atomic = native_set_pte_atomic, - .pte_clear = native_pte_clear, - .pmd_clear = native_pmd_clear, -#endif - .set_pud = native_set_pud, - - .pmd_val = PTE_IDENT, - .make_pmd = PTE_IDENT, - -#if PAGETABLE_LEVELS == 4 - .pud_val = PTE_IDENT, - .make_pud = PTE_IDENT, - - .set_pgd = native_set_pgd, -#endif -#endif /* PAGETABLE_LEVELS >= 3 */ - - .pte_val = PTE_IDENT, - .pgd_val = PTE_IDENT, - - .make_pte = PTE_IDENT, - .make_pgd = PTE_IDENT, - - .dup_mmap = paravirt_nop, - .exit_mmap = paravirt_nop, - .activate_mm = paravirt_nop, - - .lazy_mode = { - .enter = 
paravirt_nop, - .leave = paravirt_nop, - }, - - .set_fixmap = native_set_fixmap, -}; - -EXPORT_SYMBOL_GPL(pv_time_ops); -EXPORT_SYMBOL (pv_cpu_ops); -EXPORT_SYMBOL (pv_mmu_ops); -EXPORT_SYMBOL_GPL(pv_apic_ops); -EXPORT_SYMBOL_GPL(pv_info); -EXPORT_SYMBOL (pv_irq_ops); diff --git a/ANDROID_3.4.5/arch/x86/kernel/paravirt_patch_32.c b/ANDROID_3.4.5/arch/x86/kernel/paravirt_patch_32.c deleted file mode 100644 index d9f32e6d..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/paravirt_patch_32.c +++ /dev/null @@ -1,61 +0,0 @@ -#include <asm/paravirt.h> - -DEF_NATIVE(pv_irq_ops, irq_disable, "cli"); -DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); -DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf"); -DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax"); -DEF_NATIVE(pv_cpu_ops, iret, "iret"); -DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit"); -DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax"); -DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); -DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); -DEF_NATIVE(pv_cpu_ops, clts, "clts"); -DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc"); - -unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) -{ - /* arg in %eax, return in %eax */ - return 0; -} - -unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) -{ - /* arg in %edx:%eax, return in %edx:%eax */ - return 0; -} - -unsigned native_patch(u8 type, u16 clobbers, void *ibuf, - unsigned long addr, unsigned len) -{ - const unsigned char *start, *end; - unsigned ret; - -#define PATCH_SITE(ops, x) \ - case PARAVIRT_PATCH(ops.x): \ - start = start_##ops##_##x; \ - end = end_##ops##_##x; \ - goto patch_site - switch (type) { - PATCH_SITE(pv_irq_ops, irq_disable); - PATCH_SITE(pv_irq_ops, irq_enable); - PATCH_SITE(pv_irq_ops, restore_fl); - PATCH_SITE(pv_irq_ops, save_fl); - PATCH_SITE(pv_cpu_ops, iret); - PATCH_SITE(pv_cpu_ops, irq_enable_sysexit); - PATCH_SITE(pv_mmu_ops, read_cr2); - PATCH_SITE(pv_mmu_ops, read_cr3); - PATCH_SITE(pv_mmu_ops, write_cr3); - PATCH_SITE(pv_cpu_ops, clts); - PATCH_SITE(pv_cpu_ops, read_tsc); - - patch_site: - ret = paravirt_patch_insns(ibuf, len, start, end); - break; - - default: - ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); - break; - } -#undef PATCH_SITE - return ret; -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/paravirt_patch_64.c b/ANDROID_3.4.5/arch/x86/kernel/paravirt_patch_64.c deleted file mode 100644 index 3f08f34f..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/paravirt_patch_64.c +++ /dev/null @@ -1,75 +0,0 @@ -#include <asm/paravirt.h> -#include <asm/asm-offsets.h> -#include <linux/stringify.h> - -DEF_NATIVE(pv_irq_ops, irq_disable, "cli"); -DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); -DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq"); -DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax"); -DEF_NATIVE(pv_cpu_ops, iret, "iretq"); -DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); -DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); -DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); -DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)"); -DEF_NATIVE(pv_cpu_ops, clts, "clts"); -DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd"); - -DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "swapgs; sti; sysexit"); -DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq"); -DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl"); -DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs"); - -DEF_NATIVE(, mov32, "mov %edi, %eax"); -DEF_NATIVE(, mov64, "mov %rdi, %rax"); - -unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) -{ - return 
paravirt_patch_insns(insnbuf, len, - start__mov32, end__mov32); -} - -unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) -{ - return paravirt_patch_insns(insnbuf, len, - start__mov64, end__mov64); -} - -unsigned native_patch(u8 type, u16 clobbers, void *ibuf, - unsigned long addr, unsigned len) -{ - const unsigned char *start, *end; - unsigned ret; - -#define PATCH_SITE(ops, x) \ - case PARAVIRT_PATCH(ops.x): \ - start = start_##ops##_##x; \ - end = end_##ops##_##x; \ - goto patch_site - switch(type) { - PATCH_SITE(pv_irq_ops, restore_fl); - PATCH_SITE(pv_irq_ops, save_fl); - PATCH_SITE(pv_irq_ops, irq_enable); - PATCH_SITE(pv_irq_ops, irq_disable); - PATCH_SITE(pv_cpu_ops, iret); - PATCH_SITE(pv_cpu_ops, irq_enable_sysexit); - PATCH_SITE(pv_cpu_ops, usergs_sysret32); - PATCH_SITE(pv_cpu_ops, usergs_sysret64); - PATCH_SITE(pv_cpu_ops, swapgs); - PATCH_SITE(pv_mmu_ops, read_cr2); - PATCH_SITE(pv_mmu_ops, read_cr3); - PATCH_SITE(pv_mmu_ops, write_cr3); - PATCH_SITE(pv_cpu_ops, clts); - PATCH_SITE(pv_mmu_ops, flush_tlb_single); - PATCH_SITE(pv_cpu_ops, wbinvd); - - patch_site: - ret = paravirt_patch_insns(ibuf, len, start, end); - break; - - default: - ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); - break; - } -#undef PATCH_SITE - return ret; -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/pci-calgary_64.c b/ANDROID_3.4.5/arch/x86/kernel/pci-calgary_64.c deleted file mode 100644 index d0b2fb9c..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/pci-calgary_64.c +++ /dev/null @@ -1,1600 +0,0 @@ -/* - * Derived from arch/powerpc/kernel/iommu.c - * - * Copyright IBM Corporation, 2006-2007 - * Copyright (C) 2006 Jon Mason <jdmason@kudzu.us> - * - * Author: Jon Mason <jdmason@kudzu.us> - * Author: Muli Ben-Yehuda <muli@il.ibm.com> - - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/types.h> -#include <linux/slab.h> -#include <linux/mm.h> -#include <linux/spinlock.h> -#include <linux/string.h> -#include <linux/crash_dump.h> -#include <linux/dma-mapping.h> -#include <linux/bitmap.h> -#include <linux/pci_ids.h> -#include <linux/pci.h> -#include <linux/delay.h> -#include <linux/scatterlist.h> -#include <linux/iommu-helper.h> - -#include <asm/iommu.h> -#include <asm/calgary.h> -#include <asm/tce.h> -#include <asm/pci-direct.h> -#include <asm/dma.h> -#include <asm/rio.h> -#include <asm/bios_ebda.h> -#include <asm/x86_init.h> -#include <asm/iommu_table.h> - -#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT -int use_calgary __read_mostly = 1; -#else -int use_calgary __read_mostly = 0; -#endif /* CONFIG_CALGARY_DEFAULT_ENABLED */ - -#define PCI_DEVICE_ID_IBM_CALGARY 0x02a1 -#define PCI_DEVICE_ID_IBM_CALIOC2 0x0308 - -/* register offsets inside the host bridge space */ -#define CALGARY_CONFIG_REG 0x0108 -#define PHB_CSR_OFFSET 0x0110 /* Channel Status */ -#define PHB_PLSSR_OFFSET 0x0120 -#define PHB_CONFIG_RW_OFFSET 0x0160 -#define PHB_IOBASE_BAR_LOW 0x0170 -#define PHB_IOBASE_BAR_HIGH 0x0180 -#define PHB_MEM_1_LOW 0x0190 -#define PHB_MEM_1_HIGH 0x01A0 -#define PHB_IO_ADDR_SIZE 0x01B0 -#define PHB_MEM_1_SIZE 0x01C0 -#define PHB_MEM_ST_OFFSET 0x01D0 -#define PHB_AER_OFFSET 0x0200 -#define PHB_CONFIG_0_HIGH 0x0220 -#define PHB_CONFIG_0_LOW 0x0230 -#define PHB_CONFIG_0_END 0x0240 -#define PHB_MEM_2_LOW 0x02B0 -#define PHB_MEM_2_HIGH 0x02C0 -#define PHB_MEM_2_SIZE_HIGH 0x02D0 -#define PHB_MEM_2_SIZE_LOW 0x02E0 -#define PHB_DOSHOLE_OFFSET 0x08E0 - -/* CalIOC2 specific */ -#define PHB_SAVIOR_L2 0x0DB0 -#define PHB_PAGE_MIG_CTRL 0x0DA8 -#define PHB_PAGE_MIG_DEBUG 0x0DA0 -#define PHB_ROOT_COMPLEX_STATUS 0x0CB0 - -/* PHB_CONFIG_RW */ -#define PHB_TCE_ENABLE 0x20000000 -#define PHB_SLOT_DISABLE 0x1C000000 -#define PHB_DAC_DISABLE 0x01000000 -#define PHB_MEM2_ENABLE 0x00400000 -#define PHB_MCSR_ENABLE 0x00100000 -/* TAR (Table Address Register) */ -#define TAR_SW_BITS 0x0000ffffffff800fUL -#define TAR_VALID 0x0000000000000008UL -/* CSR (Channel/DMA Status Register) */ -#define CSR_AGENT_MASK 0xffe0ffff -/* CCR (Calgary Configuration Register) */ -#define CCR_2SEC_TIMEOUT 0x000000000000000EUL -/* PMCR/PMDR (Page Migration Control/Debug Registers */ -#define PMR_SOFTSTOP 0x80000000 -#define PMR_SOFTSTOPFAULT 0x40000000 -#define PMR_HARDSTOP 0x20000000 - -/* - * The maximum PHB bus number. 
- * x3950M2 (rare): 8 chassis, 48 PHBs per chassis = 384 - * x3950M2: 4 chassis, 48 PHBs per chassis = 192 - * x3950 (PCIE): 8 chassis, 32 PHBs per chassis = 256 - * x3950 (PCIX): 8 chassis, 16 PHBs per chassis = 128 - */ -#define MAX_PHB_BUS_NUM 256 - -#define PHBS_PER_CALGARY 4 - -/* register offsets in Calgary's internal register space */ -static const unsigned long tar_offsets[] = { - 0x0580 /* TAR0 */, - 0x0588 /* TAR1 */, - 0x0590 /* TAR2 */, - 0x0598 /* TAR3 */ -}; - -static const unsigned long split_queue_offsets[] = { - 0x4870 /* SPLIT QUEUE 0 */, - 0x5870 /* SPLIT QUEUE 1 */, - 0x6870 /* SPLIT QUEUE 2 */, - 0x7870 /* SPLIT QUEUE 3 */ -}; - -static const unsigned long phb_offsets[] = { - 0x8000 /* PHB0 */, - 0x9000 /* PHB1 */, - 0xA000 /* PHB2 */, - 0xB000 /* PHB3 */ -}; - -/* PHB debug registers */ - -static const unsigned long phb_debug_offsets[] = { - 0x4000 /* PHB 0 DEBUG */, - 0x5000 /* PHB 1 DEBUG */, - 0x6000 /* PHB 2 DEBUG */, - 0x7000 /* PHB 3 DEBUG */ -}; - -/* - * STUFF register for each debug PHB, - * byte 1 = start bus number, byte 2 = end bus number - */ - -#define PHB_DEBUG_STUFF_OFFSET 0x0020 - -#define EMERGENCY_PAGES 32 /* = 128KB */ - -unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED; -static int translate_empty_slots __read_mostly = 0; -static int calgary_detected __read_mostly = 0; - -static struct rio_table_hdr *rio_table_hdr __initdata; -static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; -static struct rio_detail *rio_devs[MAX_NUMNODES * 4] __initdata; - -struct calgary_bus_info { - void *tce_space; - unsigned char translation_disabled; - signed char phbid; - void __iomem *bbar; -}; - -static void calgary_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev); -static void calgary_tce_cache_blast(struct iommu_table *tbl); -static void calgary_dump_error_regs(struct iommu_table *tbl); -static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev); -static void calioc2_tce_cache_blast(struct iommu_table *tbl); -static void calioc2_dump_error_regs(struct iommu_table *tbl); -static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl); -static void get_tce_space_from_tar(void); - -static struct cal_chipset_ops calgary_chip_ops = { - .handle_quirks = calgary_handle_quirks, - .tce_cache_blast = calgary_tce_cache_blast, - .dump_error_regs = calgary_dump_error_regs -}; - -static struct cal_chipset_ops calioc2_chip_ops = { - .handle_quirks = calioc2_handle_quirks, - .tce_cache_blast = calioc2_tce_cache_blast, - .dump_error_regs = calioc2_dump_error_regs -}; - -static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, }; - -static inline int translation_enabled(struct iommu_table *tbl) -{ - /* only PHBs with translation enabled have an IOMMU table */ - return (tbl != NULL); -} - -static void iommu_range_reserve(struct iommu_table *tbl, - unsigned long start_addr, unsigned int npages) -{ - unsigned long index; - unsigned long end; - unsigned long flags; - - index = start_addr >> PAGE_SHIFT; - - /* bail out if we're asked to reserve a region we don't cover */ - if (index >= tbl->it_size) - return; - - end = index + npages; - if (end > tbl->it_size) /* don't go off the table */ - end = tbl->it_size; - - spin_lock_irqsave(&tbl->it_lock, flags); - - bitmap_set(tbl->it_map, index, npages); - - spin_unlock_irqrestore(&tbl->it_lock, flags); -} - -static unsigned long iommu_range_alloc(struct device *dev, - struct iommu_table *tbl, - unsigned int npages) -{ - unsigned long flags; - unsigned long 
offset; - unsigned long boundary_size; - - boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, - PAGE_SIZE) >> PAGE_SHIFT; - - BUG_ON(npages == 0); - - spin_lock_irqsave(&tbl->it_lock, flags); - - offset = iommu_area_alloc(tbl->it_map, tbl->it_size, tbl->it_hint, - npages, 0, boundary_size, 0); - if (offset == ~0UL) { - tbl->chip_ops->tce_cache_blast(tbl); - - offset = iommu_area_alloc(tbl->it_map, tbl->it_size, 0, - npages, 0, boundary_size, 0); - if (offset == ~0UL) { - printk(KERN_WARNING "Calgary: IOMMU full.\n"); - spin_unlock_irqrestore(&tbl->it_lock, flags); - if (panic_on_overflow) - panic("Calgary: fix the allocator.\n"); - else - return DMA_ERROR_CODE; - } - } - - tbl->it_hint = offset + npages; - BUG_ON(tbl->it_hint > tbl->it_size); - - spin_unlock_irqrestore(&tbl->it_lock, flags); - - return offset; -} - -static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, - void *vaddr, unsigned int npages, int direction) -{ - unsigned long entry; - dma_addr_t ret; - - entry = iommu_range_alloc(dev, tbl, npages); - - if (unlikely(entry == DMA_ERROR_CODE)) { - printk(KERN_WARNING "Calgary: failed to allocate %u pages in " - "iommu %p\n", npages, tbl); - return DMA_ERROR_CODE; - } - - /* set the return dma address */ - ret = (entry << PAGE_SHIFT) | ((unsigned long)vaddr & ~PAGE_MASK); - - /* put the TCEs in the HW table */ - tce_build(tbl, entry, npages, (unsigned long)vaddr & PAGE_MASK, - direction); - return ret; -} - -static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, - unsigned int npages) -{ - unsigned long entry; - unsigned long badend; - unsigned long flags; - - /* were we called with bad_dma_address? */ - badend = DMA_ERROR_CODE + (EMERGENCY_PAGES * PAGE_SIZE); - if (unlikely((dma_addr >= DMA_ERROR_CODE) && (dma_addr < badend))) { - WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA " - "address 0x%Lx\n", dma_addr); - return; - } - - entry = dma_addr >> PAGE_SHIFT; - - BUG_ON(entry + npages > tbl->it_size); - - tce_free(tbl, entry, npages); - - spin_lock_irqsave(&tbl->it_lock, flags); - - bitmap_clear(tbl->it_map, entry, npages); - - spin_unlock_irqrestore(&tbl->it_lock, flags); -} - -static inline struct iommu_table *find_iommu_table(struct device *dev) -{ - struct pci_dev *pdev; - struct pci_bus *pbus; - struct iommu_table *tbl; - - pdev = to_pci_dev(dev); - - /* search up the device tree for an iommu */ - pbus = pdev->bus; - do { - tbl = pci_iommu(pbus); - if (tbl && tbl->it_busno == pbus->number) - break; - tbl = NULL; - pbus = pbus->parent; - } while (pbus); - - BUG_ON(tbl && (tbl->it_busno != pbus->number)); - - return tbl; -} - -static void calgary_unmap_sg(struct device *dev, struct scatterlist *sglist, - int nelems,enum dma_data_direction dir, - struct dma_attrs *attrs) -{ - struct iommu_table *tbl = find_iommu_table(dev); - struct scatterlist *s; - int i; - - if (!translation_enabled(tbl)) - return; - - for_each_sg(sglist, s, nelems, i) { - unsigned int npages; - dma_addr_t dma = s->dma_address; - unsigned int dmalen = s->dma_length; - - if (dmalen == 0) - break; - - npages = iommu_num_pages(dma, dmalen, PAGE_SIZE); - iommu_free(tbl, dma, npages); - } -} - -static int calgary_map_sg(struct device *dev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir, - struct dma_attrs *attrs) -{ - struct iommu_table *tbl = find_iommu_table(dev); - struct scatterlist *s; - unsigned long vaddr; - unsigned int npages; - unsigned long entry; - int i; - - for_each_sg(sg, s, nelems, i) { - BUG_ON(!sg_page(s)); - - vaddr = (unsigned 
long) sg_virt(s); - npages = iommu_num_pages(vaddr, s->length, PAGE_SIZE); - - entry = iommu_range_alloc(dev, tbl, npages); - if (entry == DMA_ERROR_CODE) { - /* makes sure unmap knows to stop */ - s->dma_length = 0; - goto error; - } - - s->dma_address = (entry << PAGE_SHIFT) | s->offset; - - /* insert into HW table */ - tce_build(tbl, entry, npages, vaddr & PAGE_MASK, dir); - - s->dma_length = s->length; - } - - return nelems; -error: - calgary_unmap_sg(dev, sg, nelems, dir, NULL); - for_each_sg(sg, s, nelems, i) { - sg->dma_address = DMA_ERROR_CODE; - sg->dma_length = 0; - } - return 0; -} - -static dma_addr_t calgary_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - struct dma_attrs *attrs) -{ - void *vaddr = page_address(page) + offset; - unsigned long uaddr; - unsigned int npages; - struct iommu_table *tbl = find_iommu_table(dev); - - uaddr = (unsigned long)vaddr; - npages = iommu_num_pages(uaddr, size, PAGE_SIZE); - - return iommu_alloc(dev, tbl, vaddr, npages, dir); -} - -static void calgary_unmap_page(struct device *dev, dma_addr_t dma_addr, - size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) -{ - struct iommu_table *tbl = find_iommu_table(dev); - unsigned int npages; - - npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); - iommu_free(tbl, dma_addr, npages); -} - -static void* calgary_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag, struct dma_attrs *attrs) -{ - void *ret = NULL; - dma_addr_t mapping; - unsigned int npages, order; - struct iommu_table *tbl = find_iommu_table(dev); - - size = PAGE_ALIGN(size); /* size rounded up to full pages */ - npages = size >> PAGE_SHIFT; - order = get_order(size); - - flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); - - /* alloc enough pages (and possibly more) */ - ret = (void *)__get_free_pages(flag, order); - if (!ret) - goto error; - memset(ret, 0, size); - - /* set up tces to cover the allocated range */ - mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); - if (mapping == DMA_ERROR_CODE) - goto free; - *dma_handle = mapping; - return ret; -free: - free_pages((unsigned long)ret, get_order(size)); - ret = NULL; -error: - return ret; -} - -static void calgary_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, - struct dma_attrs *attrs) -{ - unsigned int npages; - struct iommu_table *tbl = find_iommu_table(dev); - - size = PAGE_ALIGN(size); - npages = size >> PAGE_SHIFT; - - iommu_free(tbl, dma_handle, npages); - free_pages((unsigned long)vaddr, get_order(size)); -} - -static struct dma_map_ops calgary_dma_ops = { - .alloc = calgary_alloc_coherent, - .free = calgary_free_coherent, - .map_sg = calgary_map_sg, - .unmap_sg = calgary_unmap_sg, - .map_page = calgary_map_page, - .unmap_page = calgary_unmap_page, -}; - -static inline void __iomem * busno_to_bbar(unsigned char num) -{ - return bus_info[num].bbar; -} - -static inline int busno_to_phbid(unsigned char num) -{ - return bus_info[num].phbid; -} - -static inline unsigned long split_queue_offset(unsigned char num) -{ - size_t idx = busno_to_phbid(num); - - return split_queue_offsets[idx]; -} - -static inline unsigned long tar_offset(unsigned char num) -{ - size_t idx = busno_to_phbid(num); - - return tar_offsets[idx]; -} - -static inline unsigned long phb_offset(unsigned char num) -{ - size_t idx = busno_to_phbid(num); - - return phb_offsets[idx]; -} - -static inline void __iomem* calgary_reg(void __iomem *bar, 
unsigned long offset) -{ - unsigned long target = ((unsigned long)bar) | offset; - return (void __iomem*)target; -} - -static inline int is_calioc2(unsigned short device) -{ - return (device == PCI_DEVICE_ID_IBM_CALIOC2); -} - -static inline int is_calgary(unsigned short device) -{ - return (device == PCI_DEVICE_ID_IBM_CALGARY); -} - -static inline int is_cal_pci_dev(unsigned short device) -{ - return (is_calgary(device) || is_calioc2(device)); -} - -static void calgary_tce_cache_blast(struct iommu_table *tbl) -{ - u64 val; - u32 aer; - int i = 0; - void __iomem *bbar = tbl->bbar; - void __iomem *target; - - /* disable arbitration on the bus */ - target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_AER_OFFSET); - aer = readl(target); - writel(0, target); - - /* read plssr to ensure it got there */ - target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_PLSSR_OFFSET); - val = readl(target); - - /* poll split queues until all DMA activity is done */ - target = calgary_reg(bbar, split_queue_offset(tbl->it_busno)); - do { - val = readq(target); - i++; - } while ((val & 0xff) != 0xff && i < 100); - if (i == 100) - printk(KERN_WARNING "Calgary: PCI bus not quiesced, " - "continuing anyway\n"); - - /* invalidate TCE cache */ - target = calgary_reg(bbar, tar_offset(tbl->it_busno)); - writeq(tbl->tar_val, target); - - /* enable arbitration */ - target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_AER_OFFSET); - writel(aer, target); - (void)readl(target); /* flush */ -} - -static void calioc2_tce_cache_blast(struct iommu_table *tbl) -{ - void __iomem *bbar = tbl->bbar; - void __iomem *target; - u64 val64; - u32 val; - int i = 0; - int count = 1; - unsigned char bus = tbl->it_busno; - -begin: - printk(KERN_DEBUG "Calgary: CalIOC2 bus 0x%x entering tce cache blast " - "sequence - count %d\n", bus, count); - - /* 1. using the Page Migration Control reg set SoftStop */ - target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_CTRL); - val = be32_to_cpu(readl(target)); - printk(KERN_DEBUG "1a. read 0x%x [LE] from %p\n", val, target); - val |= PMR_SOFTSTOP; - printk(KERN_DEBUG "1b. writing 0x%x [LE] to %p\n", val, target); - writel(cpu_to_be32(val), target); - - /* 2. poll split queues until all DMA activity is done */ - printk(KERN_DEBUG "2a. starting to poll split queues\n"); - target = calgary_reg(bbar, split_queue_offset(bus)); - do { - val64 = readq(target); - i++; - } while ((val64 & 0xff) != 0xff && i < 100); - if (i == 100) - printk(KERN_WARNING "CalIOC2: PCI bus not quiesced, " - "continuing anyway\n"); - - /* 3. poll Page Migration DEBUG for SoftStopFault */ - target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_DEBUG); - val = be32_to_cpu(readl(target)); - printk(KERN_DEBUG "3. read 0x%x [LE] from %p\n", val, target); - - /* 4. if SoftStopFault - goto (1) */ - if (val & PMR_SOFTSTOPFAULT) { - if (++count < 100) - goto begin; - else { - printk(KERN_WARNING "CalIOC2: too many SoftStopFaults, " - "aborting TCE cache flush sequence!\n"); - return; /* pray for the best */ - } - } - - /* 5. Slam into HardStop by reading PHB_PAGE_MIG_CTRL */ - target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_CTRL); - printk(KERN_DEBUG "5a. slamming into HardStop by reading %p\n", target); - val = be32_to_cpu(readl(target)); - printk(KERN_DEBUG "5b. read 0x%x [LE] from %p\n", val, target); - target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_DEBUG); - val = be32_to_cpu(readl(target)); - printk(KERN_DEBUG "5c. read 0x%x [LE] from %p (debug)\n", val, target); - - /* 6. 
invalidate TCE cache */ - printk(KERN_DEBUG "6. invalidating TCE cache\n"); - target = calgary_reg(bbar, tar_offset(bus)); - writeq(tbl->tar_val, target); - - /* 7. Re-read PMCR */ - printk(KERN_DEBUG "7a. Re-reading PMCR\n"); - target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_CTRL); - val = be32_to_cpu(readl(target)); - printk(KERN_DEBUG "7b. read 0x%x [LE] from %p\n", val, target); - - /* 8. Remove HardStop */ - printk(KERN_DEBUG "8a. removing HardStop from PMCR\n"); - target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_CTRL); - val = 0; - printk(KERN_DEBUG "8b. writing 0x%x [LE] to %p\n", val, target); - writel(cpu_to_be32(val), target); - val = be32_to_cpu(readl(target)); - printk(KERN_DEBUG "8c. read 0x%x [LE] from %p\n", val, target); -} - -static void __init calgary_reserve_mem_region(struct pci_dev *dev, u64 start, - u64 limit) -{ - unsigned int numpages; - - limit = limit | 0xfffff; - limit++; - - numpages = ((limit - start) >> PAGE_SHIFT); - iommu_range_reserve(pci_iommu(dev->bus), start, numpages); -} - -static void __init calgary_reserve_peripheral_mem_1(struct pci_dev *dev) -{ - void __iomem *target; - u64 low, high, sizelow; - u64 start, limit; - struct iommu_table *tbl = pci_iommu(dev->bus); - unsigned char busnum = dev->bus->number; - void __iomem *bbar = tbl->bbar; - - /* peripheral MEM_1 region */ - target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_LOW); - low = be32_to_cpu(readl(target)); - target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_HIGH); - high = be32_to_cpu(readl(target)); - target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_SIZE); - sizelow = be32_to_cpu(readl(target)); - - start = (high << 32) | low; - limit = sizelow; - - calgary_reserve_mem_region(dev, start, limit); -} - -static void __init calgary_reserve_peripheral_mem_2(struct pci_dev *dev) -{ - void __iomem *target; - u32 val32; - u64 low, high, sizelow, sizehigh; - u64 start, limit; - struct iommu_table *tbl = pci_iommu(dev->bus); - unsigned char busnum = dev->bus->number; - void __iomem *bbar = tbl->bbar; - - /* is it enabled? */ - target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET); - val32 = be32_to_cpu(readl(target)); - if (!(val32 & PHB_MEM2_ENABLE)) - return; - - target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_LOW); - low = be32_to_cpu(readl(target)); - target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_HIGH); - high = be32_to_cpu(readl(target)); - target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_SIZE_LOW); - sizelow = be32_to_cpu(readl(target)); - target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_SIZE_HIGH); - sizehigh = be32_to_cpu(readl(target)); - - start = (high << 32) | low; - limit = (sizehigh << 32) | sizelow; - - calgary_reserve_mem_region(dev, start, limit); -} - -/* - * some regions of the IO address space do not get translated, so we - * must not give devices IO addresses in those regions. The regions - * are the 640KB-1MB region and the two PCI peripheral memory holes. - * Reserve all of them in the IOMMU bitmap to avoid giving them out - * later. 
- */ -static void __init calgary_reserve_regions(struct pci_dev *dev) -{ - unsigned int npages; - u64 start; - struct iommu_table *tbl = pci_iommu(dev->bus); - - /* reserve EMERGENCY_PAGES from bad_dma_address and up */ - iommu_range_reserve(tbl, DMA_ERROR_CODE, EMERGENCY_PAGES); - - /* avoid the BIOS/VGA first 640KB-1MB region */ - /* for CalIOC2 - avoid the entire first MB */ - if (is_calgary(dev->device)) { - start = (640 * 1024); - npages = ((1024 - 640) * 1024) >> PAGE_SHIFT; - } else { /* calioc2 */ - start = 0; - npages = (1 * 1024 * 1024) >> PAGE_SHIFT; - } - iommu_range_reserve(tbl, start, npages); - - /* reserve the two PCI peripheral memory regions in IO space */ - calgary_reserve_peripheral_mem_1(dev); - calgary_reserve_peripheral_mem_2(dev); -} - -static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar) -{ - u64 val64; - u64 table_phys; - void __iomem *target; - int ret; - struct iommu_table *tbl; - - /* build TCE tables for each PHB */ - ret = build_tce_table(dev, bbar); - if (ret) - return ret; - - tbl = pci_iommu(dev->bus); - tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space; - - if (is_kdump_kernel()) - calgary_init_bitmap_from_tce_table(tbl); - else - tce_free(tbl, 0, tbl->it_size); - - if (is_calgary(dev->device)) - tbl->chip_ops = &calgary_chip_ops; - else if (is_calioc2(dev->device)) - tbl->chip_ops = &calioc2_chip_ops; - else - BUG(); - - calgary_reserve_regions(dev); - - /* set TARs for each PHB */ - target = calgary_reg(bbar, tar_offset(dev->bus->number)); - val64 = be64_to_cpu(readq(target)); - - /* zero out all TAR bits under sw control */ - val64 &= ~TAR_SW_BITS; - table_phys = (u64)__pa(tbl->it_base); - - val64 |= table_phys; - - BUG_ON(specified_table_size > TCE_TABLE_SIZE_8M); - val64 |= (u64) specified_table_size; - - tbl->tar_val = cpu_to_be64(val64); - - writeq(tbl->tar_val, target); - readq(target); /* flush */ - - return 0; -} - -static void __init calgary_free_bus(struct pci_dev *dev) -{ - u64 val64; - struct iommu_table *tbl = pci_iommu(dev->bus); - void __iomem *target; - unsigned int bitmapsz; - - target = calgary_reg(tbl->bbar, tar_offset(dev->bus->number)); - val64 = be64_to_cpu(readq(target)); - val64 &= ~TAR_SW_BITS; - writeq(cpu_to_be64(val64), target); - readq(target); /* flush */ - - bitmapsz = tbl->it_size / BITS_PER_BYTE; - free_pages((unsigned long)tbl->it_map, get_order(bitmapsz)); - tbl->it_map = NULL; - - kfree(tbl); - - set_pci_iommu(dev->bus, NULL); - - /* Can't free bootmem allocated memory after system is up :-( */ - bus_info[dev->bus->number].tce_space = NULL; -} - -static void calgary_dump_error_regs(struct iommu_table *tbl) -{ - void __iomem *bbar = tbl->bbar; - void __iomem *target; - u32 csr, plssr; - - target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_CSR_OFFSET); - csr = be32_to_cpu(readl(target)); - - target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_PLSSR_OFFSET); - plssr = be32_to_cpu(readl(target)); - - /* If no error, the agent ID in the CSR is not valid */ - printk(KERN_EMERG "Calgary: DMA error on Calgary PHB 0x%x, " - "0x%08x@CSR 0x%08x@PLSSR\n", tbl->it_busno, csr, plssr); -} - -static void calioc2_dump_error_regs(struct iommu_table *tbl) -{ - void __iomem *bbar = tbl->bbar; - u32 csr, csmr, plssr, mck, rcstat; - void __iomem *target; - unsigned long phboff = phb_offset(tbl->it_busno); - unsigned long erroff; - u32 errregs[7]; - int i; - - /* dump CSR */ - target = calgary_reg(bbar, phboff | PHB_CSR_OFFSET); - csr = be32_to_cpu(readl(target)); - /* dump PLSSR */ - 
target = calgary_reg(bbar, phboff | PHB_PLSSR_OFFSET); - plssr = be32_to_cpu(readl(target)); - /* dump CSMR */ - target = calgary_reg(bbar, phboff | 0x290); - csmr = be32_to_cpu(readl(target)); - /* dump mck */ - target = calgary_reg(bbar, phboff | 0x800); - mck = be32_to_cpu(readl(target)); - - printk(KERN_EMERG "Calgary: DMA error on CalIOC2 PHB 0x%x\n", - tbl->it_busno); - - printk(KERN_EMERG "Calgary: 0x%08x@CSR 0x%08x@PLSSR 0x%08x@CSMR 0x%08x@MCK\n", - csr, plssr, csmr, mck); - - /* dump rest of error regs */ - printk(KERN_EMERG "Calgary: "); - for (i = 0; i < ARRAY_SIZE(errregs); i++) { - /* err regs are at 0x810 - 0x870 */ - erroff = (0x810 + (i * 0x10)); - target = calgary_reg(bbar, phboff | erroff); - errregs[i] = be32_to_cpu(readl(target)); - printk("0x%08x@0x%lx ", errregs[i], erroff); - } - printk("\n"); - - /* root complex status */ - target = calgary_reg(bbar, phboff | PHB_ROOT_COMPLEX_STATUS); - rcstat = be32_to_cpu(readl(target)); - printk(KERN_EMERG "Calgary: 0x%08x@0x%x\n", rcstat, - PHB_ROOT_COMPLEX_STATUS); -} - -static void calgary_watchdog(unsigned long data) -{ - struct pci_dev *dev = (struct pci_dev *)data; - struct iommu_table *tbl = pci_iommu(dev->bus); - void __iomem *bbar = tbl->bbar; - u32 val32; - void __iomem *target; - - target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_CSR_OFFSET); - val32 = be32_to_cpu(readl(target)); - - /* If no error, the agent ID in the CSR is not valid */ - if (val32 & CSR_AGENT_MASK) { - tbl->chip_ops->dump_error_regs(tbl); - - /* reset error */ - writel(0, target); - - /* Disable bus that caused the error */ - target = calgary_reg(bbar, phb_offset(tbl->it_busno) | - PHB_CONFIG_RW_OFFSET); - val32 = be32_to_cpu(readl(target)); - val32 |= PHB_SLOT_DISABLE; - writel(cpu_to_be32(val32), target); - readl(target); /* flush */ - } else { - /* Reset the timer */ - mod_timer(&tbl->watchdog_timer, jiffies + 2 * HZ); - } -} - -static void __init calgary_set_split_completion_timeout(void __iomem *bbar, - unsigned char busnum, unsigned long timeout) -{ - u64 val64; - void __iomem *target; - unsigned int phb_shift = ~0; /* silence gcc */ - u64 mask; - - switch (busno_to_phbid(busnum)) { - case 0: phb_shift = (63 - 19); - break; - case 1: phb_shift = (63 - 23); - break; - case 2: phb_shift = (63 - 27); - break; - case 3: phb_shift = (63 - 35); - break; - default: - BUG_ON(busno_to_phbid(busnum)); - } - - target = calgary_reg(bbar, CALGARY_CONFIG_REG); - val64 = be64_to_cpu(readq(target)); - - /* zero out this PHB's timer bits */ - mask = ~(0xFUL << phb_shift); - val64 &= mask; - val64 |= (timeout << phb_shift); - writeq(cpu_to_be64(val64), target); - readq(target); /* flush */ -} - -static void __init calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev) -{ - unsigned char busnum = dev->bus->number; - void __iomem *bbar = tbl->bbar; - void __iomem *target; - u32 val; - - /* - * CalIOC2 designers recommend setting bit 8 in 0xnDB0 to 1 - */ - target = calgary_reg(bbar, phb_offset(busnum) | PHB_SAVIOR_L2); - val = cpu_to_be32(readl(target)); - val |= 0x00800000; - writel(cpu_to_be32(val), target); -} - -static void __init calgary_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev) -{ - unsigned char busnum = dev->bus->number; - - /* - * Give split completion a longer timeout on bus 1 for aic94xx - * http://bugzilla.kernel.org/show_bug.cgi?id=7180 - */ - if (is_calgary(dev->device) && (busnum == 1)) - calgary_set_split_completion_timeout(tbl->bbar, busnum, - CCR_2SEC_TIMEOUT); -} - -static void __init 
calgary_enable_translation(struct pci_dev *dev) -{ - u32 val32; - unsigned char busnum; - void __iomem *target; - void __iomem *bbar; - struct iommu_table *tbl; - - busnum = dev->bus->number; - tbl = pci_iommu(dev->bus); - bbar = tbl->bbar; - - /* enable TCE in PHB Config Register */ - target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET); - val32 = be32_to_cpu(readl(target)); - val32 |= PHB_TCE_ENABLE | PHB_DAC_DISABLE | PHB_MCSR_ENABLE; - - printk(KERN_INFO "Calgary: enabling translation on %s PHB %#x\n", - (dev->device == PCI_DEVICE_ID_IBM_CALGARY) ? - "Calgary" : "CalIOC2", busnum); - printk(KERN_INFO "Calgary: errant DMAs will now be prevented on this " - "bus.\n"); - - writel(cpu_to_be32(val32), target); - readl(target); /* flush */ - - init_timer(&tbl->watchdog_timer); - tbl->watchdog_timer.function = &calgary_watchdog; - tbl->watchdog_timer.data = (unsigned long)dev; - mod_timer(&tbl->watchdog_timer, jiffies); -} - -static void __init calgary_disable_translation(struct pci_dev *dev) -{ - u32 val32; - unsigned char busnum; - void __iomem *target; - void __iomem *bbar; - struct iommu_table *tbl; - - busnum = dev->bus->number; - tbl = pci_iommu(dev->bus); - bbar = tbl->bbar; - - /* disable TCE in PHB Config Register */ - target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET); - val32 = be32_to_cpu(readl(target)); - val32 &= ~(PHB_TCE_ENABLE | PHB_DAC_DISABLE | PHB_MCSR_ENABLE); - - printk(KERN_INFO "Calgary: disabling translation on PHB %#x!\n", busnum); - writel(cpu_to_be32(val32), target); - readl(target); /* flush */ - - del_timer_sync(&tbl->watchdog_timer); -} - -static void __init calgary_init_one_nontraslated(struct pci_dev *dev) -{ - pci_dev_get(dev); - set_pci_iommu(dev->bus, NULL); - - /* is the device behind a bridge? 
*/ - if (dev->bus->parent) - dev->bus->parent->self = dev; - else - dev->bus->self = dev; -} - -static int __init calgary_init_one(struct pci_dev *dev) -{ - void __iomem *bbar; - struct iommu_table *tbl; - int ret; - - bbar = busno_to_bbar(dev->bus->number); - ret = calgary_setup_tar(dev, bbar); - if (ret) - goto done; - - pci_dev_get(dev); - - if (dev->bus->parent) { - if (dev->bus->parent->self) - printk(KERN_WARNING "Calgary: IEEEE, dev %p has " - "bus->parent->self!\n", dev); - dev->bus->parent->self = dev; - } else - dev->bus->self = dev; - - tbl = pci_iommu(dev->bus); - tbl->chip_ops->handle_quirks(tbl, dev); - - calgary_enable_translation(dev); - - return 0; - -done: - return ret; -} - -static int __init calgary_locate_bbars(void) -{ - int ret; - int rioidx, phb, bus; - void __iomem *bbar; - void __iomem *target; - unsigned long offset; - u8 start_bus, end_bus; - u32 val; - - ret = -ENODATA; - for (rioidx = 0; rioidx < rio_table_hdr->num_rio_dev; rioidx++) { - struct rio_detail *rio = rio_devs[rioidx]; - - if ((rio->type != COMPAT_CALGARY) && (rio->type != ALT_CALGARY)) - continue; - - /* map entire 1MB of Calgary config space */ - bbar = ioremap_nocache(rio->BBAR, 1024 * 1024); - if (!bbar) - goto error; - - for (phb = 0; phb < PHBS_PER_CALGARY; phb++) { - offset = phb_debug_offsets[phb] | PHB_DEBUG_STUFF_OFFSET; - target = calgary_reg(bbar, offset); - - val = be32_to_cpu(readl(target)); - - start_bus = (u8)((val & 0x00FF0000) >> 16); - end_bus = (u8)((val & 0x0000FF00) >> 8); - - if (end_bus) { - for (bus = start_bus; bus <= end_bus; bus++) { - bus_info[bus].bbar = bbar; - bus_info[bus].phbid = phb; - } - } else { - bus_info[start_bus].bbar = bbar; - bus_info[start_bus].phbid = phb; - } - } - } - - return 0; - -error: - /* scan bus_info and iounmap any bbars we previously ioremap'd */ - for (bus = 0; bus < ARRAY_SIZE(bus_info); bus++) - if (bus_info[bus].bbar) - iounmap(bus_info[bus].bbar); - - return ret; -} - -static int __init calgary_init(void) -{ - int ret; - struct pci_dev *dev = NULL; - struct calgary_bus_info *info; - - ret = calgary_locate_bbars(); - if (ret) - return ret; - - /* Purely for kdump kernel case */ - if (is_kdump_kernel()) - get_tce_space_from_tar(); - - do { - dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev); - if (!dev) - break; - if (!is_cal_pci_dev(dev->device)) - continue; - - info = &bus_info[dev->bus->number]; - if (info->translation_disabled) { - calgary_init_one_nontraslated(dev); - continue; - } - - if (!info->tce_space && !translate_empty_slots) - continue; - - ret = calgary_init_one(dev); - if (ret) - goto error; - } while (1); - - dev = NULL; - for_each_pci_dev(dev) { - struct iommu_table *tbl; - - tbl = find_iommu_table(&dev->dev); - - if (translation_enabled(tbl)) - dev->dev.archdata.dma_ops = &calgary_dma_ops; - } - - return ret; - -error: - do { - dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev); - if (!dev) - break; - if (!is_cal_pci_dev(dev->device)) - continue; - - info = &bus_info[dev->bus->number]; - if (info->translation_disabled) { - pci_dev_put(dev); - continue; - } - if (!info->tce_space && !translate_empty_slots) - continue; - - calgary_disable_translation(dev); - calgary_free_bus(dev); - pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */ - dev->dev.archdata.dma_ops = NULL; - } while (1); - - return ret; -} - -static inline int __init determine_tce_table_size(u64 ram) -{ - int ret; - - if (specified_table_size != TCE_TABLE_SIZE_UNSPECIFIED) - return specified_table_size; - - /* - * Table sizes are from 0 
to 7 (TCE_TABLE_SIZE_64K to - * TCE_TABLE_SIZE_8M). Table size 0 has 8K entries and each - * larger table size has twice as many entries, so shift the - * max ram address by 13 to divide by 8K and then look at the - * order of the result to choose between 0-7. - */ - ret = get_order(ram >> 13); - if (ret > TCE_TABLE_SIZE_8M) - ret = TCE_TABLE_SIZE_8M; - - return ret; -} - -static int __init build_detail_arrays(void) -{ - unsigned long ptr; - unsigned numnodes, i; - int scal_detail_size, rio_detail_size; - - numnodes = rio_table_hdr->num_scal_dev; - if (numnodes > MAX_NUMNODES){ - printk(KERN_WARNING - "Calgary: MAX_NUMNODES too low! Defined as %d, " - "but system has %d nodes.\n", - MAX_NUMNODES, numnodes); - return -ENODEV; - } - - switch (rio_table_hdr->version){ - case 2: - scal_detail_size = 11; - rio_detail_size = 13; - break; - case 3: - scal_detail_size = 12; - rio_detail_size = 15; - break; - default: - printk(KERN_WARNING - "Calgary: Invalid Rio Grande Table Version: %d\n", - rio_table_hdr->version); - return -EPROTO; - } - - ptr = ((unsigned long)rio_table_hdr) + 3; - for (i = 0; i < numnodes; i++, ptr += scal_detail_size) - scal_devs[i] = (struct scal_detail *)ptr; - - for (i = 0; i < rio_table_hdr->num_rio_dev; - i++, ptr += rio_detail_size) - rio_devs[i] = (struct rio_detail *)ptr; - - return 0; -} - -static int __init calgary_bus_has_devices(int bus, unsigned short pci_dev) -{ - int dev; - u32 val; - - if (pci_dev == PCI_DEVICE_ID_IBM_CALIOC2) { - /* - * FIXME: properly scan for devices across the - * PCI-to-PCI bridge on every CalIOC2 port. - */ - return 1; - } - - for (dev = 1; dev < 8; dev++) { - val = read_pci_config(bus, dev, 0, 0); - if (val != 0xffffffff) - break; - } - return (val != 0xffffffff); -} - -/* - * calgary_init_bitmap_from_tce_table(): - * Function for kdump case. In the second/kdump kernel initialize - * the bitmap based on the tce table entries obtained from first kernel - */ -static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl) -{ - u64 *tp; - unsigned int index; - tp = ((u64 *)tbl->it_base); - for (index = 0 ; index < tbl->it_size; index++) { - if (*tp != 0x0) - set_bit(index, tbl->it_map); - tp++; - } -} - -/* - * get_tce_space_from_tar(): - * Function for kdump case. 
Get the tce tables from first kernel - * by reading the contents of the base address register of calgary iommu - */ -static void __init get_tce_space_from_tar(void) -{ - int bus; - void __iomem *target; - unsigned long tce_space; - - for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { - struct calgary_bus_info *info = &bus_info[bus]; - unsigned short pci_device; - u32 val; - - val = read_pci_config(bus, 0, 0, 0); - pci_device = (val & 0xFFFF0000) >> 16; - - if (!is_cal_pci_dev(pci_device)) - continue; - if (info->translation_disabled) - continue; - - if (calgary_bus_has_devices(bus, pci_device) || - translate_empty_slots) { - target = calgary_reg(bus_info[bus].bbar, - tar_offset(bus)); - tce_space = be64_to_cpu(readq(target)); - tce_space = tce_space & TAR_SW_BITS; - - tce_space = tce_space & (~specified_table_size); - info->tce_space = (u64 *)__va(tce_space); - } - } - return; -} - -static int __init calgary_iommu_init(void) -{ - int ret; - - /* ok, we're trying to use Calgary - let's roll */ - printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n"); - - ret = calgary_init(); - if (ret) { - printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " - "falling back to no_iommu\n", ret); - return ret; - } - - return 0; -} - -int __init detect_calgary(void) -{ - int bus; - void *tbl; - int calgary_found = 0; - unsigned long ptr; - unsigned int offset, prev_offset; - int ret; - - /* - * if the user specified iommu=off or iommu=soft or we found - * another HW IOMMU already, bail out. - */ - if (no_iommu || iommu_detected) - return -ENODEV; - - if (!use_calgary) - return -ENODEV; - - if (!early_pci_allowed()) - return -ENODEV; - - printk(KERN_DEBUG "Calgary: detecting Calgary via BIOS EBDA area\n"); - - ptr = (unsigned long)phys_to_virt(get_bios_ebda()); - - rio_table_hdr = NULL; - prev_offset = 0; - offset = 0x180; - /* - * The next offset is stored in the 1st word. - * Only parse up until the offset increases: - */ - while (offset > prev_offset) { - /* The block id is stored in the 2nd word */ - if (*((unsigned short *)(ptr + offset + 2)) == 0x4752){ - /* set the pointer past the offset & block id */ - rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4); - break; - } - prev_offset = offset; - offset = *((unsigned short *)(ptr + offset)); - } - if (!rio_table_hdr) { - printk(KERN_DEBUG "Calgary: Unable to locate Rio Grande table " - "in EBDA - bailing!\n"); - return -ENODEV; - } - - ret = build_detail_arrays(); - if (ret) { - printk(KERN_DEBUG "Calgary: build_detail_arrays ret %d\n", ret); - return -ENOMEM; - } - - specified_table_size = determine_tce_table_size((is_kdump_kernel() ? - saved_max_pfn : max_pfn) * PAGE_SIZE); - - for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { - struct calgary_bus_info *info = &bus_info[bus]; - unsigned short pci_device; - u32 val; - - val = read_pci_config(bus, 0, 0, 0); - pci_device = (val & 0xFFFF0000) >> 16; - - if (!is_cal_pci_dev(pci_device)) - continue; - - if (info->translation_disabled) - continue; - - if (calgary_bus_has_devices(bus, pci_device) || - translate_empty_slots) { - /* - * If it is kdump kernel, find and use tce tables - * from first kernel, else allocate tce tables here - */ - if (!is_kdump_kernel()) { - tbl = alloc_tce_table(); - if (!tbl) - goto cleanup; - info->tce_space = tbl; - } - calgary_found = 1; - } - } - - printk(KERN_DEBUG "Calgary: finished detection, Calgary %s\n", - calgary_found ? 
"found" : "not found"); - - if (calgary_found) { - iommu_detected = 1; - calgary_detected = 1; - printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected.\n"); - printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n", - specified_table_size); - - x86_init.iommu.iommu_init = calgary_iommu_init; - } - return calgary_found; - -cleanup: - for (--bus; bus >= 0; --bus) { - struct calgary_bus_info *info = &bus_info[bus]; - - if (info->tce_space) - free_tce_table(info->tce_space); - } - return -ENOMEM; -} - -static int __init calgary_parse_options(char *p) -{ - unsigned int bridge; - size_t len; - char* endp; - - while (*p) { - if (!strncmp(p, "64k", 3)) - specified_table_size = TCE_TABLE_SIZE_64K; - else if (!strncmp(p, "128k", 4)) - specified_table_size = TCE_TABLE_SIZE_128K; - else if (!strncmp(p, "256k", 4)) - specified_table_size = TCE_TABLE_SIZE_256K; - else if (!strncmp(p, "512k", 4)) - specified_table_size = TCE_TABLE_SIZE_512K; - else if (!strncmp(p, "1M", 2)) - specified_table_size = TCE_TABLE_SIZE_1M; - else if (!strncmp(p, "2M", 2)) - specified_table_size = TCE_TABLE_SIZE_2M; - else if (!strncmp(p, "4M", 2)) - specified_table_size = TCE_TABLE_SIZE_4M; - else if (!strncmp(p, "8M", 2)) - specified_table_size = TCE_TABLE_SIZE_8M; - - len = strlen("translate_empty_slots"); - if (!strncmp(p, "translate_empty_slots", len)) - translate_empty_slots = 1; - - len = strlen("disable"); - if (!strncmp(p, "disable", len)) { - p += len; - if (*p == '=') - ++p; - if (*p == '\0') - break; - bridge = simple_strtoul(p, &endp, 0); - if (p == endp) - break; - - if (bridge < MAX_PHB_BUS_NUM) { - printk(KERN_INFO "Calgary: disabling " - "translation for PHB %#x\n", bridge); - bus_info[bridge].translation_disabled = 1; - } - } - - p = strpbrk(p, ","); - if (!p) - break; - - p++; /* skip ',' */ - } - return 1; -} -__setup("calgary=", calgary_parse_options); - -static void __init calgary_fixup_one_tce_space(struct pci_dev *dev) -{ - struct iommu_table *tbl; - unsigned int npages; - int i; - - tbl = pci_iommu(dev->bus); - - for (i = 0; i < 4; i++) { - struct resource *r = &dev->resource[PCI_BRIDGE_RESOURCES + i]; - - /* Don't give out TCEs that map MEM resources */ - if (!(r->flags & IORESOURCE_MEM)) - continue; - - /* 0-based? we reserve the whole 1st MB anyway */ - if (!r->start) - continue; - - /* cover the whole region */ - npages = resource_size(r) >> PAGE_SHIFT; - npages++; - - iommu_range_reserve(tbl, r->start, npages); - } -} - -static int __init calgary_fixup_tce_spaces(void) -{ - struct pci_dev *dev = NULL; - struct calgary_bus_info *info; - - if (no_iommu || swiotlb || !calgary_detected) - return -ENODEV; - - printk(KERN_DEBUG "Calgary: fixing up tce spaces\n"); - - do { - dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev); - if (!dev) - break; - if (!is_cal_pci_dev(dev->device)) - continue; - - info = &bus_info[dev->bus->number]; - if (info->translation_disabled) - continue; - - if (!info->tce_space) - continue; - - calgary_fixup_one_tce_space(dev); - - } while (1); - - return 0; -} - -/* - * We need to be call after pcibios_assign_resources (fs_initcall level) - * and before device_initcall. 
- */ -rootfs_initcall(calgary_fixup_tce_spaces); - -IOMMU_INIT_POST(detect_calgary); diff --git a/ANDROID_3.4.5/arch/x86/kernel/pci-dma.c b/ANDROID_3.4.5/arch/x86/kernel/pci-dma.c deleted file mode 100644 index 3003250a..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/pci-dma.c +++ /dev/null @@ -1,273 +0,0 @@ -#include <linux/dma-mapping.h> -#include <linux/dma-debug.h> -#include <linux/dmar.h> -#include <linux/export.h> -#include <linux/bootmem.h> -#include <linux/gfp.h> -#include <linux/pci.h> -#include <linux/kmemleak.h> - -#include <asm/proto.h> -#include <asm/dma.h> -#include <asm/iommu.h> -#include <asm/gart.h> -#include <asm/calgary.h> -#include <asm/x86_init.h> -#include <asm/iommu_table.h> - -static int forbid_dac __read_mostly; - -struct dma_map_ops *dma_ops = &nommu_dma_ops; -EXPORT_SYMBOL(dma_ops); - -static int iommu_sac_force __read_mostly; - -#ifdef CONFIG_IOMMU_DEBUG -int panic_on_overflow __read_mostly = 1; -int force_iommu __read_mostly = 1; -#else -int panic_on_overflow __read_mostly = 0; -int force_iommu __read_mostly = 0; -#endif - -int iommu_merge __read_mostly = 0; - -int no_iommu __read_mostly; -/* Set this to 1 if there is a HW IOMMU in the system */ -int iommu_detected __read_mostly = 0; - -/* - * This variable becomes 1 if iommu=pt is passed on the kernel command line. - * If this variable is 1, IOMMU implementations do no DMA translation for - * devices and allow every device to access to whole physical memory. This is - * useful if a user wants to use an IOMMU only for KVM device assignment to - * guests and not for driver dma translation. - */ -int iommu_pass_through __read_mostly; - -/* - * Group multi-function PCI devices into a single device-group for the - * iommu_device_group interface. This tells the iommu driver to pretend - * it cannot distinguish between functions of a device, exposing only one - * group for the device. Useful for disallowing use of individual PCI - * functions from userspace drivers. - */ -int iommu_group_mf __read_mostly; - -extern struct iommu_table_entry __iommu_table[], __iommu_table_end[]; - -/* Dummy device used for NULL arguments (normally ISA). 
*/ -struct device x86_dma_fallback_dev = { - .init_name = "fallback device", - .coherent_dma_mask = ISA_DMA_BIT_MASK, - .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask, -}; -EXPORT_SYMBOL(x86_dma_fallback_dev); - -/* Number of entries preallocated for DMA-API debugging */ -#define PREALLOC_DMA_DEBUG_ENTRIES 32768 - -int dma_set_mask(struct device *dev, u64 mask) -{ - if (!dev->dma_mask || !dma_supported(dev, mask)) - return -EIO; - - *dev->dma_mask = mask; - - return 0; -} -EXPORT_SYMBOL(dma_set_mask); - -void __init pci_iommu_alloc(void) -{ - struct iommu_table_entry *p; - - sort_iommu_table(__iommu_table, __iommu_table_end); - check_iommu_entries(__iommu_table, __iommu_table_end); - - for (p = __iommu_table; p < __iommu_table_end; p++) { - if (p && p->detect && p->detect() > 0) { - p->flags |= IOMMU_DETECTED; - if (p->early_init) - p->early_init(); - if (p->flags & IOMMU_FINISH_IF_DETECTED) - break; - } - } -} -void *dma_generic_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_addr, gfp_t flag, - struct dma_attrs *attrs) -{ - unsigned long dma_mask; - struct page *page; - dma_addr_t addr; - - dma_mask = dma_alloc_coherent_mask(dev, flag); - - flag |= __GFP_ZERO; -again: - page = alloc_pages_node(dev_to_node(dev), flag, get_order(size)); - if (!page) - return NULL; - - addr = page_to_phys(page); - if (addr + size > dma_mask) { - __free_pages(page, get_order(size)); - - if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) { - flag = (flag & ~GFP_DMA32) | GFP_DMA; - goto again; - } - - return NULL; - } - - *dma_addr = addr; - return page_address(page); -} - -/* - * See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel - * parameter documentation. - */ -static __init int iommu_setup(char *p) -{ - iommu_merge = 1; - - if (!p) - return -EINVAL; - - while (*p) { - if (!strncmp(p, "off", 3)) - no_iommu = 1; - /* gart_parse_options has more force support */ - if (!strncmp(p, "force", 5)) - force_iommu = 1; - if (!strncmp(p, "noforce", 7)) { - iommu_merge = 0; - force_iommu = 0; - } - - if (!strncmp(p, "biomerge", 8)) { - iommu_merge = 1; - force_iommu = 1; - } - if (!strncmp(p, "panic", 5)) - panic_on_overflow = 1; - if (!strncmp(p, "nopanic", 7)) - panic_on_overflow = 0; - if (!strncmp(p, "merge", 5)) { - iommu_merge = 1; - force_iommu = 1; - } - if (!strncmp(p, "nomerge", 7)) - iommu_merge = 0; - if (!strncmp(p, "forcesac", 8)) - iommu_sac_force = 1; - if (!strncmp(p, "allowdac", 8)) - forbid_dac = 0; - if (!strncmp(p, "nodac", 5)) - forbid_dac = 1; - if (!strncmp(p, "usedac", 6)) { - forbid_dac = -1; - return 1; - } -#ifdef CONFIG_SWIOTLB - if (!strncmp(p, "soft", 4)) - swiotlb = 1; -#endif - if (!strncmp(p, "pt", 2)) - iommu_pass_through = 1; - if (!strncmp(p, "group_mf", 8)) - iommu_group_mf = 1; - - gart_parse_options(p); - -#ifdef CONFIG_CALGARY_IOMMU - if (!strncmp(p, "calgary", 7)) - use_calgary = 1; -#endif /* CONFIG_CALGARY_IOMMU */ - - p += strcspn(p, ","); - if (*p == ',') - ++p; - } - return 0; -} -early_param("iommu", iommu_setup); - -int dma_supported(struct device *dev, u64 mask) -{ - struct dma_map_ops *ops = get_dma_ops(dev); - -#ifdef CONFIG_PCI - if (mask > 0xffffffff && forbid_dac > 0) { - dev_info(dev, "PCI: Disallowing DAC for device\n"); - return 0; - } -#endif - - if (ops->dma_supported) - return ops->dma_supported(dev, mask); - - /* Copied from i386. Doesn't make much sense, because it will - only work for pci_alloc_coherent. - The caller just has to use GFP_DMA in this case. 
*/ - if (mask < DMA_BIT_MASK(24)) - return 0; - - /* Tell the device to use SAC when IOMMU force is on. This - allows the driver to use cheaper accesses in some cases. - - Problem with this is that if we overflow the IOMMU area and - return DAC as fallback address the device may not handle it - correctly. - - As a special case some controllers have a 39bit address - mode that is as efficient as 32bit (aic79xx). Don't force - SAC for these. Assume all masks <= 40 bits are of this - type. Normally this doesn't make any difference, but gives - more gentle handling of IOMMU overflow. */ - if (iommu_sac_force && (mask >= DMA_BIT_MASK(40))) { - dev_info(dev, "Force SAC with mask %Lx\n", mask); - return 0; - } - - return 1; -} -EXPORT_SYMBOL(dma_supported); - -static int __init pci_iommu_init(void) -{ - struct iommu_table_entry *p; - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); - -#ifdef CONFIG_PCI - dma_debug_add_bus(&pci_bus_type); -#endif - x86_init.iommu.iommu_init(); - - for (p = __iommu_table; p < __iommu_table_end; p++) { - if (p && (p->flags & IOMMU_DETECTED) && p->late_init) - p->late_init(); - } - - return 0; -} -/* Must execute after PCI subsystem */ -rootfs_initcall(pci_iommu_init); - -#ifdef CONFIG_PCI -/* Many VIA bridges seem to corrupt data for DAC. Disable it here */ - -static __devinit void via_no_dac(struct pci_dev *dev) -{ - if (forbid_dac == 0) { - dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n"); - forbid_dac = 1; - } -} -DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, - PCI_CLASS_BRIDGE_PCI, 8, via_no_dac); -#endif diff --git a/ANDROID_3.4.5/arch/x86/kernel/pci-iommu_table.c b/ANDROID_3.4.5/arch/x86/kernel/pci-iommu_table.c deleted file mode 100644 index 35ccf756..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/pci-iommu_table.c +++ /dev/null @@ -1,79 +0,0 @@ -#include <linux/dma-mapping.h> -#include <asm/iommu_table.h> -#include <linux/string.h> -#include <linux/kallsyms.h> - - -#define DEBUG 1 - -static struct iommu_table_entry * __init -find_dependents_of(struct iommu_table_entry *start, - struct iommu_table_entry *finish, - struct iommu_table_entry *q) -{ - struct iommu_table_entry *p; - - if (!q) - return NULL; - - for (p = start; p < finish; p++) - if (p->detect == q->depend) - return p; - - return NULL; -} - - -void __init sort_iommu_table(struct iommu_table_entry *start, - struct iommu_table_entry *finish) { - - struct iommu_table_entry *p, *q, tmp; - - for (p = start; p < finish; p++) { -again: - q = find_dependents_of(start, finish, p); - /* We are bit sneaky here. We use the memory address to figure - * out if the node we depend on is past our point, if so, swap. - */ - if (q > p) { - tmp = *p; - memmove(p, q, sizeof(*p)); - *q = tmp; - goto again; - } - } - -} - -#ifdef DEBUG -void __init check_iommu_entries(struct iommu_table_entry *start, - struct iommu_table_entry *finish) -{ - struct iommu_table_entry *p, *q, *x; - - /* Simple cyclic dependency checker. */ - for (p = start; p < finish; p++) { - q = find_dependents_of(start, finish, p); - x = find_dependents_of(start, finish, q); - if (p == x) { - printk(KERN_ERR "CYCLIC DEPENDENCY FOUND! %pS depends on %pS and vice-versa. BREAKING IT.\n", - p->detect, q->detect); - /* Heavy handed way..*/ - x->depend = 0; - } - } - - for (p = start; p < finish; p++) { - q = find_dependents_of(p, finish, p); - if (q && q > p) { - printk(KERN_ERR "EXECUTION ORDER INVALID! 
%pS should be called before %pS!\n", - p->detect, q->detect); - } - } -} -#else -inline void check_iommu_entries(struct iommu_table_entry *start, - struct iommu_table_entry *finish) -{ -} -#endif diff --git a/ANDROID_3.4.5/arch/x86/kernel/pci-nommu.c b/ANDROID_3.4.5/arch/x86/kernel/pci-nommu.c deleted file mode 100644 index f9605068..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/pci-nommu.c +++ /dev/null @@ -1,106 +0,0 @@ -/* Fallback functions when the main IOMMU code is not compiled in. This - code is roughly equivalent to i386. */ -#include <linux/dma-mapping.h> -#include <linux/scatterlist.h> -#include <linux/string.h> -#include <linux/init.h> -#include <linux/gfp.h> -#include <linux/pci.h> -#include <linux/mm.h> - -#include <asm/processor.h> -#include <asm/iommu.h> -#include <asm/dma.h> - -static int -check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) -{ - if (hwdev && !dma_capable(hwdev, bus, size)) { - if (*hwdev->dma_mask >= DMA_BIT_MASK(32)) - printk(KERN_ERR - "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", - name, (long long)bus, size, - (long long)*hwdev->dma_mask); - return 0; - } - return 1; -} - -static dma_addr_t nommu_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - struct dma_attrs *attrs) -{ - dma_addr_t bus = page_to_phys(page) + offset; - WARN_ON(size == 0); - if (!check_addr("map_single", dev, bus, size)) - return DMA_ERROR_CODE; - flush_write_buffers(); - return bus; -} - -/* Map a set of buffers described by scatterlist in streaming - * mode for DMA. This is the scatter-gather version of the - * above pci_map_single interface. Here the scatter gather list - * elements are each tagged with the appropriate dma address - * and length. They are obtained via sg_dma_{address,length}(SG). - * - * NOTE: An implementation may be able to use a smaller number of - * DMA address/length pairs than there are SG table elements. - * (for example via virtual mapping capabilities) - * The routine returns the number of addr/length pairs actually - * used, at most nents. - * - * Device ownership issues as mentioned above for pci_map_single are - * the same here. 
- */ -static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, - struct dma_attrs *attrs) -{ - struct scatterlist *s; - int i; - - WARN_ON(nents == 0 || sg[0].length == 0); - - for_each_sg(sg, s, nents, i) { - BUG_ON(!sg_page(s)); - s->dma_address = sg_phys(s); - if (!check_addr("map_sg", hwdev, s->dma_address, s->length)) - return 0; - s->dma_length = s->length; - } - flush_write_buffers(); - return nents; -} - -static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_addr, struct dma_attrs *attrs) -{ - free_pages((unsigned long)vaddr, get_order(size)); -} - -static void nommu_sync_single_for_device(struct device *dev, - dma_addr_t addr, size_t size, - enum dma_data_direction dir) -{ - flush_write_buffers(); -} - - -static void nommu_sync_sg_for_device(struct device *dev, - struct scatterlist *sg, int nelems, - enum dma_data_direction dir) -{ - flush_write_buffers(); -} - -struct dma_map_ops nommu_dma_ops = { - .alloc = dma_generic_alloc_coherent, - .free = nommu_free_coherent, - .map_sg = nommu_map_sg, - .map_page = nommu_map_page, - .sync_single_for_device = nommu_sync_single_for_device, - .sync_sg_for_device = nommu_sync_sg_for_device, - .is_phys = 1, -}; diff --git a/ANDROID_3.4.5/arch/x86/kernel/pci-swiotlb.c b/ANDROID_3.4.5/arch/x86/kernel/pci-swiotlb.c deleted file mode 100644 index 6c483ba9..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/pci-swiotlb.c +++ /dev/null @@ -1,109 +0,0 @@ -/* Glue code to lib/swiotlb.c */ - -#include <linux/pci.h> -#include <linux/cache.h> -#include <linux/module.h> -#include <linux/swiotlb.h> -#include <linux/bootmem.h> -#include <linux/dma-mapping.h> - -#include <asm/iommu.h> -#include <asm/swiotlb.h> -#include <asm/dma.h> -#include <asm/xen/swiotlb-xen.h> -#include <asm/iommu_table.h> -int swiotlb __read_mostly; - -static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags, - struct dma_attrs *attrs) -{ - void *vaddr; - - vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags, - attrs); - if (vaddr) - return vaddr; - - return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags); -} - -static void x86_swiotlb_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_addr, - struct dma_attrs *attrs) -{ - swiotlb_free_coherent(dev, size, vaddr, dma_addr); -} - -static struct dma_map_ops swiotlb_dma_ops = { - .mapping_error = swiotlb_dma_mapping_error, - .alloc = x86_swiotlb_alloc_coherent, - .free = x86_swiotlb_free_coherent, - .sync_single_for_cpu = swiotlb_sync_single_for_cpu, - .sync_single_for_device = swiotlb_sync_single_for_device, - .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, - .sync_sg_for_device = swiotlb_sync_sg_for_device, - .map_sg = swiotlb_map_sg_attrs, - .unmap_sg = swiotlb_unmap_sg_attrs, - .map_page = swiotlb_map_page, - .unmap_page = swiotlb_unmap_page, - .dma_supported = NULL, -}; - -/* - * pci_swiotlb_detect_override - set swiotlb to 1 if necessary - * - * This returns non-zero if we are forced to use swiotlb (by the boot - * option). - */ -int __init pci_swiotlb_detect_override(void) -{ - int use_swiotlb = swiotlb | swiotlb_force; - - if (swiotlb_force) - swiotlb = 1; - - return use_swiotlb; -} -IOMMU_INIT_FINISH(pci_swiotlb_detect_override, - pci_xen_swiotlb_detect, - pci_swiotlb_init, - pci_swiotlb_late_init); - -/* - * if 4GB or more detected (and iommu=off not set) return 1 - * and set swiotlb to 1. 
- */ -int __init pci_swiotlb_detect_4gb(void) -{ - /* don't initialize swiotlb if iommu=off (no_iommu=1) */ -#ifdef CONFIG_X86_64 - if (!no_iommu && max_pfn > MAX_DMA32_PFN) - swiotlb = 1; -#endif - return swiotlb; -} -IOMMU_INIT(pci_swiotlb_detect_4gb, - pci_swiotlb_detect_override, - pci_swiotlb_init, - pci_swiotlb_late_init); - -void __init pci_swiotlb_init(void) -{ - if (swiotlb) { - swiotlb_init(0); - dma_ops = &swiotlb_dma_ops; - } -} - -void __init pci_swiotlb_late_init(void) -{ - /* An IOMMU turned us off. */ - if (!swiotlb) - swiotlb_free(); - else { - printk(KERN_INFO "PCI-DMA: " - "Using software bounce buffering for IO (SWIOTLB)\n"); - swiotlb_print_info(); - } -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/pcspeaker.c b/ANDROID_3.4.5/arch/x86/kernel/pcspeaker.c deleted file mode 100644 index a311ffca..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/pcspeaker.c +++ /dev/null @@ -1,13 +0,0 @@ -#include <linux/platform_device.h> -#include <linux/err.h> -#include <linux/init.h> - -static __init int add_pcspkr(void) -{ - struct platform_device *pd; - - pd = platform_device_register_simple("pcspkr", -1, NULL, 0); - - return IS_ERR(pd) ? PTR_ERR(pd) : 0; -} -device_initcall(add_pcspkr); diff --git a/ANDROID_3.4.5/arch/x86/kernel/probe_roms.c b/ANDROID_3.4.5/arch/x86/kernel/probe_roms.c deleted file mode 100644 index 0bc72e20..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/probe_roms.c +++ /dev/null @@ -1,268 +0,0 @@ -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/uaccess.h> -#include <linux/mmzone.h> -#include <linux/ioport.h> -#include <linux/seq_file.h> -#include <linux/console.h> -#include <linux/init.h> -#include <linux/edd.h> -#include <linux/dmi.h> -#include <linux/pfn.h> -#include <linux/pci.h> -#include <linux/export.h> - -#include <asm/probe_roms.h> -#include <asm/pci-direct.h> -#include <asm/e820.h> -#include <asm/mmzone.h> -#include <asm/setup.h> -#include <asm/sections.h> -#include <asm/io.h> -#include <asm/setup_arch.h> - -static struct resource system_rom_resource = { - .name = "System ROM", - .start = 0xf0000, - .end = 0xfffff, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}; - -static struct resource extension_rom_resource = { - .name = "Extension ROM", - .start = 0xe0000, - .end = 0xeffff, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}; - -static struct resource adapter_rom_resources[] = { { - .name = "Adapter ROM", - .start = 0xc8000, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -} }; - -static struct resource video_rom_resource = { - .name = "Video ROM", - .start = 0xc0000, - .end = 0xc7fff, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}; - -/* does this oprom support the given pci device, or any of the devices - * that the driver supports? 
- */ -static bool match_id(struct pci_dev *pdev, unsigned short vendor, unsigned short device) -{ - struct pci_driver *drv = pdev->driver; - const struct pci_device_id *id; - - if (pdev->vendor == vendor && pdev->device == device) - return true; - - for (id = drv ? drv->id_table : NULL; id && id->vendor; id++) - if (id->vendor == vendor && id->device == device) - break; - - return id && id->vendor; -} - -static bool probe_list(struct pci_dev *pdev, unsigned short vendor, - const unsigned char *rom_list) -{ - unsigned short device; - - do { - if (probe_kernel_address(rom_list, device) != 0) - device = 0; - - if (device && match_id(pdev, vendor, device)) - break; - - rom_list += 2; - } while (device); - - return !!device; -} - -static struct resource *find_oprom(struct pci_dev *pdev) -{ - struct resource *oprom = NULL; - int i; - - for (i = 0; i < ARRAY_SIZE(adapter_rom_resources); i++) { - struct resource *res = &adapter_rom_resources[i]; - unsigned short offset, vendor, device, list, rev; - const unsigned char *rom; - - if (res->end == 0) - break; - - rom = isa_bus_to_virt(res->start); - if (probe_kernel_address(rom + 0x18, offset) != 0) - continue; - - if (probe_kernel_address(rom + offset + 0x4, vendor) != 0) - continue; - - if (probe_kernel_address(rom + offset + 0x6, device) != 0) - continue; - - if (match_id(pdev, vendor, device)) { - oprom = res; - break; - } - - if (probe_kernel_address(rom + offset + 0x8, list) == 0 && - probe_kernel_address(rom + offset + 0xc, rev) == 0 && - rev >= 3 && list && - probe_list(pdev, vendor, rom + offset + list)) { - oprom = res; - break; - } - } - - return oprom; -} - -void *pci_map_biosrom(struct pci_dev *pdev) -{ - struct resource *oprom = find_oprom(pdev); - - if (!oprom) - return NULL; - - return ioremap(oprom->start, resource_size(oprom)); -} -EXPORT_SYMBOL(pci_map_biosrom); - -void pci_unmap_biosrom(void __iomem *image) -{ - iounmap(image); -} -EXPORT_SYMBOL(pci_unmap_biosrom); - -size_t pci_biosrom_size(struct pci_dev *pdev) -{ - struct resource *oprom = find_oprom(pdev); - - return oprom ? 
resource_size(oprom) : 0; -} -EXPORT_SYMBOL(pci_biosrom_size); - -#define ROMSIGNATURE 0xaa55 - -static int __init romsignature(const unsigned char *rom) -{ - const unsigned short * const ptr = (const unsigned short *)rom; - unsigned short sig; - - return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE; -} - -static int __init romchecksum(const unsigned char *rom, unsigned long length) -{ - unsigned char sum, c; - - for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--) - sum += c; - return !length && !sum; -} - -void __init probe_roms(void) -{ - const unsigned char *rom; - unsigned long start, length, upper; - unsigned char c; - int i; - - /* video rom */ - upper = adapter_rom_resources[0].start; - for (start = video_rom_resource.start; start < upper; start += 2048) { - rom = isa_bus_to_virt(start); - if (!romsignature(rom)) - continue; - - video_rom_resource.start = start; - - if (probe_kernel_address(rom + 2, c) != 0) - continue; - - /* 0 < length <= 0x7f * 512, historically */ - length = c * 512; - - /* if checksum okay, trust length byte */ - if (length && romchecksum(rom, length)) - video_rom_resource.end = start + length - 1; - - request_resource(&iomem_resource, &video_rom_resource); - break; - } - - start = (video_rom_resource.end + 1 + 2047) & ~2047UL; - if (start < upper) - start = upper; - - /* system rom */ - request_resource(&iomem_resource, &system_rom_resource); - upper = system_rom_resource.start; - - /* check for extension rom (ignore length byte!) */ - rom = isa_bus_to_virt(extension_rom_resource.start); - if (romsignature(rom)) { - length = resource_size(&extension_rom_resource); - if (romchecksum(rom, length)) { - request_resource(&iomem_resource, &extension_rom_resource); - upper = extension_rom_resource.start; - } - } - - /* check for adapter roms on 2k boundaries */ - for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) { - rom = isa_bus_to_virt(start); - if (!romsignature(rom)) - continue; - - if (probe_kernel_address(rom + 2, c) != 0) - continue; - - /* 0 < length <= 0x7f * 512, historically */ - length = c * 512; - - /* but accept any length that fits if checksum okay */ - if (!length || start + length > upper || !romchecksum(rom, length)) - continue; - - adapter_rom_resources[i].start = start; - adapter_rom_resources[i].end = start + length - 1; - request_resource(&iomem_resource, &adapter_rom_resources[i]); - - start = adapter_rom_resources[i++].end & ~2047UL; - } -} - diff --git a/ANDROID_3.4.5/arch/x86/kernel/process.c b/ANDROID_3.4.5/arch/x86/kernel/process.c deleted file mode 100644 index fdd151ce..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/process.c +++ /dev/null @@ -1,745 +0,0 @@ -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/smp.h> -#include <linux/prctl.h> -#include <linux/slab.h> -#include <linux/sched.h> -#include <linux/module.h> -#include <linux/pm.h> -#include <linux/clockchips.h> -#include <linux/random.h> -#include <linux/user-return-notifier.h> -#include <linux/dmi.h> -#include <linux/utsname.h> -#include <linux/stackprotector.h> -#include <linux/tick.h> -#include <linux/cpuidle.h> -#include <trace/events/power.h> -#include <linux/hw_breakpoint.h> -#include <asm/cpu.h> -#include <asm/apic.h> -#include <asm/syscalls.h> -#include <asm/idle.h> -#include <asm/uaccess.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> -#include <asm/debugreg.h> -#include <asm/nmi.h> - -#ifdef CONFIG_X86_64 -static DEFINE_PER_CPU(unsigned char, is_idle); 
-#endif - -struct kmem_cache *task_xstate_cachep; -EXPORT_SYMBOL_GPL(task_xstate_cachep); - -int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) -{ - int ret; - - *dst = *src; - if (fpu_allocated(&src->thread.fpu)) { - memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu)); - ret = fpu_alloc(&dst->thread.fpu); - if (ret) - return ret; - fpu_copy(&dst->thread.fpu, &src->thread.fpu); - } - return 0; -} - -void free_thread_xstate(struct task_struct *tsk) -{ - fpu_free(&tsk->thread.fpu); -} - -void free_thread_info(struct thread_info *ti) -{ - free_thread_xstate(ti->task); - free_pages((unsigned long)ti, THREAD_ORDER); -} - -void arch_task_cache_init(void) -{ - task_xstate_cachep = - kmem_cache_create("task_xstate", xstate_size, - __alignof__(union thread_xstate), - SLAB_PANIC | SLAB_NOTRACK, NULL); -} - -/* - * Free current thread data structures etc.. - */ -void exit_thread(void) -{ - struct task_struct *me = current; - struct thread_struct *t = &me->thread; - unsigned long *bp = t->io_bitmap_ptr; - - if (bp) { - struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); - - t->io_bitmap_ptr = NULL; - clear_thread_flag(TIF_IO_BITMAP); - /* - * Careful, clear this in the TSS too: - */ - memset(tss->io_bitmap, 0xff, t->io_bitmap_max); - t->io_bitmap_max = 0; - put_cpu(); - kfree(bp); - } -} - -void show_regs(struct pt_regs *regs) -{ - show_registers(regs); - show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs), 0); -} - -void show_regs_common(void) -{ - const char *vendor, *product, *board; - - vendor = dmi_get_system_info(DMI_SYS_VENDOR); - if (!vendor) - vendor = ""; - product = dmi_get_system_info(DMI_PRODUCT_NAME); - if (!product) - product = ""; - - /* Board Name is optional */ - board = dmi_get_system_info(DMI_BOARD_NAME); - - printk(KERN_CONT "\n"); - printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s", - current->pid, current->comm, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - printk(KERN_CONT " %s %s", vendor, product); - if (board) - printk(KERN_CONT "/%s", board); - printk(KERN_CONT "\n"); -} - -void flush_thread(void) -{ - struct task_struct *tsk = current; - - flush_ptrace_hw_breakpoint(tsk); - memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); - /* - * Forget coprocessor state.. - */ - tsk->fpu_counter = 0; - clear_fpu(tsk); - clear_used_math(); -} - -static void hard_disable_TSC(void) -{ - write_cr4(read_cr4() | X86_CR4_TSD); -} - -void disable_TSC(void) -{ - preempt_disable(); - if (!test_and_set_thread_flag(TIF_NOTSC)) - /* - * Must flip the CPU state synchronously with - * TIF_NOTSC in the current running context. - */ - hard_disable_TSC(); - preempt_enable(); -} - -static void hard_enable_TSC(void) -{ - write_cr4(read_cr4() & ~X86_CR4_TSD); -} - -static void enable_TSC(void) -{ - preempt_disable(); - if (test_and_clear_thread_flag(TIF_NOTSC)) - /* - * Must flip the CPU state synchronously with - * TIF_NOTSC in the current running context. 
- */ - hard_enable_TSC(); - preempt_enable(); -} - -int get_tsc_mode(unsigned long adr) -{ - unsigned int val; - - if (test_thread_flag(TIF_NOTSC)) - val = PR_TSC_SIGSEGV; - else - val = PR_TSC_ENABLE; - - return put_user(val, (unsigned int __user *)adr); -} - -int set_tsc_mode(unsigned int val) -{ - if (val == PR_TSC_SIGSEGV) - disable_TSC(); - else if (val == PR_TSC_ENABLE) - enable_TSC(); - else - return -EINVAL; - - return 0; -} - -void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - struct tss_struct *tss) -{ - struct thread_struct *prev, *next; - - prev = &prev_p->thread; - next = &next_p->thread; - - if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^ - test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl &= ~DEBUGCTLMSR_BTF; - if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) - debugctl |= DEBUGCTLMSR_BTF; - - update_debugctlmsr(debugctl); - } - - if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ - test_tsk_thread_flag(next_p, TIF_NOTSC)) { - /* prev and next are different */ - if (test_tsk_thread_flag(next_p, TIF_NOTSC)) - hard_disable_TSC(); - else - hard_enable_TSC(); - } - - if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { - /* - * Copy the relevant range of the IO bitmap. - * Normally this is 128 bytes or less: - */ - memcpy(tss->io_bitmap, next->io_bitmap_ptr, - max(prev->io_bitmap_max, next->io_bitmap_max)); - } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { - /* - * Clear any possible leftover bits: - */ - memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); - } - propagate_user_return_notify(prev_p, next_p); -} - -int sys_fork(struct pt_regs *regs) -{ - return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL); -} - -/* - * This is trivial, and on the face of it looks like it - * could equally well be done in user mode. - * - * Not so, for quite unobvious reasons - register pressure. - * In user mode vfork() cannot have a stack frame, and if - * done by calling the "clone()" system call directly, you - * do not have enough call-clobbered registers to hold all - * the information you need. - */ -int sys_vfork(struct pt_regs *regs) -{ - return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, - NULL, NULL); -} - -long -sys_clone(unsigned long clone_flags, unsigned long newsp, - void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) -{ - if (!newsp) - newsp = regs->sp; - return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); -} - -/* - * This gets run with %si containing the - * function to call, and %di containing - * the "args". - */ -extern void kernel_thread_helper(void); - -/* - * Create a kernel thread - */ -int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) -{ - struct pt_regs regs; - - memset(®s, 0, sizeof(regs)); - - regs.si = (unsigned long) fn; - regs.di = (unsigned long) arg; - -#ifdef CONFIG_X86_32 - regs.ds = __USER_DS; - regs.es = __USER_DS; - regs.fs = __KERNEL_PERCPU; - regs.gs = __KERNEL_STACK_CANARY; -#else - regs.ss = __KERNEL_DS; -#endif - - regs.orig_ax = -1; - regs.ip = (unsigned long) kernel_thread_helper; - regs.cs = __KERNEL_CS | get_kernel_rpl(); - regs.flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1; - - /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); -} -EXPORT_SYMBOL(kernel_thread); - -/* - * sys_execve() executes a new program. 
- */ -long sys_execve(const char __user *name, - const char __user *const __user *argv, - const char __user *const __user *envp, struct pt_regs *regs) -{ - long error; - char *filename; - - filename = getname(name); - error = PTR_ERR(filename); - if (IS_ERR(filename)) - return error; - error = do_execve(filename, argv, envp, regs); - -#ifdef CONFIG_X86_32 - if (error == 0) { - /* Make sure we don't return using sysenter.. */ - set_thread_flag(TIF_IRET); - } -#endif - - putname(filename); - return error; -} - -/* - * Idle related variables and functions - */ -unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; -EXPORT_SYMBOL(boot_option_idle_override); - -/* - * Powermanagement idle function, if any.. - */ -void (*pm_idle)(void); -#ifdef CONFIG_APM_MODULE -EXPORT_SYMBOL(pm_idle); -#endif - -static inline int hlt_use_halt(void) -{ - return 1; -} - -#ifndef CONFIG_SMP -static inline void play_dead(void) -{ - BUG(); -} -#endif - -#ifdef CONFIG_X86_64 -void enter_idle(void) -{ - percpu_write(is_idle, 1); - idle_notifier_call_chain(IDLE_START); -} - -static void __exit_idle(void) -{ - if (x86_test_and_clear_bit_percpu(0, is_idle) == 0) - return; - idle_notifier_call_chain(IDLE_END); -} - -/* Called from interrupts to signify idle end */ -void exit_idle(void) -{ - /* idle loop has pid 0 */ - if (current->pid) - return; - __exit_idle(); -} -#endif - -/* - * The idle thread. There's no useful work to be - * done, so just try to conserve power and have a - * low exit latency (ie sit in a loop waiting for - * somebody to say that they'd like to reschedule) - */ -void cpu_idle(void) -{ - /* - * If we're the non-boot CPU, nothing set the stack canary up - * for us. CPU0 already has it initialized but no harm in - * doing it again. This is a good place for updating it, as - * we wont ever return from this function (so the invalid - * canaries already on the stack wont ever trigger). - */ - boot_init_stack_canary(); - current_thread_info()->status |= TS_POLLING; - - while (1) { - tick_nohz_idle_enter(); - - while (!need_resched()) { - rmb(); - - if (cpu_is_offline(smp_processor_id())) - play_dead(); - - /* - * Idle routines should keep interrupts disabled - * from here on, until they go to idle. - * Otherwise, idle callbacks can misfire. - */ - local_touch_nmi(); - local_irq_disable(); - - enter_idle(); - - /* Don't trace irqs off for idle */ - stop_critical_timings(); - - /* enter_idle() needs rcu for notifiers */ - rcu_idle_enter(); - - if (cpuidle_idle_call()) - pm_idle(); - - rcu_idle_exit(); - start_critical_timings(); - - /* In many cases the interrupt that ended idle - has already called exit_idle. But some idle - loops can be woken up without interrupt. */ - __exit_idle(); - } - - tick_nohz_idle_exit(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); - } -} - -/* - * We use this if we don't have any better - * idle routine.. 
- */ -void default_idle(void) -{ - if (hlt_use_halt()) { - trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id()); - trace_cpu_idle_rcuidle(1, smp_processor_id()); - current_thread_info()->status &= ~TS_POLLING; - /* - * TS_POLLING-cleared state must be visible before we - * test NEED_RESCHED: - */ - smp_mb(); - - if (!need_resched()) - safe_halt(); /* enables interrupts racelessly */ - else - local_irq_enable(); - current_thread_info()->status |= TS_POLLING; - trace_power_end_rcuidle(smp_processor_id()); - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); - } else { - local_irq_enable(); - /* loop is done by the caller */ - cpu_relax(); - } -} -#ifdef CONFIG_APM_MODULE -EXPORT_SYMBOL(default_idle); -#endif - -bool set_pm_idle_to_default(void) -{ - bool ret = !!pm_idle; - - pm_idle = default_idle; - - return ret; -} -void stop_this_cpu(void *dummy) -{ - local_irq_disable(); - /* - * Remove this CPU: - */ - set_cpu_online(smp_processor_id(), false); - disable_local_APIC(); - - for (;;) { - if (hlt_works(smp_processor_id())) - halt(); - } -} - -static void do_nothing(void *unused) -{ -} - -/* - * cpu_idle_wait - Used to ensure that all the CPUs discard old value of - * pm_idle and update to new pm_idle value. Required while changing pm_idle - * handler on SMP systems. - * - * Caller must have changed pm_idle to the new value before the call. Old - * pm_idle value will not be used by any CPU after the return of this function. - */ -void cpu_idle_wait(void) -{ - smp_mb(); - /* kick all the CPUs so that they exit out of pm_idle */ - smp_call_function(do_nothing, NULL, 1); -} -EXPORT_SYMBOL_GPL(cpu_idle_wait); - -/* Default MONITOR/MWAIT with no hints, used for default C1 state */ -static void mwait_idle(void) -{ - if (!need_resched()) { - trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id()); - trace_cpu_idle_rcuidle(1, smp_processor_id()); - if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) - clflush((void *)¤t_thread_info()->flags); - - __monitor((void *)¤t_thread_info()->flags, 0, 0); - smp_mb(); - if (!need_resched()) - __sti_mwait(0, 0); - else - local_irq_enable(); - trace_power_end_rcuidle(smp_processor_id()); - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); - } else - local_irq_enable(); -} - -/* - * On SMP it's slightly faster (but much more power-consuming!) - * to poll the ->work.need_resched flag instead of waiting for the - * cross-CPU IPI to arrive. Use this option with caution. - */ -static void poll_idle(void) -{ - trace_power_start_rcuidle(POWER_CSTATE, 0, smp_processor_id()); - trace_cpu_idle_rcuidle(0, smp_processor_id()); - local_irq_enable(); - while (!need_resched()) - cpu_relax(); - trace_power_end_rcuidle(smp_processor_id()); - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); -} - -/* - * mwait selection logic: - * - * It depends on the CPU. For AMD CPUs that support MWAIT this is - * wrong. Family 0x10 and 0x11 CPUs will enter C1 on HLT. Powersavings - * then depend on a clock divisor and current Pstate of the core. If - * all cores of a processor are in halt state (C1) the processor can - * enter the C1E (C1 enhanced) state. If mwait is used this will never - * happen. - * - * idle=mwait overrides this decision and forces the usage of mwait. 
- */ - -#define MWAIT_INFO 0x05 -#define MWAIT_ECX_EXTENDED_INFO 0x01 -#define MWAIT_EDX_C1 0xf0 - -int mwait_usable(const struct cpuinfo_x86 *c) -{ - u32 eax, ebx, ecx, edx; - - if (boot_option_idle_override == IDLE_FORCE_MWAIT) - return 1; - - if (c->cpuid_level < MWAIT_INFO) - return 0; - - cpuid(MWAIT_INFO, &eax, &ebx, &ecx, &edx); - /* Check, whether EDX has extended info about MWAIT */ - if (!(ecx & MWAIT_ECX_EXTENDED_INFO)) - return 1; - - /* - * edx enumeratios MONITOR/MWAIT extensions. Check, whether - * C1 supports MWAIT - */ - return (edx & MWAIT_EDX_C1); -} - -bool amd_e400_c1e_detected; -EXPORT_SYMBOL(amd_e400_c1e_detected); - -static cpumask_var_t amd_e400_c1e_mask; - -void amd_e400_remove_cpu(int cpu) -{ - if (amd_e400_c1e_mask != NULL) - cpumask_clear_cpu(cpu, amd_e400_c1e_mask); -} - -/* - * AMD Erratum 400 aware idle routine. We check for C1E active in the interrupt - * pending message MSR. If we detect C1E, then we handle it the same - * way as C3 power states (local apic timer and TSC stop) - */ -static void amd_e400_idle(void) -{ - if (need_resched()) - return; - - if (!amd_e400_c1e_detected) { - u32 lo, hi; - - rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); - - if (lo & K8_INTP_C1E_ACTIVE_MASK) { - amd_e400_c1e_detected = true; - if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) - mark_tsc_unstable("TSC halt in AMD C1E"); - printk(KERN_INFO "System has AMD C1E enabled\n"); - } - } - - if (amd_e400_c1e_detected) { - int cpu = smp_processor_id(); - - if (!cpumask_test_cpu(cpu, amd_e400_c1e_mask)) { - cpumask_set_cpu(cpu, amd_e400_c1e_mask); - /* - * Force broadcast so ACPI can not interfere. - */ - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, - &cpu); - printk(KERN_INFO "Switch to broadcast mode on CPU%d\n", - cpu); - } - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); - - default_idle(); - - /* - * The switch back from broadcast mode needs to be - * called with interrupts disabled. - */ - local_irq_disable(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); - local_irq_enable(); - } else - default_idle(); -} - -void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - if (pm_idle == poll_idle && smp_num_siblings > 1) { - printk_once(KERN_WARNING "WARNING: polling idle and HT enabled," - " performance may degrade.\n"); - } -#endif - if (pm_idle) - return; - - if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { - /* - * One CPU supports mwait => All CPUs supports mwait - */ - printk(KERN_INFO "using mwait in idle threads.\n"); - pm_idle = mwait_idle; - } else if (cpu_has_amd_erratum(amd_erratum_400)) { - /* E400: APIC timer interrupt does not wake up CPU from C1e */ - printk(KERN_INFO "using AMD E400 aware idle routine\n"); - pm_idle = amd_e400_idle; - } else - pm_idle = default_idle; -} - -void __init init_amd_e400_c1e_mask(void) -{ - /* If we're using amd_e400_idle, we need to allocate amd_e400_c1e_mask. */ - if (pm_idle == amd_e400_idle) - zalloc_cpumask_var(&amd_e400_c1e_mask, GFP_KERNEL); -} - -static int __init idle_setup(char *str) -{ - if (!str) - return -EINVAL; - - if (!strcmp(str, "poll")) { - printk("using polling idle threads.\n"); - pm_idle = poll_idle; - boot_option_idle_override = IDLE_POLL; - } else if (!strcmp(str, "mwait")) { - boot_option_idle_override = IDLE_FORCE_MWAIT; - WARN_ONCE(1, "\"idle=mwait\" will be removed in 2012\n"); - } else if (!strcmp(str, "halt")) { - /* - * When the boot option of idle=halt is added, halt is - * forced to be used for CPU idle. 
In such case CPU C2/C3 - * won't be used again. - * To continue to load the CPU idle driver, don't touch - * the boot_option_idle_override. - */ - pm_idle = default_idle; - boot_option_idle_override = IDLE_HALT; - } else if (!strcmp(str, "nomwait")) { - /* - * If the boot option of "idle=nomwait" is added, - * it means that mwait will be disabled for CPU C2/C3 - * states. In such case it won't touch the variable - * of boot_option_idle_override. - */ - boot_option_idle_override = IDLE_NOMWAIT; - } else - return -1; - - return 0; -} -early_param("idle", idle_setup); - -unsigned long arch_align_stack(unsigned long sp) -{ - if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= get_random_int() % 8192; - return sp & ~0xf; -} - -unsigned long arch_randomize_brk(struct mm_struct *mm) -{ - unsigned long range_end = mm->brk + 0x02000000; - return randomize_range(mm->brk, range_end, 0) ? : mm->brk; -} - diff --git a/ANDROID_3.4.5/arch/x86/kernel/process_32.c b/ANDROID_3.4.5/arch/x86/kernel/process_32.c deleted file mode 100644 index ae684730..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/process_32.c +++ /dev/null @@ -1,336 +0,0 @@ -/* - * Copyright (C) 1995 Linus Torvalds - * - * Pentium III FXSR, SSE support - * Gareth Hughes <gareth@valinux.com>, May 2000 - */ - -/* - * This file handles the architecture-dependent parts of process handling.. - */ - -#include <linux/cpu.h> -#include <linux/errno.h> -#include <linux/sched.h> -#include <linux/fs.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/elfcore.h> -#include <linux/smp.h> -#include <linux/stddef.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> -#include <linux/user.h> -#include <linux/interrupt.h> -#include <linux/delay.h> -#include <linux/reboot.h> -#include <linux/init.h> -#include <linux/mc146818rtc.h> -#include <linux/module.h> -#include <linux/kallsyms.h> -#include <linux/ptrace.h> -#include <linux/personality.h> -#include <linux/percpu.h> -#include <linux/prctl.h> -#include <linux/ftrace.h> -#include <linux/uaccess.h> -#include <linux/io.h> -#include <linux/kdebug.h> - -#include <asm/pgtable.h> -#include <asm/ldt.h> -#include <asm/processor.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> -#include <asm/desc.h> -#ifdef CONFIG_MATH_EMULATION -#include <asm/math_emu.h> -#endif - -#include <linux/err.h> - -#include <asm/tlbflush.h> -#include <asm/cpu.h> -#include <asm/idle.h> -#include <asm/syscalls.h> -#include <asm/debugreg.h> -#include <asm/switch_to.h> - -asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); - -/* - * Return saved PC of a blocked thread. 
- */ -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - return ((unsigned long *)tsk->thread.sp)[3]; -} - -void __show_regs(struct pt_regs *regs, int all) -{ - unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; - unsigned long d0, d1, d2, d3, d6, d7; - unsigned long sp; - unsigned short ss, gs; - - if (user_mode_vm(regs)) { - sp = regs->sp; - ss = regs->ss & 0xffff; - gs = get_user_gs(regs); - } else { - sp = kernel_stack_pointer(regs); - savesegment(ss, ss); - savesegment(gs, gs); - } - - show_regs_common(); - - printk(KERN_DEFAULT "EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", - (u16)regs->cs, regs->ip, regs->flags, - smp_processor_id()); - print_symbol("EIP is at %s\n", regs->ip); - - printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", - regs->ax, regs->bx, regs->cx, regs->dx); - printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", - regs->si, regs->di, regs->bp, sp); - printk(KERN_DEFAULT " DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n", - (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss); - - if (!all) - return; - - cr0 = read_cr0(); - cr2 = read_cr2(); - cr3 = read_cr3(); - cr4 = read_cr4_safe(); - printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", - cr0, cr2, cr3, cr4); - - get_debugreg(d0, 0); - get_debugreg(d1, 1); - get_debugreg(d2, 2); - get_debugreg(d3, 3); - printk(KERN_DEFAULT "DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n", - d0, d1, d2, d3); - - get_debugreg(d6, 6); - get_debugreg(d7, 7); - printk(KERN_DEFAULT "DR6: %08lx DR7: %08lx\n", - d6, d7); -} - -void release_thread(struct task_struct *dead_task) -{ - BUG_ON(dead_task->mm); - release_vm86_irqs(dead_task); -} - -/* - * This gets called before we allocate a new thread and copy - * the current task into it. - */ -void prepare_to_copy(struct task_struct *tsk) -{ - unlazy_fpu(tsk); -} - -int copy_thread(unsigned long clone_flags, unsigned long sp, - unsigned long unused, - struct task_struct *p, struct pt_regs *regs) -{ - struct pt_regs *childregs; - struct task_struct *tsk; - int err; - - childregs = task_pt_regs(p); - *childregs = *regs; - childregs->ax = 0; - childregs->sp = sp; - - p->thread.sp = (unsigned long) childregs; - p->thread.sp0 = (unsigned long) (childregs+1); - - p->thread.ip = (unsigned long) ret_from_fork; - - task_user_gs(p) = get_user_gs(regs); - - p->fpu_counter = 0; - p->thread.io_bitmap_ptr = NULL; - tsk = current; - err = -ENOMEM; - - memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); - - if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { - p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, - IO_BITMAP_BYTES, GFP_KERNEL); - if (!p->thread.io_bitmap_ptr) { - p->thread.io_bitmap_max = 0; - return -ENOMEM; - } - set_tsk_thread_flag(p, TIF_IO_BITMAP); - } - - err = 0; - - /* - * Set a new TLS for the child thread? - */ - if (clone_flags & CLONE_SETTLS) - err = do_set_thread_area(p, -1, - (struct user_desc __user *)childregs->si, 0); - - if (err && p->thread.io_bitmap_ptr) { - kfree(p->thread.io_bitmap_ptr); - p->thread.io_bitmap_max = 0; - } - return err; -} - -void -start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) -{ - set_user_gs(regs, 0); - regs->fs = 0; - regs->ds = __USER_DS; - regs->es = __USER_DS; - regs->ss = __USER_DS; - regs->cs = __USER_CS; - regs->ip = new_ip; - regs->sp = new_sp; - /* - * Free the old FP and other extended state - */ - free_thread_xstate(current); -} -EXPORT_SYMBOL_GPL(start_thread); - - -/* - * switch_to(x,y) should switch tasks from x to y. 
- * - * We fsave/fwait so that an exception goes off at the right time - * (as a call from the fsave or fwait in effect) rather than to - * the wrong process. Lazy FP saving no longer makes any sense - * with modern CPU's, and this simplifies a lot of things (SMP - * and UP become the same). - * - * NOTE! We used to use the x86 hardware context switching. The - * reason for not using it any more becomes apparent when you - * try to recover gracefully from saved state that is no longer - * valid (stale segment register values in particular). With the - * hardware task-switch, there is no way to fix up bad state in - * a reasonable manner. - * - * The fact that Intel documents the hardware task-switching to - * be slow is a fairly red herring - this code is not noticeably - * faster. However, there _is_ some room for improvement here, - * so the performance issues may eventually be a valid point. - * More important, however, is the fact that this allows us much - * more flexibility. - * - * The return value (in %ax) will be the "prev" task after - * the task-switch, and shows up in ret_from_fork in entry.S, - * for example. - */ -__notrace_funcgraph struct task_struct * -__switch_to(struct task_struct *prev_p, struct task_struct *next_p) -{ - struct thread_struct *prev = &prev_p->thread, - *next = &next_p->thread; - int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); - fpu_switch_t fpu; - - /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ - - fpu = switch_fpu_prepare(prev_p, next_p, cpu); - - /* - * Reload esp0. - */ - load_sp0(tss, next); - - /* - * Save away %gs. No need to save %fs, as it was saved on the - * stack on entry. No need to save %es and %ds, as those are - * always kernel segments while inside the kernel. Doing this - * before setting the new TLS descriptors avoids the situation - * where we temporarily have non-reloadable segments in %fs - * and %gs. This could be an issue if the NMI handler ever - * used %fs or %gs (it does not today), or if the kernel is - * running inside of a hypervisor layer. - */ - lazy_save_gs(prev->gs); - - /* - * Load the per-thread Thread-Local Storage descriptor. - */ - load_TLS(next, cpu); - - /* - * Restore IOPL if needed. In normal use, the flags restore - * in the switch assembly will handle this. But if the kernel - * is running virtualized at a non-zero CPL, the popf will - * not restore flags, so it must be done in a separate step. - */ - if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl)) - set_iopl_mask(next->iopl); - - /* - * Now maybe handle debug registers and/or IO bitmaps - */ - if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV || - task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) - __switch_to_xtra(prev_p, next_p, tss); - - /* - * Leave lazy mode, flushing any hypercalls made here. - * This must be done before restoring TLS segments so - * the GDT and LDT are properly updated, and must be - * done before math_state_restore, so the TS bit is up - * to date. 
- */ - arch_end_context_switch(next_p); - - /* - * Restore %gs if needed (which is common) - */ - if (prev->gs | next->gs) - lazy_load_gs(next->gs); - - switch_fpu_finish(next_p, fpu); - - percpu_write(current_task, next_p); - - return prev_p; -} - -#define top_esp (THREAD_SIZE - sizeof(unsigned long)) -#define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long)) - -unsigned long get_wchan(struct task_struct *p) -{ - unsigned long bp, sp, ip; - unsigned long stack_page; - int count = 0; - if (!p || p == current || p->state == TASK_RUNNING) - return 0; - stack_page = (unsigned long)task_stack_page(p); - sp = p->thread.sp; - if (!stack_page || sp < stack_page || sp > top_esp+stack_page) - return 0; - /* include/asm-i386/system.h:switch_to() pushes bp last. */ - bp = *(unsigned long *) sp; - do { - if (bp < stack_page || bp > top_ebp+stack_page) - return 0; - ip = *(unsigned long *) (bp+4); - if (!in_sched_functions(ip)) - return ip; - bp = *(unsigned long *) bp; - } while (count++ < 16); - return 0; -} - diff --git a/ANDROID_3.4.5/arch/x86/kernel/process_64.c b/ANDROID_3.4.5/arch/x86/kernel/process_64.c deleted file mode 100644 index 43d8b48b..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/process_64.c +++ /dev/null @@ -1,556 +0,0 @@ -/* - * Copyright (C) 1995 Linus Torvalds - * - * Pentium III FXSR, SSE support - * Gareth Hughes <gareth@valinux.com>, May 2000 - * - * X86-64 port - * Andi Kleen. - * - * CPU hotplug support - ashok.raj@intel.com - */ - -/* - * This file handles the architecture-dependent parts of process handling.. - */ - -#include <linux/cpu.h> -#include <linux/errno.h> -#include <linux/sched.h> -#include <linux/fs.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/elfcore.h> -#include <linux/smp.h> -#include <linux/slab.h> -#include <linux/user.h> -#include <linux/interrupt.h> -#include <linux/delay.h> -#include <linux/module.h> -#include <linux/ptrace.h> -#include <linux/notifier.h> -#include <linux/kprobes.h> -#include <linux/kdebug.h> -#include <linux/prctl.h> -#include <linux/uaccess.h> -#include <linux/io.h> -#include <linux/ftrace.h> - -#include <asm/pgtable.h> -#include <asm/processor.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> -#include <asm/mmu_context.h> -#include <asm/prctl.h> -#include <asm/desc.h> -#include <asm/proto.h> -#include <asm/ia32.h> -#include <asm/idle.h> -#include <asm/syscalls.h> -#include <asm/debugreg.h> -#include <asm/switch_to.h> - -asmlinkage extern void ret_from_fork(void); - -DEFINE_PER_CPU(unsigned long, old_rsp); - -/* Prints also some state that isn't saved in the pt_regs */ -void __show_regs(struct pt_regs *regs, int all) -{ - unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; - unsigned long d0, d1, d2, d3, d6, d7; - unsigned int fsindex, gsindex; - unsigned int ds, cs, es; - - show_regs_common(); - printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); - printk_address(regs->ip, 1); - printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, - regs->sp, regs->flags); - printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n", - regs->ax, regs->bx, regs->cx); - printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n", - regs->dx, regs->si, regs->di); - printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n", - regs->bp, regs->r8, regs->r9); - printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n", - regs->r10, regs->r11, regs->r12); - printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n", - regs->r13, regs->r14, regs->r15); - - asm("movl 
%%ds,%0" : "=r" (ds)); - asm("movl %%cs,%0" : "=r" (cs)); - asm("movl %%es,%0" : "=r" (es)); - asm("movl %%fs,%0" : "=r" (fsindex)); - asm("movl %%gs,%0" : "=r" (gsindex)); - - rdmsrl(MSR_FS_BASE, fs); - rdmsrl(MSR_GS_BASE, gs); - rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); - - if (!all) - return; - - cr0 = read_cr0(); - cr2 = read_cr2(); - cr3 = read_cr3(); - cr4 = read_cr4(); - - printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", - fs, fsindex, gs, gsindex, shadowgs); - printk(KERN_DEFAULT "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, - es, cr0); - printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, - cr4); - - get_debugreg(d0, 0); - get_debugreg(d1, 1); - get_debugreg(d2, 2); - printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); - get_debugreg(d3, 3); - get_debugreg(d6, 6); - get_debugreg(d7, 7); - printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); -} - -void release_thread(struct task_struct *dead_task) -{ - if (dead_task->mm) { - if (dead_task->mm->context.size) { - printk("WARNING: dead process %8s still has LDT? <%p/%d>\n", - dead_task->comm, - dead_task->mm->context.ldt, - dead_task->mm->context.size); - BUG(); - } - } -} - -static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr) -{ - struct user_desc ud = { - .base_addr = addr, - .limit = 0xfffff, - .seg_32bit = 1, - .limit_in_pages = 1, - .useable = 1, - }; - struct desc_struct *desc = t->thread.tls_array; - desc += tls; - fill_ldt(desc, &ud); -} - -static inline u32 read_32bit_tls(struct task_struct *t, int tls) -{ - return get_desc_base(&t->thread.tls_array[tls]); -} - -/* - * This gets called before we allocate a new thread and copy - * the current task into it. - */ -void prepare_to_copy(struct task_struct *tsk) -{ - unlazy_fpu(tsk); -} - -int copy_thread(unsigned long clone_flags, unsigned long sp, - unsigned long unused, - struct task_struct *p, struct pt_regs *regs) -{ - int err; - struct pt_regs *childregs; - struct task_struct *me = current; - - childregs = ((struct pt_regs *) - (THREAD_SIZE + task_stack_page(p))) - 1; - *childregs = *regs; - - childregs->ax = 0; - if (user_mode(regs)) - childregs->sp = sp; - else - childregs->sp = (unsigned long)childregs; - - p->thread.sp = (unsigned long) childregs; - p->thread.sp0 = (unsigned long) (childregs+1); - p->thread.usersp = me->thread.usersp; - - set_tsk_thread_flag(p, TIF_FORK); - - p->fpu_counter = 0; - p->thread.io_bitmap_ptr = NULL; - - savesegment(gs, p->thread.gsindex); - p->thread.gs = p->thread.gsindex ? 0 : me->thread.gs; - savesegment(fs, p->thread.fsindex); - p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs; - savesegment(es, p->thread.es); - savesegment(ds, p->thread.ds); - - err = -ENOMEM; - memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); - - if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { - p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr, - IO_BITMAP_BYTES, GFP_KERNEL); - if (!p->thread.io_bitmap_ptr) { - p->thread.io_bitmap_max = 0; - return -ENOMEM; - } - set_tsk_thread_flag(p, TIF_IO_BITMAP); - } - - /* - * Set a new TLS for the child thread? 
- */ - if (clone_flags & CLONE_SETTLS) { -#ifdef CONFIG_IA32_EMULATION - if (test_thread_flag(TIF_IA32)) - err = do_set_thread_area(p, -1, - (struct user_desc __user *)childregs->si, 0); - else -#endif - err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); - if (err) - goto out; - } - err = 0; -out: - if (err && p->thread.io_bitmap_ptr) { - kfree(p->thread.io_bitmap_ptr); - p->thread.io_bitmap_max = 0; - } - - return err; -} - -static void -start_thread_common(struct pt_regs *regs, unsigned long new_ip, - unsigned long new_sp, - unsigned int _cs, unsigned int _ss, unsigned int _ds) -{ - loadsegment(fs, 0); - loadsegment(es, _ds); - loadsegment(ds, _ds); - load_gs_index(0); - current->thread.usersp = new_sp; - regs->ip = new_ip; - regs->sp = new_sp; - percpu_write(old_rsp, new_sp); - regs->cs = _cs; - regs->ss = _ss; - regs->flags = X86_EFLAGS_IF; - /* - * Free the old FP and other extended state - */ - free_thread_xstate(current); -} - -void -start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) -{ - start_thread_common(regs, new_ip, new_sp, - __USER_CS, __USER_DS, 0); -} - -#ifdef CONFIG_IA32_EMULATION -void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp) -{ - start_thread_common(regs, new_ip, new_sp, - test_thread_flag(TIF_X32) - ? __USER_CS : __USER32_CS, - __USER_DS, __USER_DS); -} -#endif - -/* - * switch_to(x,y) should switch tasks from x to y. - * - * This could still be optimized: - * - fold all the options into a flag word and test it with a single test. - * - could test fs/gs bitsliced - * - * Kprobes not supported here. Set the probe on schedule instead. - * Function graph tracer not supported too. - */ -__notrace_funcgraph struct task_struct * -__switch_to(struct task_struct *prev_p, struct task_struct *next_p) -{ - struct thread_struct *prev = &prev_p->thread; - struct thread_struct *next = &next_p->thread; - int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); - unsigned fsindex, gsindex; - fpu_switch_t fpu; - - fpu = switch_fpu_prepare(prev_p, next_p, cpu); - - /* - * Reload esp0, LDT and the page table pointer: - */ - load_sp0(tss, next); - - /* - * Switch DS and ES. - * This won't pick up thread selector changes, but I guess that is ok. - */ - savesegment(es, prev->es); - if (unlikely(next->es | prev->es)) - loadsegment(es, next->es); - - savesegment(ds, prev->ds); - if (unlikely(next->ds | prev->ds)) - loadsegment(ds, next->ds); - - - /* We must save %fs and %gs before load_TLS() because - * %fs and %gs may be cleared by load_TLS(). - * - * (e.g. xen_load_tls()) - */ - savesegment(fs, fsindex); - savesegment(gs, gsindex); - - load_TLS(next, cpu); - - /* - * Leave lazy mode, flushing any hypercalls made here. - * This must be done before restoring TLS segments so - * the GDT and LDT are properly updated, and must be - * done before math_state_restore, so the TS bit is up - * to date. - */ - arch_end_context_switch(next_p); - - /* - * Switch FS and GS. - * - * Segment register != 0 always requires a reload. Also - * reload when it has changed. When prev process used 64bit - * base always reload to avoid an information leak. 
- */ - if (unlikely(fsindex | next->fsindex | prev->fs)) { - loadsegment(fs, next->fsindex); - /* - * Check if the user used a selector != 0; if yes - * clear 64bit base, since overloaded base is always - * mapped to the Null selector - */ - if (fsindex) - prev->fs = 0; - } - /* when next process has a 64bit base use it */ - if (next->fs) - wrmsrl(MSR_FS_BASE, next->fs); - prev->fsindex = fsindex; - - if (unlikely(gsindex | next->gsindex | prev->gs)) { - load_gs_index(next->gsindex); - if (gsindex) - prev->gs = 0; - } - if (next->gs) - wrmsrl(MSR_KERNEL_GS_BASE, next->gs); - prev->gsindex = gsindex; - - switch_fpu_finish(next_p, fpu); - - /* - * Switch the PDA and FPU contexts. - */ - prev->usersp = percpu_read(old_rsp); - percpu_write(old_rsp, next->usersp); - percpu_write(current_task, next_p); - - percpu_write(kernel_stack, - (unsigned long)task_stack_page(next_p) + - THREAD_SIZE - KERNEL_STACK_OFFSET); - - /* - * Now maybe reload the debug registers and handle I/O bitmaps - */ - if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT || - task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) - __switch_to_xtra(prev_p, next_p, tss); - - return prev_p; -} - -void set_personality_64bit(void) -{ - /* inherit personality from parent */ - - /* Make sure to be in 64bit mode */ - clear_thread_flag(TIF_IA32); - clear_thread_flag(TIF_ADDR32); - clear_thread_flag(TIF_X32); - - /* Ensure the corresponding mm is not marked. */ - if (current->mm) - current->mm->context.ia32_compat = 0; - - /* TBD: overwrites user setup. Should have two bits. - But 64bit processes have always behaved this way, - so it's not too bad. The main problem is just that - 32bit childs are affected again. */ - current->personality &= ~READ_IMPLIES_EXEC; -} - -void set_personality_ia32(bool x32) -{ - /* inherit personality from parent */ - - /* Make sure to be in 32bit mode */ - set_thread_flag(TIF_ADDR32); - - /* Mark the associated mm as containing 32-bit tasks. */ - if (current->mm) - current->mm->context.ia32_compat = 1; - - if (x32) { - clear_thread_flag(TIF_IA32); - set_thread_flag(TIF_X32); - current->personality &= ~READ_IMPLIES_EXEC; - /* is_compat_task() uses the presence of the x32 - syscall bit flag to determine compat status */ - current_thread_info()->status &= ~TS_COMPAT; - } else { - set_thread_flag(TIF_IA32); - clear_thread_flag(TIF_X32); - current->personality |= force_personality32; - /* Prepare the first "return" to user space */ - current_thread_info()->status |= TS_COMPAT; - } -} -EXPORT_SYMBOL_GPL(set_personality_ia32); - -unsigned long get_wchan(struct task_struct *p) -{ - unsigned long stack; - u64 fp, ip; - int count = 0; - - if (!p || p == current || p->state == TASK_RUNNING) - return 0; - stack = (unsigned long)task_stack_page(p); - if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE) - return 0; - fp = *(u64 *)(p->thread.sp); - do { - if (fp < (unsigned long)stack || - fp >= (unsigned long)stack+THREAD_SIZE) - return 0; - ip = *(u64 *)(fp+8); - if (!in_sched_functions(ip)) - return ip; - fp = *(u64 *)fp; - } while (count++ < 16); - return 0; -} - -long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) -{ - int ret = 0; - int doit = task == current; - int cpu; - - switch (code) { - case ARCH_SET_GS: - if (addr >= TASK_SIZE_OF(task)) - return -EPERM; - cpu = get_cpu(); - /* handle small bases via the GDT because that's faster to - switch. 
*/ - if (addr <= 0xffffffff) { - set_32bit_tls(task, GS_TLS, addr); - if (doit) { - load_TLS(&task->thread, cpu); - load_gs_index(GS_TLS_SEL); - } - task->thread.gsindex = GS_TLS_SEL; - task->thread.gs = 0; - } else { - task->thread.gsindex = 0; - task->thread.gs = addr; - if (doit) { - load_gs_index(0); - ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); - } - } - put_cpu(); - break; - case ARCH_SET_FS: - /* Not strictly needed for fs, but do it for symmetry - with gs */ - if (addr >= TASK_SIZE_OF(task)) - return -EPERM; - cpu = get_cpu(); - /* handle small bases via the GDT because that's faster to - switch. */ - if (addr <= 0xffffffff) { - set_32bit_tls(task, FS_TLS, addr); - if (doit) { - load_TLS(&task->thread, cpu); - loadsegment(fs, FS_TLS_SEL); - } - task->thread.fsindex = FS_TLS_SEL; - task->thread.fs = 0; - } else { - task->thread.fsindex = 0; - task->thread.fs = addr; - if (doit) { - /* set the selector to 0 to not confuse - __switch_to */ - loadsegment(fs, 0); - ret = checking_wrmsrl(MSR_FS_BASE, addr); - } - } - put_cpu(); - break; - case ARCH_GET_FS: { - unsigned long base; - if (task->thread.fsindex == FS_TLS_SEL) - base = read_32bit_tls(task, FS_TLS); - else if (doit) - rdmsrl(MSR_FS_BASE, base); - else - base = task->thread.fs; - ret = put_user(base, (unsigned long __user *)addr); - break; - } - case ARCH_GET_GS: { - unsigned long base; - unsigned gsindex; - if (task->thread.gsindex == GS_TLS_SEL) - base = read_32bit_tls(task, GS_TLS); - else if (doit) { - savesegment(gs, gsindex); - if (gsindex) - rdmsrl(MSR_KERNEL_GS_BASE, base); - else - base = task->thread.gs; - } else - base = task->thread.gs; - ret = put_user(base, (unsigned long __user *)addr); - break; - } - - default: - ret = -EINVAL; - break; - } - - return ret; -} - -long sys_arch_prctl(int code, unsigned long addr) -{ - return do_arch_prctl(current, code, addr); -} - -unsigned long KSTK_ESP(struct task_struct *task) -{ - return (test_tsk_thread_flag(task, TIF_IA32)) ? 
- (task_pt_regs(task)->sp) : ((task)->thread.usersp); -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/ptrace.c b/ANDROID_3.4.5/arch/x86/kernel/ptrace.c deleted file mode 100644 index cf117833..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/ptrace.c +++ /dev/null @@ -1,1524 +0,0 @@ -/* By Ross Biro 1/23/92 */ -/* - * Pentium III FXSR, SSE support - * Gareth Hughes <gareth@valinux.com>, May 2000 - */ - -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/smp.h> -#include <linux/errno.h> -#include <linux/slab.h> -#include <linux/ptrace.h> -#include <linux/regset.h> -#include <linux/tracehook.h> -#include <linux/user.h> -#include <linux/elf.h> -#include <linux/security.h> -#include <linux/audit.h> -#include <linux/seccomp.h> -#include <linux/signal.h> -#include <linux/perf_event.h> -#include <linux/hw_breakpoint.h> - -#include <asm/uaccess.h> -#include <asm/pgtable.h> -#include <asm/processor.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> -#include <asm/debugreg.h> -#include <asm/ldt.h> -#include <asm/desc.h> -#include <asm/prctl.h> -#include <asm/proto.h> -#include <asm/hw_breakpoint.h> -#include <asm/traps.h> - -#include "tls.h" - -#define CREATE_TRACE_POINTS -#include <trace/events/syscalls.h> - -enum x86_regset { - REGSET_GENERAL, - REGSET_FP, - REGSET_XFP, - REGSET_IOPERM64 = REGSET_XFP, - REGSET_XSTATE, - REGSET_TLS, - REGSET_IOPERM32, -}; - -struct pt_regs_offset { - const char *name; - int offset; -}; - -#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} -#define REG_OFFSET_END {.name = NULL, .offset = 0} - -static const struct pt_regs_offset regoffset_table[] = { -#ifdef CONFIG_X86_64 - REG_OFFSET_NAME(r15), - REG_OFFSET_NAME(r14), - REG_OFFSET_NAME(r13), - REG_OFFSET_NAME(r12), - REG_OFFSET_NAME(r11), - REG_OFFSET_NAME(r10), - REG_OFFSET_NAME(r9), - REG_OFFSET_NAME(r8), -#endif - REG_OFFSET_NAME(bx), - REG_OFFSET_NAME(cx), - REG_OFFSET_NAME(dx), - REG_OFFSET_NAME(si), - REG_OFFSET_NAME(di), - REG_OFFSET_NAME(bp), - REG_OFFSET_NAME(ax), -#ifdef CONFIG_X86_32 - REG_OFFSET_NAME(ds), - REG_OFFSET_NAME(es), - REG_OFFSET_NAME(fs), - REG_OFFSET_NAME(gs), -#endif - REG_OFFSET_NAME(orig_ax), - REG_OFFSET_NAME(ip), - REG_OFFSET_NAME(cs), - REG_OFFSET_NAME(flags), - REG_OFFSET_NAME(sp), - REG_OFFSET_NAME(ss), - REG_OFFSET_END, -}; - -/** - * regs_query_register_offset() - query register offset from its name - * @name: the name of a register - * - * regs_query_register_offset() returns the offset of a register in struct - * pt_regs from its name. If the name is invalid, this returns -EINVAL; - */ -int regs_query_register_offset(const char *name) -{ - const struct pt_regs_offset *roff; - for (roff = regoffset_table; roff->name != NULL; roff++) - if (!strcmp(roff->name, name)) - return roff->offset; - return -EINVAL; -} - -/** - * regs_query_register_name() - query register name from its offset - * @offset: the offset of a register in struct pt_regs. - * - * regs_query_register_name() returns the name of a register from its - * offset in struct pt_regs. 
If the @offset is invalid, this returns NULL; - */ -const char *regs_query_register_name(unsigned int offset) -{ - const struct pt_regs_offset *roff; - for (roff = regoffset_table; roff->name != NULL; roff++) - if (roff->offset == offset) - return roff->name; - return NULL; -} - -static const int arg_offs_table[] = { -#ifdef CONFIG_X86_32 - [0] = offsetof(struct pt_regs, ax), - [1] = offsetof(struct pt_regs, dx), - [2] = offsetof(struct pt_regs, cx) -#else /* CONFIG_X86_64 */ - [0] = offsetof(struct pt_regs, di), - [1] = offsetof(struct pt_regs, si), - [2] = offsetof(struct pt_regs, dx), - [3] = offsetof(struct pt_regs, cx), - [4] = offsetof(struct pt_regs, r8), - [5] = offsetof(struct pt_regs, r9) -#endif -}; - -/* - * does not yet catch signals sent when the child dies. - * in exit.c or in signal.c. - */ - -/* - * Determines which flags the user has access to [1 = access, 0 = no access]. - */ -#define FLAG_MASK_32 ((unsigned long) \ - (X86_EFLAGS_CF | X86_EFLAGS_PF | \ - X86_EFLAGS_AF | X86_EFLAGS_ZF | \ - X86_EFLAGS_SF | X86_EFLAGS_TF | \ - X86_EFLAGS_DF | X86_EFLAGS_OF | \ - X86_EFLAGS_RF | X86_EFLAGS_AC)) - -/* - * Determines whether a value may be installed in a segment register. - */ -static inline bool invalid_selector(u16 value) -{ - return unlikely(value != 0 && (value & SEGMENT_RPL_MASK) != USER_RPL); -} - -#ifdef CONFIG_X86_32 - -#define FLAG_MASK FLAG_MASK_32 - -static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) -{ - BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); - return &regs->bx + (regno >> 2); -} - -static u16 get_segment_reg(struct task_struct *task, unsigned long offset) -{ - /* - * Returning the value truncates it to 16 bits. - */ - unsigned int retval; - if (offset != offsetof(struct user_regs_struct, gs)) - retval = *pt_regs_access(task_pt_regs(task), offset); - else { - if (task == current) - retval = get_user_gs(task_pt_regs(task)); - else - retval = task_user_gs(task); - } - return retval; -} - -static int set_segment_reg(struct task_struct *task, - unsigned long offset, u16 value) -{ - /* - * The value argument was already truncated to 16 bits. - */ - if (invalid_selector(value)) - return -EIO; - - /* - * For %cs and %ss we cannot permit a null selector. - * We can permit a bogus selector as long as it has USER_RPL. - * Null selectors are fine for other segment registers, but - * we will never get back to user mode with invalid %cs or %ss - * and will take the trap in iret instead. Much code relies - * on user_mode() to distinguish a user trap frame (which can - * safely use invalid selectors) from a kernel trap frame. - */ - switch (offset) { - case offsetof(struct user_regs_struct, cs): - case offsetof(struct user_regs_struct, ss): - if (unlikely(value == 0)) - return -EIO; - - default: - *pt_regs_access(task_pt_regs(task), offset) = value; - break; - - case offsetof(struct user_regs_struct, gs): - if (task == current) - set_user_gs(task_pt_regs(task), value); - else - task_user_gs(task) = value; - } - - return 0; -} - -#else /* CONFIG_X86_64 */ - -#define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) - -static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long offset) -{ - BUILD_BUG_ON(offsetof(struct pt_regs, r15) != 0); - return &regs->r15 + (offset / sizeof(regs->r15)); -} - -static u16 get_segment_reg(struct task_struct *task, unsigned long offset) -{ - /* - * Returning the value truncates it to 16 bits.
- */ - unsigned int seg; - - switch (offset) { - case offsetof(struct user_regs_struct, fs): - if (task == current) { - /* Older gas can't assemble movq %?s,%r?? */ - asm("movl %%fs,%0" : "=r" (seg)); - return seg; - } - return task->thread.fsindex; - case offsetof(struct user_regs_struct, gs): - if (task == current) { - asm("movl %%gs,%0" : "=r" (seg)); - return seg; - } - return task->thread.gsindex; - case offsetof(struct user_regs_struct, ds): - if (task == current) { - asm("movl %%ds,%0" : "=r" (seg)); - return seg; - } - return task->thread.ds; - case offsetof(struct user_regs_struct, es): - if (task == current) { - asm("movl %%es,%0" : "=r" (seg)); - return seg; - } - return task->thread.es; - - case offsetof(struct user_regs_struct, cs): - case offsetof(struct user_regs_struct, ss): - break; - } - return *pt_regs_access(task_pt_regs(task), offset); -} - -static int set_segment_reg(struct task_struct *task, - unsigned long offset, u16 value) -{ - /* - * The value argument was already truncated to 16 bits. - */ - if (invalid_selector(value)) - return -EIO; - - switch (offset) { - case offsetof(struct user_regs_struct,fs): - /* - * If this is setting fs as for normal 64-bit use but - * setting fs_base has implicitly changed it, leave it. - */ - if ((value == FS_TLS_SEL && task->thread.fsindex == 0 && - task->thread.fs != 0) || - (value == 0 && task->thread.fsindex == FS_TLS_SEL && - task->thread.fs == 0)) - break; - task->thread.fsindex = value; - if (task == current) - loadsegment(fs, task->thread.fsindex); - break; - case offsetof(struct user_regs_struct,gs): - /* - * If this is setting gs as for normal 64-bit use but - * setting gs_base has implicitly changed it, leave it. - */ - if ((value == GS_TLS_SEL && task->thread.gsindex == 0 && - task->thread.gs != 0) || - (value == 0 && task->thread.gsindex == GS_TLS_SEL && - task->thread.gs == 0)) - break; - task->thread.gsindex = value; - if (task == current) - load_gs_index(task->thread.gsindex); - break; - case offsetof(struct user_regs_struct,ds): - task->thread.ds = value; - if (task == current) - loadsegment(ds, task->thread.ds); - break; - case offsetof(struct user_regs_struct,es): - task->thread.es = value; - if (task == current) - loadsegment(es, task->thread.es); - break; - - /* - * Can't actually change these in 64-bit mode. - */ - case offsetof(struct user_regs_struct,cs): - if (unlikely(value == 0)) - return -EIO; -#ifdef CONFIG_IA32_EMULATION - if (test_tsk_thread_flag(task, TIF_IA32)) - task_pt_regs(task)->cs = value; -#endif - break; - case offsetof(struct user_regs_struct,ss): - if (unlikely(value == 0)) - return -EIO; -#ifdef CONFIG_IA32_EMULATION - if (test_tsk_thread_flag(task, TIF_IA32)) - task_pt_regs(task)->ss = value; -#endif - break; - } - - return 0; -} - -#endif /* CONFIG_X86_32 */ - -static unsigned long get_flags(struct task_struct *task) -{ - unsigned long retval = task_pt_regs(task)->flags; - - /* - * If the debugger set TF, hide it from the readout. - */ - if (test_tsk_thread_flag(task, TIF_FORCED_TF)) - retval &= ~X86_EFLAGS_TF; - - return retval; -} - -static int set_flags(struct task_struct *task, unsigned long value) -{ - struct pt_regs *regs = task_pt_regs(task); - - /* - * If the user value contains TF, mark that - * it was not "us" (the debugger) that set it. - * If not, make sure it stays set if we had. 
- */ - if (value & X86_EFLAGS_TF) - clear_tsk_thread_flag(task, TIF_FORCED_TF); - else if (test_tsk_thread_flag(task, TIF_FORCED_TF)) - value |= X86_EFLAGS_TF; - - regs->flags = (regs->flags & ~FLAG_MASK) | (value & FLAG_MASK); - - return 0; -} - -static int putreg(struct task_struct *child, - unsigned long offset, unsigned long value) -{ - switch (offset) { - case offsetof(struct user_regs_struct, cs): - case offsetof(struct user_regs_struct, ds): - case offsetof(struct user_regs_struct, es): - case offsetof(struct user_regs_struct, fs): - case offsetof(struct user_regs_struct, gs): - case offsetof(struct user_regs_struct, ss): - return set_segment_reg(child, offset, value); - - case offsetof(struct user_regs_struct, flags): - return set_flags(child, value); - -#ifdef CONFIG_X86_64 - case offsetof(struct user_regs_struct,fs_base): - if (value >= TASK_SIZE_OF(child)) - return -EIO; - /* - * When changing the segment base, use do_arch_prctl - * to set either thread.fs or thread.fsindex and the - * corresponding GDT slot. - */ - if (child->thread.fs != value) - return do_arch_prctl(child, ARCH_SET_FS, value); - return 0; - case offsetof(struct user_regs_struct,gs_base): - /* - * Exactly the same here as the %fs handling above. - */ - if (value >= TASK_SIZE_OF(child)) - return -EIO; - if (child->thread.gs != value) - return do_arch_prctl(child, ARCH_SET_GS, value); - return 0; -#endif - } - - *pt_regs_access(task_pt_regs(child), offset) = value; - return 0; -} - -static unsigned long getreg(struct task_struct *task, unsigned long offset) -{ - switch (offset) { - case offsetof(struct user_regs_struct, cs): - case offsetof(struct user_regs_struct, ds): - case offsetof(struct user_regs_struct, es): - case offsetof(struct user_regs_struct, fs): - case offsetof(struct user_regs_struct, gs): - case offsetof(struct user_regs_struct, ss): - return get_segment_reg(task, offset); - - case offsetof(struct user_regs_struct, flags): - return get_flags(task); - -#ifdef CONFIG_X86_64 - case offsetof(struct user_regs_struct, fs_base): { - /* - * do_arch_prctl may have used a GDT slot instead of - * the MSR. To userland, it appears the same either - * way, except the %fs segment selector might not be 0. - */ - unsigned int seg = task->thread.fsindex; - if (task->thread.fs != 0) - return task->thread.fs; - if (task == current) - asm("movl %%fs,%0" : "=r" (seg)); - if (seg != FS_TLS_SEL) - return 0; - return get_desc_base(&task->thread.tls_array[FS_TLS]); - } - case offsetof(struct user_regs_struct, gs_base): { - /* - * Exactly the same here as the %fs handling above. 
- */ - unsigned int seg = task->thread.gsindex; - if (task->thread.gs != 0) - return task->thread.gs; - if (task == current) - asm("movl %%gs,%0" : "=r" (seg)); - if (seg != GS_TLS_SEL) - return 0; - return get_desc_base(&task->thread.tls_array[GS_TLS]); - } -#endif - } - - return *pt_regs_access(task_pt_regs(task), offset); -} - -static int genregs_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - if (kbuf) { - unsigned long *k = kbuf; - while (count >= sizeof(*k)) { - *k++ = getreg(target, pos); - count -= sizeof(*k); - pos += sizeof(*k); - } - } else { - unsigned long __user *u = ubuf; - while (count >= sizeof(*u)) { - if (__put_user(getreg(target, pos), u++)) - return -EFAULT; - count -= sizeof(*u); - pos += sizeof(*u); - } - } - - return 0; -} - -static int genregs_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret = 0; - if (kbuf) { - const unsigned long *k = kbuf; - while (count >= sizeof(*k) && !ret) { - ret = putreg(target, pos, *k++); - count -= sizeof(*k); - pos += sizeof(*k); - } - } else { - const unsigned long __user *u = ubuf; - while (count >= sizeof(*u) && !ret) { - unsigned long word; - ret = __get_user(word, u++); - if (ret) - break; - ret = putreg(target, pos, word); - count -= sizeof(*u); - pos += sizeof(*u); - } - } - return ret; -} - -static void ptrace_triggered(struct perf_event *bp, - struct perf_sample_data *data, - struct pt_regs *regs) -{ - int i; - struct thread_struct *thread = &(current->thread); - - /* - * Store in the virtual DR6 register the fact that the breakpoint - * was hit so the thread's debugger will see it. - */ - for (i = 0; i < HBP_NUM; i++) { - if (thread->ptrace_bps[i] == bp) - break; - } - - thread->debugreg6 |= (DR_TRAP0 << i); -} - -/* - * Walk through every ptrace breakpoints for this thread and - * build the dr7 value on top of their attributes. - * - */ -static unsigned long ptrace_get_dr7(struct perf_event *bp[]) -{ - int i; - int dr7 = 0; - struct arch_hw_breakpoint *info; - - for (i = 0; i < HBP_NUM; i++) { - if (bp[i] && !bp[i]->attr.disabled) { - info = counter_arch_bp(bp[i]); - dr7 |= encode_dr7(i, info->len, info->type); - } - } - - return dr7; -} - -static int -ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, - struct task_struct *tsk, int disabled) -{ - int err; - int gen_len, gen_type; - struct perf_event_attr attr; - - /* - * We should have at least an inactive breakpoint at this - * slot. It means the user is writing dr7 without having - * written the address register first - */ - if (!bp) - return -EINVAL; - - err = arch_bp_generic_fields(len, type, &gen_len, &gen_type); - if (err) - return err; - - attr = bp->attr; - attr.bp_len = gen_len; - attr.bp_type = gen_type; - attr.disabled = disabled; - - return modify_user_hw_breakpoint(bp, &attr); -} - -/* - * Handle ptrace writes to debug register 7. - */ -static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) -{ - struct thread_struct *thread = &(tsk->thread); - unsigned long old_dr7; - int i, orig_ret = 0, rc = 0; - int enabled, second_pass = 0; - unsigned len, type; - struct perf_event *bp; - - if (ptrace_get_breakpoints(tsk) < 0) - return -ESRCH; - - data &= ~DR_CONTROL_RESERVED; - old_dr7 = ptrace_get_dr7(thread->ptrace_bps); -restore: - /* - * Loop through all the hardware breakpoints, making the - * appropriate changes to each. 
- */ - for (i = 0; i < HBP_NUM; i++) { - enabled = decode_dr7(data, i, &len, &type); - bp = thread->ptrace_bps[i]; - - if (!enabled) { - if (bp) { - /* - * Don't unregister the breakpoints right-away, - * unless all register_user_hw_breakpoint() - * requests have succeeded. This prevents - * any window of opportunity for debug - * register grabbing by other users. - */ - if (!second_pass) - continue; - - rc = ptrace_modify_breakpoint(bp, len, type, - tsk, 1); - if (rc) - break; - } - continue; - } - - rc = ptrace_modify_breakpoint(bp, len, type, tsk, 0); - if (rc) - break; - } - /* - * Make a second pass to free the remaining unused breakpoints - * or to restore the original breakpoints if an error occurred. - */ - if (!second_pass) { - second_pass = 1; - if (rc < 0) { - orig_ret = rc; - data = old_dr7; - } - goto restore; - } - - ptrace_put_breakpoints(tsk); - - return ((orig_ret < 0) ? orig_ret : rc); -} - -/* - * Handle PTRACE_PEEKUSR calls for the debug register area. - */ -static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) -{ - struct thread_struct *thread = &(tsk->thread); - unsigned long val = 0; - - if (n < HBP_NUM) { - struct perf_event *bp; - - if (ptrace_get_breakpoints(tsk) < 0) - return -ESRCH; - - bp = thread->ptrace_bps[n]; - if (!bp) - val = 0; - else - val = bp->hw.info.address; - - ptrace_put_breakpoints(tsk); - } else if (n == 6) { - val = thread->debugreg6; - } else if (n == 7) { - val = thread->ptrace_dr7; - } - return val; -} - -static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, - unsigned long addr) -{ - struct perf_event *bp; - struct thread_struct *t = &tsk->thread; - struct perf_event_attr attr; - int err = 0; - - if (ptrace_get_breakpoints(tsk) < 0) - return -ESRCH; - - if (!t->ptrace_bps[nr]) { - ptrace_breakpoint_init(&attr); - /* - * Put stub len and type to register (reserve) an inactive but - * correct bp - */ - attr.bp_addr = addr; - attr.bp_len = HW_BREAKPOINT_LEN_1; - attr.bp_type = HW_BREAKPOINT_W; - attr.disabled = 1; - - bp = register_user_hw_breakpoint(&attr, ptrace_triggered, - NULL, tsk); - - /* - * CHECKME: the previous code returned -EIO if the addr wasn't - * a valid task virtual addr. The new one will return -EINVAL in - * this case. - * -EINVAL may be what we want for in-kernel breakpoints users, - * but -EIO looks better for ptrace, since we refuse a register - * writing for the user. And anyway this is the previous - * behaviour. - */ - if (IS_ERR(bp)) { - err = PTR_ERR(bp); - goto put; - } - - t->ptrace_bps[nr] = bp; - } else { - bp = t->ptrace_bps[nr]; - - attr = bp->attr; - attr.bp_addr = addr; - err = modify_user_hw_breakpoint(bp, &attr); - } - -put: - ptrace_put_breakpoints(tsk); - return err; -} - -/* - * Handle PTRACE_POKEUSR calls for the debug register area. - */ -static int ptrace_set_debugreg(struct task_struct *tsk, int n, - unsigned long val) -{ - struct thread_struct *thread = &(tsk->thread); - int rc = 0; - - /* There are no DR4 or DR5 registers */ - if (n == 4 || n == 5) - return -EIO; - - if (n == 6) { - thread->debugreg6 = val; - goto ret_path; - } - if (n < HBP_NUM) { - rc = ptrace_set_breakpoint_addr(tsk, n, val); - if (rc) - return rc; - } - /* All that's left is DR7 */ - if (n == 7) { - rc = ptrace_write_dr7(tsk, val); - if (!rc) - thread->ptrace_dr7 = val; - } - -ret_path: - return rc; -} - -/* - * These access the current or another (stopped) task's io permission - * bitmap for debugging or core dump. 
- */ -static int ioperm_active(struct task_struct *target, - const struct user_regset *regset) -{ - return target->thread.io_bitmap_max / regset->size; -} - -static int ioperm_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - if (!target->thread.io_bitmap_ptr) - return -ENXIO; - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - target->thread.io_bitmap_ptr, - 0, IO_BITMAP_BYTES); -} - -/* - * Called by kernel/ptrace.c when detaching.. - * - * Make sure the single step bit is not set. - */ -void ptrace_disable(struct task_struct *child) -{ - user_disable_single_step(child); -#ifdef TIF_SYSCALL_EMU - clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); -#endif -} - -#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION -static const struct user_regset_view user_x86_32_view; /* Initialized below. */ -#endif - -long arch_ptrace(struct task_struct *child, long request, - unsigned long addr, unsigned long data) -{ - int ret; - unsigned long __user *datap = (unsigned long __user *)data; - - switch (request) { - /* read the word at location addr in the USER area. */ - case PTRACE_PEEKUSR: { - unsigned long tmp; - - ret = -EIO; - if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user)) - break; - - tmp = 0; /* Default return condition */ - if (addr < sizeof(struct user_regs_struct)) - tmp = getreg(child, addr); - else if (addr >= offsetof(struct user, u_debugreg[0]) && - addr <= offsetof(struct user, u_debugreg[7])) { - addr -= offsetof(struct user, u_debugreg[0]); - tmp = ptrace_get_debugreg(child, addr / sizeof(data)); - } - ret = put_user(tmp, datap); - break; - } - - case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ - ret = -EIO; - if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user)) - break; - - if (addr < sizeof(struct user_regs_struct)) - ret = putreg(child, addr, data); - else if (addr >= offsetof(struct user, u_debugreg[0]) && - addr <= offsetof(struct user, u_debugreg[7])) { - addr -= offsetof(struct user, u_debugreg[0]); - ret = ptrace_set_debugreg(child, - addr / sizeof(data), data); - } - break; - - case PTRACE_GETREGS: /* Get all gp regs from the child. */ - return copy_regset_to_user(child, - task_user_regset_view(current), - REGSET_GENERAL, - 0, sizeof(struct user_regs_struct), - datap); - - case PTRACE_SETREGS: /* Set all gp regs in the child. */ - return copy_regset_from_user(child, - task_user_regset_view(current), - REGSET_GENERAL, - 0, sizeof(struct user_regs_struct), - datap); - - case PTRACE_GETFPREGS: /* Get the child FPU state. */ - return copy_regset_to_user(child, - task_user_regset_view(current), - REGSET_FP, - 0, sizeof(struct user_i387_struct), - datap); - - case PTRACE_SETFPREGS: /* Set the child FPU state. */ - return copy_regset_from_user(child, - task_user_regset_view(current), - REGSET_FP, - 0, sizeof(struct user_i387_struct), - datap); - -#ifdef CONFIG_X86_32 - case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */ - return copy_regset_to_user(child, &user_x86_32_view, - REGSET_XFP, - 0, sizeof(struct user_fxsr_struct), - datap) ? -EIO : 0; - - case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */ - return copy_regset_from_user(child, &user_x86_32_view, - REGSET_XFP, - 0, sizeof(struct user_fxsr_struct), - datap) ? 
-EIO : 0; -#endif - -#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION - case PTRACE_GET_THREAD_AREA: - if ((int) addr < 0) - return -EIO; - ret = do_get_thread_area(child, addr, - (struct user_desc __user *)data); - break; - - case PTRACE_SET_THREAD_AREA: - if ((int) addr < 0) - return -EIO; - ret = do_set_thread_area(child, addr, - (struct user_desc __user *)data, 0); - break; -#endif - -#ifdef CONFIG_X86_64 - /* normal 64bit interface to access TLS data. - Works just like arch_prctl, except that the arguments - are reversed. */ - case PTRACE_ARCH_PRCTL: - ret = do_arch_prctl(child, data, addr); - break; -#endif - - default: - ret = ptrace_request(child, request, addr, data); - break; - } - - return ret; -} - -#ifdef CONFIG_IA32_EMULATION - -#include <linux/compat.h> -#include <linux/syscalls.h> -#include <asm/ia32.h> -#include <asm/user32.h> - -#define R32(l,q) \ - case offsetof(struct user32, regs.l): \ - regs->q = value; break - -#define SEG32(rs) \ - case offsetof(struct user32, regs.rs): \ - return set_segment_reg(child, \ - offsetof(struct user_regs_struct, rs), \ - value); \ - break - -static int putreg32(struct task_struct *child, unsigned regno, u32 value) -{ - struct pt_regs *regs = task_pt_regs(child); - - switch (regno) { - - SEG32(cs); - SEG32(ds); - SEG32(es); - SEG32(fs); - SEG32(gs); - SEG32(ss); - - R32(ebx, bx); - R32(ecx, cx); - R32(edx, dx); - R32(edi, di); - R32(esi, si); - R32(ebp, bp); - R32(eax, ax); - R32(eip, ip); - R32(esp, sp); - - case offsetof(struct user32, regs.orig_eax): - /* - * A 32-bit debugger setting orig_eax means to restore - * the state of the task restarting a 32-bit syscall. - * Make sure we interpret the -ERESTART* codes correctly - * in case the task is not actually still sitting at the - * exit from a 32-bit syscall with TS_COMPAT still set. - */ - regs->orig_ax = value; - if (syscall_get_nr(child, regs) >= 0) - task_thread_info(child)->status |= TS_COMPAT; - break; - - case offsetof(struct user32, regs.eflags): - return set_flags(child, value); - - case offsetof(struct user32, u_debugreg[0]) ... - offsetof(struct user32, u_debugreg[7]): - regno -= offsetof(struct user32, u_debugreg[0]); - return ptrace_set_debugreg(child, regno / 4, value); - - default: - if (regno > sizeof(struct user32) || (regno & 3)) - return -EIO; - - /* - * Other dummy fields in the virtual user structure - * are ignored - */ - break; - } - return 0; -} - -#undef R32 -#undef SEG32 - -#define R32(l,q) \ - case offsetof(struct user32, regs.l): \ - *val = regs->q; break - -#define SEG32(rs) \ - case offsetof(struct user32, regs.rs): \ - *val = get_segment_reg(child, \ - offsetof(struct user_regs_struct, rs)); \ - break - -static int getreg32(struct task_struct *child, unsigned regno, u32 *val) -{ - struct pt_regs *regs = task_pt_regs(child); - - switch (regno) { - - SEG32(ds); - SEG32(es); - SEG32(fs); - SEG32(gs); - - R32(cs, cs); - R32(ss, ss); - R32(ebx, bx); - R32(ecx, cx); - R32(edx, dx); - R32(edi, di); - R32(esi, si); - R32(ebp, bp); - R32(eax, ax); - R32(orig_eax, orig_ax); - R32(eip, ip); - R32(esp, sp); - - case offsetof(struct user32, regs.eflags): - *val = get_flags(child); - break; - - case offsetof(struct user32, u_debugreg[0]) ... 
- offsetof(struct user32, u_debugreg[7]): - regno -= offsetof(struct user32, u_debugreg[0]); - *val = ptrace_get_debugreg(child, regno / 4); - break; - - default: - if (regno > sizeof(struct user32) || (regno & 3)) - return -EIO; - - /* - * Other dummy fields in the virtual user structure - * are ignored - */ - *val = 0; - break; - } - return 0; -} - -#undef R32 -#undef SEG32 - -static int genregs32_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - if (kbuf) { - compat_ulong_t *k = kbuf; - while (count >= sizeof(*k)) { - getreg32(target, pos, k++); - count -= sizeof(*k); - pos += sizeof(*k); - } - } else { - compat_ulong_t __user *u = ubuf; - while (count >= sizeof(*u)) { - compat_ulong_t word; - getreg32(target, pos, &word); - if (__put_user(word, u++)) - return -EFAULT; - count -= sizeof(*u); - pos += sizeof(*u); - } - } - - return 0; -} - -static int genregs32_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret = 0; - if (kbuf) { - const compat_ulong_t *k = kbuf; - while (count >= sizeof(*k) && !ret) { - ret = putreg32(target, pos, *k++); - count -= sizeof(*k); - pos += sizeof(*k); - } - } else { - const compat_ulong_t __user *u = ubuf; - while (count >= sizeof(*u) && !ret) { - compat_ulong_t word; - ret = __get_user(word, u++); - if (ret) - break; - ret = putreg32(target, pos, word); - count -= sizeof(*u); - pos += sizeof(*u); - } - } - return ret; -} - -#ifdef CONFIG_X86_X32_ABI -static long x32_arch_ptrace(struct task_struct *child, - compat_long_t request, compat_ulong_t caddr, - compat_ulong_t cdata) -{ - unsigned long addr = caddr; - unsigned long data = cdata; - void __user *datap = compat_ptr(data); - int ret; - - switch (request) { - /* Read 32bits at location addr in the USER area. Only allow - to return the lower 32bits of segment and debug registers. */ - case PTRACE_PEEKUSR: { - u32 tmp; - - ret = -EIO; - if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user) || - addr < offsetof(struct user_regs_struct, cs)) - break; - - tmp = 0; /* Default return condition */ - if (addr < sizeof(struct user_regs_struct)) - tmp = getreg(child, addr); - else if (addr >= offsetof(struct user, u_debugreg[0]) && - addr <= offsetof(struct user, u_debugreg[7])) { - addr -= offsetof(struct user, u_debugreg[0]); - tmp = ptrace_get_debugreg(child, addr / sizeof(data)); - } - ret = put_user(tmp, (__u32 __user *)datap); - break; - } - - /* Write the word at location addr in the USER area. Only allow - to update segment and debug registers with the upper 32bits - zero-extended. */ - case PTRACE_POKEUSR: - ret = -EIO; - if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user) || - addr < offsetof(struct user_regs_struct, cs)) - break; - - if (addr < sizeof(struct user_regs_struct)) - ret = putreg(child, addr, data); - else if (addr >= offsetof(struct user, u_debugreg[0]) && - addr <= offsetof(struct user, u_debugreg[7])) { - addr -= offsetof(struct user, u_debugreg[0]); - ret = ptrace_set_debugreg(child, - addr / sizeof(data), data); - } - break; - - case PTRACE_GETREGS: /* Get all gp regs from the child. */ - return copy_regset_to_user(child, - task_user_regset_view(current), - REGSET_GENERAL, - 0, sizeof(struct user_regs_struct), - datap); - - case PTRACE_SETREGS: /* Set all gp regs in the child. 
*/ - return copy_regset_from_user(child, - task_user_regset_view(current), - REGSET_GENERAL, - 0, sizeof(struct user_regs_struct), - datap); - - case PTRACE_GETFPREGS: /* Get the child FPU state. */ - return copy_regset_to_user(child, - task_user_regset_view(current), - REGSET_FP, - 0, sizeof(struct user_i387_struct), - datap); - - case PTRACE_SETFPREGS: /* Set the child FPU state. */ - return copy_regset_from_user(child, - task_user_regset_view(current), - REGSET_FP, - 0, sizeof(struct user_i387_struct), - datap); - - default: - return compat_ptrace_request(child, request, addr, data); - } - - return ret; -} -#endif - -long compat_arch_ptrace(struct task_struct *child, compat_long_t request, - compat_ulong_t caddr, compat_ulong_t cdata) -{ - unsigned long addr = caddr; - unsigned long data = cdata; - void __user *datap = compat_ptr(data); - int ret; - __u32 val; - -#ifdef CONFIG_X86_X32_ABI - if (!is_ia32_task()) - return x32_arch_ptrace(child, request, caddr, cdata); -#endif - - switch (request) { - case PTRACE_PEEKUSR: - ret = getreg32(child, addr, &val); - if (ret == 0) - ret = put_user(val, (__u32 __user *)datap); - break; - - case PTRACE_POKEUSR: - ret = putreg32(child, addr, data); - break; - - case PTRACE_GETREGS: /* Get all gp regs from the child. */ - return copy_regset_to_user(child, &user_x86_32_view, - REGSET_GENERAL, - 0, sizeof(struct user_regs_struct32), - datap); - - case PTRACE_SETREGS: /* Set all gp regs in the child. */ - return copy_regset_from_user(child, &user_x86_32_view, - REGSET_GENERAL, 0, - sizeof(struct user_regs_struct32), - datap); - - case PTRACE_GETFPREGS: /* Get the child FPU state. */ - return copy_regset_to_user(child, &user_x86_32_view, - REGSET_FP, 0, - sizeof(struct user_i387_ia32_struct), - datap); - - case PTRACE_SETFPREGS: /* Set the child FPU state. */ - return copy_regset_from_user( - child, &user_x86_32_view, REGSET_FP, - 0, sizeof(struct user_i387_ia32_struct), datap); - - case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */ - return copy_regset_to_user(child, &user_x86_32_view, - REGSET_XFP, 0, - sizeof(struct user32_fxsr_struct), - datap); - - case PTRACE_SETFPXREGS: /* Set the child extended FPU state. 
*/ - return copy_regset_from_user(child, &user_x86_32_view, - REGSET_XFP, 0, - sizeof(struct user32_fxsr_struct), - datap); - - case PTRACE_GET_THREAD_AREA: - case PTRACE_SET_THREAD_AREA: - return arch_ptrace(child, request, addr, data); - - default: - return compat_ptrace_request(child, request, addr, data); - } - - return ret; -} - -#endif /* CONFIG_IA32_EMULATION */ - -#ifdef CONFIG_X86_64 - -static struct user_regset x86_64_regsets[] __read_mostly = { - [REGSET_GENERAL] = { - .core_note_type = NT_PRSTATUS, - .n = sizeof(struct user_regs_struct) / sizeof(long), - .size = sizeof(long), .align = sizeof(long), - .get = genregs_get, .set = genregs_set - }, - [REGSET_FP] = { - .core_note_type = NT_PRFPREG, - .n = sizeof(struct user_i387_struct) / sizeof(long), - .size = sizeof(long), .align = sizeof(long), - .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set - }, - [REGSET_XSTATE] = { - .core_note_type = NT_X86_XSTATE, - .size = sizeof(u64), .align = sizeof(u64), - .active = xstateregs_active, .get = xstateregs_get, - .set = xstateregs_set - }, - [REGSET_IOPERM64] = { - .core_note_type = NT_386_IOPERM, - .n = IO_BITMAP_LONGS, - .size = sizeof(long), .align = sizeof(long), - .active = ioperm_active, .get = ioperm_get - }, -}; - -static const struct user_regset_view user_x86_64_view = { - .name = "x86_64", .e_machine = EM_X86_64, - .regsets = x86_64_regsets, .n = ARRAY_SIZE(x86_64_regsets) -}; - -#else /* CONFIG_X86_32 */ - -#define user_regs_struct32 user_regs_struct -#define genregs32_get genregs_get -#define genregs32_set genregs_set - -#define user_i387_ia32_struct user_i387_struct -#define user32_fxsr_struct user_fxsr_struct - -#endif /* CONFIG_X86_64 */ - -#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION -static struct user_regset x86_32_regsets[] __read_mostly = { - [REGSET_GENERAL] = { - .core_note_type = NT_PRSTATUS, - .n = sizeof(struct user_regs_struct32) / sizeof(u32), - .size = sizeof(u32), .align = sizeof(u32), - .get = genregs32_get, .set = genregs32_set - }, - [REGSET_FP] = { - .core_note_type = NT_PRFPREG, - .n = sizeof(struct user_i387_ia32_struct) / sizeof(u32), - .size = sizeof(u32), .align = sizeof(u32), - .active = fpregs_active, .get = fpregs_get, .set = fpregs_set - }, - [REGSET_XFP] = { - .core_note_type = NT_PRXFPREG, - .n = sizeof(struct user32_fxsr_struct) / sizeof(u32), - .size = sizeof(u32), .align = sizeof(u32), - .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set - }, - [REGSET_XSTATE] = { - .core_note_type = NT_X86_XSTATE, - .size = sizeof(u64), .align = sizeof(u64), - .active = xstateregs_active, .get = xstateregs_get, - .set = xstateregs_set - }, - [REGSET_TLS] = { - .core_note_type = NT_386_TLS, - .n = GDT_ENTRY_TLS_ENTRIES, .bias = GDT_ENTRY_TLS_MIN, - .size = sizeof(struct user_desc), - .align = sizeof(struct user_desc), - .active = regset_tls_active, - .get = regset_tls_get, .set = regset_tls_set - }, - [REGSET_IOPERM32] = { - .core_note_type = NT_386_IOPERM, - .n = IO_BITMAP_BYTES / sizeof(u32), - .size = sizeof(u32), .align = sizeof(u32), - .active = ioperm_active, .get = ioperm_get - }, -}; - -static const struct user_regset_view user_x86_32_view = { - .name = "i386", .e_machine = EM_386, - .regsets = x86_32_regsets, .n = ARRAY_SIZE(x86_32_regsets) -}; -#endif - -/* - * This represents bytes 464..511 in the memory layout exported through - * the REGSET_XSTATE interface. 
- */ -u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; - -void update_regset_xstate_info(unsigned int size, u64 xstate_mask) -{ -#ifdef CONFIG_X86_64 - x86_64_regsets[REGSET_XSTATE].n = size / sizeof(u64); -#endif -#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION - x86_32_regsets[REGSET_XSTATE].n = size / sizeof(u64); -#endif - xstate_fx_sw_bytes[USER_XSTATE_XCR0_WORD] = xstate_mask; -} - -const struct user_regset_view *task_user_regset_view(struct task_struct *task) -{ -#ifdef CONFIG_IA32_EMULATION - if (test_tsk_thread_flag(task, TIF_IA32)) -#endif -#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION - return &user_x86_32_view; -#endif -#ifdef CONFIG_X86_64 - return &user_x86_64_view; -#endif -} - -static void fill_sigtrap_info(struct task_struct *tsk, - struct pt_regs *regs, - int error_code, int si_code, - struct siginfo *info) -{ - tsk->thread.trap_nr = X86_TRAP_DB; - tsk->thread.error_code = error_code; - - memset(info, 0, sizeof(*info)); - info->si_signo = SIGTRAP; - info->si_code = si_code; - info->si_addr = user_mode_vm(regs) ? (void __user *)regs->ip : NULL; -} - -void user_single_step_siginfo(struct task_struct *tsk, - struct pt_regs *regs, - struct siginfo *info) -{ - fill_sigtrap_info(tsk, regs, 0, TRAP_BRKPT, info); -} - -void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, - int error_code, int si_code) -{ - struct siginfo info; - - fill_sigtrap_info(tsk, regs, error_code, si_code, &info); - /* Send us the fake SIGTRAP */ - force_sig_info(SIGTRAP, &info, tsk); -} - - -#ifdef CONFIG_X86_32 -# define IS_IA32 1 -#elif defined CONFIG_IA32_EMULATION -# define IS_IA32 is_compat_task() -#else -# define IS_IA32 0 -#endif - -/* - * We must return the syscall number to actually look up in the table. - * This can be -1L to skip running any syscall at all. - */ -long syscall_trace_enter(struct pt_regs *regs) -{ - long ret = 0; - - /* - * If we stepped into a sysenter/syscall insn, it trapped in - * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. - * If user-mode had set TF itself, then it's still clear from - * do_debug() and we need to set it again to restore the user - * state. If we entered on the slow path, TF was already set. - */ - if (test_thread_flag(TIF_SINGLESTEP)) - regs->flags |= X86_EFLAGS_TF; - - /* do the secure computing check first */ - secure_computing(regs->orig_ax); - - if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) - ret = -1L; - - if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) && - tracehook_report_syscall_entry(regs)) - ret = -1L; - - if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_enter(regs, regs->orig_ax); - - if (IS_IA32) - audit_syscall_entry(AUDIT_ARCH_I386, - regs->orig_ax, - regs->bx, regs->cx, - regs->dx, regs->si); -#ifdef CONFIG_X86_64 - else - audit_syscall_entry(AUDIT_ARCH_X86_64, - regs->orig_ax, - regs->di, regs->si, - regs->dx, regs->r10); -#endif - - return ret ?: regs->orig_ax; -} - -void syscall_trace_leave(struct pt_regs *regs) -{ - bool step; - - audit_syscall_exit(regs); - - if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_exit(regs, regs->ax); - - /* - * If TIF_SYSCALL_EMU is set, we only get here because of - * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP). - * We already reported this syscall instruction in - * syscall_trace_enter(). 
- */ - step = unlikely(test_thread_flag(TIF_SINGLESTEP)) && - !test_thread_flag(TIF_SYSCALL_EMU); - if (step || test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall_exit(regs, step); -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/pvclock.c b/ANDROID_3.4.5/arch/x86/kernel/pvclock.c deleted file mode 100644 index 42eb3300..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/pvclock.c +++ /dev/null @@ -1,158 +0,0 @@ -/* paravirtual clock -- common code used by kvm/xen - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -*/ - -#include <linux/kernel.h> -#include <linux/percpu.h> -#include <asm/pvclock.h> - -/* - * These are perodically updated - * xen: magic shared_info page - * kvm: gpa registered via msr - * and then copied here. - */ -struct pvclock_shadow_time { - u64 tsc_timestamp; /* TSC at last update of time vals. */ - u64 system_timestamp; /* Time, in nanosecs, since boot. */ - u32 tsc_to_nsec_mul; - int tsc_shift; - u32 version; - u8 flags; -}; - -static u8 valid_flags __read_mostly = 0; - -void pvclock_set_flags(u8 flags) -{ - valid_flags = flags; -} - -static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow) -{ - u64 delta = native_read_tsc() - shadow->tsc_timestamp; - return pvclock_scale_delta(delta, shadow->tsc_to_nsec_mul, - shadow->tsc_shift); -} - -/* - * Reads a consistent set of time-base values from hypervisor, - * into a shadow data area. 
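The consistent read described here is the usual even/odd version protocol: the producer bumps the version before and after an update, and a reader retries whenever it observed an odd or changed version. A rough standalone sketch of the same pattern (illustrative only; GCC barrier builtins stand in for the kernel's rmb()):

    #include <stdint.h>

    struct time_info {
        volatile uint32_t version;          /* odd while the producer is mid-update */
        uint64_t tsc_timestamp;
        uint64_t system_time;
    };

    static struct time_info read_consistent(const volatile struct time_info *src)
    {
        struct time_info dst;

        do {
            dst.version = src->version;
            __sync_synchronize();           /* fetch the version before the payload */
            dst.tsc_timestamp = src->tsc_timestamp;
            dst.system_time   = src->system_time;
            __sync_synchronize();           /* and re-check it afterwards */
        } while ((dst.version & 1) || dst.version != src->version);

        return dst;
    }
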
- */ -static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst, - struct pvclock_vcpu_time_info *src) -{ - do { - dst->version = src->version; - rmb(); /* fetch version before data */ - dst->tsc_timestamp = src->tsc_timestamp; - dst->system_timestamp = src->system_time; - dst->tsc_to_nsec_mul = src->tsc_to_system_mul; - dst->tsc_shift = src->tsc_shift; - dst->flags = src->flags; - rmb(); /* test version after fetching data */ - } while ((src->version & 1) || (dst->version != src->version)); - - return dst->version; -} - -unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src) -{ - u64 pv_tsc_khz = 1000000ULL << 32; - - do_div(pv_tsc_khz, src->tsc_to_system_mul); - if (src->tsc_shift < 0) - pv_tsc_khz <<= -src->tsc_shift; - else - pv_tsc_khz >>= src->tsc_shift; - return pv_tsc_khz; -} - -static atomic64_t last_value = ATOMIC64_INIT(0); - -void pvclock_resume(void) -{ - atomic64_set(&last_value, 0); -} - -cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) -{ - struct pvclock_shadow_time shadow; - unsigned version; - cycle_t ret, offset; - u64 last; - - do { - version = pvclock_get_time_values(&shadow, src); - barrier(); - offset = pvclock_get_nsec_offset(&shadow); - ret = shadow.system_timestamp + offset; - barrier(); - } while (version != src->version); - - if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) && - (shadow.flags & PVCLOCK_TSC_STABLE_BIT)) - return ret; - - /* - * Assumption here is that last_value, a global accumulator, always goes - * forward. If we are less than that, we should not be much smaller. - * We assume there is an error marging we're inside, and then the correction - * does not sacrifice accuracy. - * - * For reads: global may have changed between test and return, - * but this means someone else updated poked the clock at a later time. - * We just need to make sure we are not seeing a backwards event. - * - * For updates: last_value = ret is not enough, since two vcpus could be - * updating at the same time, and one of them could be slightly behind, - * making the assumption that last_value always go forward fail to hold. - */ - last = atomic64_read(&last_value); - do { - if (ret < last) - return last; - last = atomic64_cmpxchg(&last_value, last, ret); - } while (unlikely(last != ret)); - - return ret; -} - -void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, - struct pvclock_vcpu_time_info *vcpu_time, - struct timespec *ts) -{ - u32 version; - u64 delta; - struct timespec now; - - /* get wallclock at system boot */ - do { - version = wall_clock->version; - rmb(); /* fetch version before time */ - now.tv_sec = wall_clock->sec; - now.tv_nsec = wall_clock->nsec; - rmb(); /* fetch time before checking version */ - } while ((wall_clock->version & 1) || (version != wall_clock->version)); - - delta = pvclock_clocksource_read(vcpu_time); /* time since system boot */ - delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec; - - now.tv_nsec = do_div(delta, NSEC_PER_SEC); - now.tv_sec = delta; - - set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/quirks.c b/ANDROID_3.4.5/arch/x86/kernel/quirks.c deleted file mode 100644 index 03920a15..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/quirks.c +++ /dev/null @@ -1,569 +0,0 @@ -/* - * This file contains work-arounds for x86 and x86_64 platform bugs. 
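Nearly every quirk below follows one pattern: a small hook keyed on a PCI vendor/device ID that pokes config space when a matching device is enumerated. A schematic example of that pattern (hypothetical device ID and register, not one of the quirks in this file):

    #include <linux/pci.h>

    /* Hypothetical fixup: make sure bit 0 of config register 0x40 is set. */
    static void quirk_example(struct pci_dev *dev)
    {
        u32 val;

        pci_read_config_dword(dev, 0x40, &val);
        if (!(val & 0x1))
            pci_write_config_dword(dev, 0x40, val | 0x1);
    }
    DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1234, quirk_example);
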
- */ -#include <linux/pci.h> -#include <linux/irq.h> - -#include <asm/hpet.h> - -#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI) - -static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) -{ - u8 config; - u16 word; - - /* BIOS may enable hardware IRQ balancing for - * E7520/E7320/E7525(revision ID 0x9 and below) - * based platforms. - * Disable SW irqbalance/affinity on those platforms. - */ - if (dev->revision > 0x9) - return; - - /* enable access to config space*/ - pci_read_config_byte(dev, 0xf4, &config); - pci_write_config_byte(dev, 0xf4, config|0x2); - - /* - * read xTPR register. We may not have a pci_dev for device 8 - * because it might be hidden until the above write. - */ - pci_bus_read_config_word(dev->bus, PCI_DEVFN(8, 0), 0x4c, &word); - - if (!(word & (1 << 13))) { - dev_info(&dev->dev, "Intel E7520/7320/7525 detected; " - "disabling irq balancing and affinity\n"); - noirqdebug_setup(""); -#ifdef CONFIG_PROC_FS - no_irq_affinity = 1; -#endif - } - - /* put back the original value for config space*/ - if (!(config & 0x2)) - pci_write_config_byte(dev, 0xf4, config); -} -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, - quirk_intel_irqbalance); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, - quirk_intel_irqbalance); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, - quirk_intel_irqbalance); -#endif - -#if defined(CONFIG_HPET_TIMER) -unsigned long force_hpet_address; - -static enum { - NONE_FORCE_HPET_RESUME, - OLD_ICH_FORCE_HPET_RESUME, - ICH_FORCE_HPET_RESUME, - VT8237_FORCE_HPET_RESUME, - NVIDIA_FORCE_HPET_RESUME, - ATI_FORCE_HPET_RESUME, -} force_hpet_resume_type; - -static void __iomem *rcba_base; - -static void ich_force_hpet_resume(void) -{ - u32 val; - - if (!force_hpet_address) - return; - - BUG_ON(rcba_base == NULL); - - /* read the Function Disable register, dword mode only */ - val = readl(rcba_base + 0x3404); - if (!(val & 0x80)) { - /* HPET disabled in HPTC. Trying to enable */ - writel(val | 0x80, rcba_base + 0x3404); - } - - val = readl(rcba_base + 0x3404); - if (!(val & 0x80)) - BUG(); - else - printk(KERN_DEBUG "Force enabled HPET at resume\n"); - - return; -} - -static void ich_force_enable_hpet(struct pci_dev *dev) -{ - u32 val; - u32 uninitialized_var(rcba); - int err = 0; - - if (hpet_address || force_hpet_address) - return; - - pci_read_config_dword(dev, 0xF0, &rcba); - rcba &= 0xFFFFC000; - if (rcba == 0) { - dev_printk(KERN_DEBUG, &dev->dev, "RCBA disabled; " - "cannot force enable HPET\n"); - return; - } - - /* use bits 31:14, 16 kB aligned */ - rcba_base = ioremap_nocache(rcba, 0x4000); - if (rcba_base == NULL) { - dev_printk(KERN_DEBUG, &dev->dev, "ioremap failed; " - "cannot force enable HPET\n"); - return; - } - - /* read the Function Disable register, dword mode only */ - val = readl(rcba_base + 0x3404); - - if (val & 0x80) { - /* HPET is enabled in HPTC. Just not reported by BIOS */ - val = val & 0x3; - force_hpet_address = 0xFED00000 | (val << 12); - dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at " - "0x%lx\n", force_hpet_address); - iounmap(rcba_base); - return; - } - - /* HPET disabled in HPTC. 
Trying to enable */ - writel(val | 0x80, rcba_base + 0x3404); - - val = readl(rcba_base + 0x3404); - if (!(val & 0x80)) { - err = 1; - } else { - val = val & 0x3; - force_hpet_address = 0xFED00000 | (val << 12); - } - - if (err) { - force_hpet_address = 0; - iounmap(rcba_base); - dev_printk(KERN_DEBUG, &dev->dev, - "Failed to force enable HPET\n"); - } else { - force_hpet_resume_type = ICH_FORCE_HPET_RESUME; - dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at " - "0x%lx\n", force_hpet_address); - } -} - -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0, - ich_force_enable_hpet); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_0, - ich_force_enable_hpet); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1, - ich_force_enable_hpet); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_0, - ich_force_enable_hpet); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_1, - ich_force_enable_hpet); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_31, - ich_force_enable_hpet); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1, - ich_force_enable_hpet); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_4, - ich_force_enable_hpet); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7, - ich_force_enable_hpet); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x3a16, /* ICH10 */ - ich_force_enable_hpet); - -static struct pci_dev *cached_dev; - -static void hpet_print_force_info(void) -{ - printk(KERN_INFO "HPET not enabled in BIOS. " - "You might try hpet=force boot option\n"); -} - -static void old_ich_force_hpet_resume(void) -{ - u32 val; - u32 uninitialized_var(gen_cntl); - - if (!force_hpet_address || !cached_dev) - return; - - pci_read_config_dword(cached_dev, 0xD0, &gen_cntl); - gen_cntl &= (~(0x7 << 15)); - gen_cntl |= (0x4 << 15); - - pci_write_config_dword(cached_dev, 0xD0, gen_cntl); - pci_read_config_dword(cached_dev, 0xD0, &gen_cntl); - val = gen_cntl >> 15; - val &= 0x7; - if (val == 0x4) - printk(KERN_DEBUG "Force enabled HPET at resume\n"); - else - BUG(); -} - -static void old_ich_force_enable_hpet(struct pci_dev *dev) -{ - u32 val; - u32 uninitialized_var(gen_cntl); - - if (hpet_address || force_hpet_address) - return; - - pci_read_config_dword(dev, 0xD0, &gen_cntl); - /* - * Bit 17 is HPET enable bit. - * Bit 16:15 control the HPET base address. - */ - val = gen_cntl >> 15; - val &= 0x7; - if (val & 0x4) { - val &= 0x3; - force_hpet_address = 0xFED00000 | (val << 12); - dev_printk(KERN_DEBUG, &dev->dev, "HPET at 0x%lx\n", - force_hpet_address); - return; - } - - /* - * HPET is disabled. Trying enabling at FED00000 and check - * whether it sticks - */ - gen_cntl &= (~(0x7 << 15)); - gen_cntl |= (0x4 << 15); - pci_write_config_dword(dev, 0xD0, gen_cntl); - - pci_read_config_dword(dev, 0xD0, &gen_cntl); - - val = gen_cntl >> 15; - val &= 0x7; - if (val & 0x4) { - /* HPET is enabled in HPTC. Just not reported by BIOS */ - val &= 0x3; - force_hpet_address = 0xFED00000 | (val << 12); - dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at " - "0x%lx\n", force_hpet_address); - cached_dev = dev; - force_hpet_resume_type = OLD_ICH_FORCE_HPET_RESUME; - return; - } - - dev_printk(KERN_DEBUG, &dev->dev, "Failed to force enable HPET\n"); -} - -/* - * Undocumented chipset features. Make sure that the user enforced - * this. 
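Both ICH force-enable paths above recover the HPET base address the same way: the two low address-select bits of the HPTC value pick one of four 4 KiB-aligned windows starting at 0xFED00000. A standalone sketch of that decode:

    #include <stdint.h>

    /* HPTC bits 1:0 select the HPET window; mirrors the decode in the code above. */
    static uint32_t hptc_to_hpet_base(uint32_t hptc)
    {
        return 0xFED00000u | ((hptc & 0x3) << 12);
    }
    /* hptc_to_hpet_base(0) == 0xFED00000 ... hptc_to_hpet_base(3) == 0xFED03000 */
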
- */ -static void old_ich_force_enable_hpet_user(struct pci_dev *dev) -{ - if (hpet_force_user) - old_ich_force_enable_hpet(dev); -} - -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_1, - old_ich_force_enable_hpet_user); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0, - old_ich_force_enable_hpet_user); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12, - old_ich_force_enable_hpet_user); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0, - old_ich_force_enable_hpet_user); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_12, - old_ich_force_enable_hpet_user); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0, - old_ich_force_enable_hpet); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_12, - old_ich_force_enable_hpet); - - -static void vt8237_force_hpet_resume(void) -{ - u32 val; - - if (!force_hpet_address || !cached_dev) - return; - - val = 0xfed00000 | 0x80; - pci_write_config_dword(cached_dev, 0x68, val); - - pci_read_config_dword(cached_dev, 0x68, &val); - if (val & 0x80) - printk(KERN_DEBUG "Force enabled HPET at resume\n"); - else - BUG(); -} - -static void vt8237_force_enable_hpet(struct pci_dev *dev) -{ - u32 uninitialized_var(val); - - if (hpet_address || force_hpet_address) - return; - - if (!hpet_force_user) { - hpet_print_force_info(); - return; - } - - pci_read_config_dword(dev, 0x68, &val); - /* - * Bit 7 is HPET enable bit. - * Bit 31:10 is HPET base address (contrary to what datasheet claims) - */ - if (val & 0x80) { - force_hpet_address = (val & ~0x3ff); - dev_printk(KERN_DEBUG, &dev->dev, "HPET at 0x%lx\n", - force_hpet_address); - return; - } - - /* - * HPET is disabled. 
Trying enabling at FED00000 and check - * whether it sticks - */ - val = 0xfed00000 | 0x80; - pci_write_config_dword(dev, 0x68, val); - - pci_read_config_dword(dev, 0x68, &val); - if (val & 0x80) { - force_hpet_address = (val & ~0x3ff); - dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at " - "0x%lx\n", force_hpet_address); - cached_dev = dev; - force_hpet_resume_type = VT8237_FORCE_HPET_RESUME; - return; - } - - dev_printk(KERN_DEBUG, &dev->dev, "Failed to force enable HPET\n"); -} - -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, - vt8237_force_enable_hpet); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, - vt8237_force_enable_hpet); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_CX700, - vt8237_force_enable_hpet); - -static void ati_force_hpet_resume(void) -{ - pci_write_config_dword(cached_dev, 0x14, 0xfed00000); - printk(KERN_DEBUG "Force enabled HPET at resume\n"); -} - -static u32 ati_ixp4x0_rev(struct pci_dev *dev) -{ - u32 d; - u8 b; - - pci_read_config_byte(dev, 0xac, &b); - b &= ~(1<<5); - pci_write_config_byte(dev, 0xac, b); - pci_read_config_dword(dev, 0x70, &d); - d |= 1<<8; - pci_write_config_dword(dev, 0x70, d); - pci_read_config_dword(dev, 0x8, &d); - d &= 0xff; - dev_printk(KERN_DEBUG, &dev->dev, "SB4X0 revision 0x%x\n", d); - return d; -} - -static void ati_force_enable_hpet(struct pci_dev *dev) -{ - u32 d, val; - u8 b; - - if (hpet_address || force_hpet_address) - return; - - if (!hpet_force_user) { - hpet_print_force_info(); - return; - } - - d = ati_ixp4x0_rev(dev); - if (d < 0x82) - return; - - /* base address */ - pci_write_config_dword(dev, 0x14, 0xfed00000); - pci_read_config_dword(dev, 0x14, &val); - - /* enable interrupt */ - outb(0x72, 0xcd6); b = inb(0xcd7); - b |= 0x1; - outb(0x72, 0xcd6); outb(b, 0xcd7); - outb(0x72, 0xcd6); b = inb(0xcd7); - if (!(b & 0x1)) - return; - pci_read_config_dword(dev, 0x64, &d); - d |= (1<<10); - pci_write_config_dword(dev, 0x64, d); - pci_read_config_dword(dev, 0x64, &d); - if (!(d & (1<<10))) - return; - - force_hpet_address = val; - force_hpet_resume_type = ATI_FORCE_HPET_RESUME; - dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at 0x%lx\n", - force_hpet_address); - cached_dev = dev; -} -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS, - ati_force_enable_hpet); - -/* - * Undocumented chipset feature taken from LinuxBIOS. 
- */ -static void nvidia_force_hpet_resume(void) -{ - pci_write_config_dword(cached_dev, 0x44, 0xfed00001); - printk(KERN_DEBUG "Force enabled HPET at resume\n"); -} - -static void nvidia_force_enable_hpet(struct pci_dev *dev) -{ - u32 uninitialized_var(val); - - if (hpet_address || force_hpet_address) - return; - - if (!hpet_force_user) { - hpet_print_force_info(); - return; - } - - pci_write_config_dword(dev, 0x44, 0xfed00001); - pci_read_config_dword(dev, 0x44, &val); - force_hpet_address = val & 0xfffffffe; - force_hpet_resume_type = NVIDIA_FORCE_HPET_RESUME; - dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at 0x%lx\n", - force_hpet_address); - cached_dev = dev; - return; -} - -/* ISA Bridges */ -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0050, - nvidia_force_enable_hpet); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0051, - nvidia_force_enable_hpet); - -/* LPC bridges */ -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0260, - nvidia_force_enable_hpet); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0360, - nvidia_force_enable_hpet); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0361, - nvidia_force_enable_hpet); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0362, - nvidia_force_enable_hpet); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0363, - nvidia_force_enable_hpet); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0364, - nvidia_force_enable_hpet); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0365, - nvidia_force_enable_hpet); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0366, - nvidia_force_enable_hpet); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0367, - nvidia_force_enable_hpet); - -void force_hpet_resume(void) -{ - switch (force_hpet_resume_type) { - case ICH_FORCE_HPET_RESUME: - ich_force_hpet_resume(); - return; - case OLD_ICH_FORCE_HPET_RESUME: - old_ich_force_hpet_resume(); - return; - case VT8237_FORCE_HPET_RESUME: - vt8237_force_hpet_resume(); - return; - case NVIDIA_FORCE_HPET_RESUME: - nvidia_force_hpet_resume(); - return; - case ATI_FORCE_HPET_RESUME: - ati_force_hpet_resume(); - return; - default: - break; - } -} - -/* - * HPET MSI on some boards (ATI SB700/SB800) has side effect on - * floppy DMA. Disable HPET MSI on such platforms. - * See erratum #27 (Misinterpreted MSI Requests May Result in - * Corrupted LPC DMA Data) in AMD Publication #46837, - * "SB700 Family Product Errata", Rev. 1.0, March 2010. 
- */ -static void force_disable_hpet_msi(struct pci_dev *unused) -{ - hpet_msi_disable = 1; -} - -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, - force_disable_hpet_msi); - -#endif - -#if defined(CONFIG_PCI) && defined(CONFIG_NUMA) -/* Set correct numa_node information for AMD NB functions */ -static void __init quirk_amd_nb_node(struct pci_dev *dev) -{ - struct pci_dev *nb_ht; - unsigned int devfn; - u32 node; - u32 val; - - devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 0); - nb_ht = pci_get_slot(dev->bus, devfn); - if (!nb_ht) - return; - - pci_read_config_dword(nb_ht, 0x60, &val); - node = val & 7; - /* - * Some hardware may return an invalid node ID, - * so check it first: - */ - if (node_online(node)) - set_dev_node(&dev->dev, node); - pci_dev_put(nb_ht); -} - -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, - quirk_amd_nb_node); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP, - quirk_amd_nb_node); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MEMCTL, - quirk_amd_nb_node); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC, - quirk_amd_nb_node); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_HT, - quirk_amd_nb_node); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MAP, - quirk_amd_nb_node); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_DRAM, - quirk_amd_nb_node); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC, - quirk_amd_nb_node); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_LINK, - quirk_amd_nb_node); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F0, - quirk_amd_nb_node); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F1, - quirk_amd_nb_node); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F2, - quirk_amd_nb_node); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3, - quirk_amd_nb_node); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4, - quirk_amd_nb_node); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F5, - quirk_amd_nb_node); - -#endif diff --git a/ANDROID_3.4.5/arch/x86/kernel/reboot.c b/ANDROID_3.4.5/arch/x86/kernel/reboot.c deleted file mode 100644 index 3034ee5a..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/reboot.c +++ /dev/null @@ -1,851 +0,0 @@ -#include <linux/module.h> -#include <linux/reboot.h> -#include <linux/init.h> -#include <linux/pm.h> -#include <linux/efi.h> -#include <linux/dmi.h> -#include <linux/sched.h> -#include <linux/tboot.h> -#include <linux/delay.h> -#include <acpi/reboot.h> -#include <asm/io.h> -#include <asm/apic.h> -#include <asm/desc.h> -#include <asm/hpet.h> -#include <asm/pgtable.h> -#include <asm/proto.h> -#include <asm/reboot_fixups.h> -#include <asm/reboot.h> -#include <asm/pci_x86.h> -#include <asm/virtext.h> -#include <asm/cpu.h> -#include <asm/nmi.h> - -#ifdef CONFIG_X86_32 -# include <linux/ctype.h> -# include <linux/mc146818rtc.h> -#else -# include <asm/x86_init.h> -#endif - -/* - * Power off function, if any - */ -void (*pm_power_off)(void); -EXPORT_SYMBOL(pm_power_off); - -static const struct desc_ptr no_idt = {}; -static int reboot_mode; -enum reboot_type reboot_type = BOOT_ACPI; -int reboot_force; - -/* This variable is used privately to keep track of whether or not - * reboot_type is still set to its default value (i.e., reboot= hasn't - * been set on the command line). 
This is needed so that we can - * suppress DMI scanning for reboot quirks. Without it, it's - * impossible to override a faulty reboot quirk without recompiling. - */ -static int reboot_default = 1; - -#if defined(CONFIG_X86_32) && defined(CONFIG_SMP) -static int reboot_cpu = -1; -#endif - -/* This is set if we need to go through the 'emergency' path. - * When machine_emergency_restart() is called, we may be on - * an inconsistent state and won't be able to do a clean cleanup - */ -static int reboot_emergency; - -/* This is set by the PCI code if either type 1 or type 2 PCI is detected */ -bool port_cf9_safe = false; - -/* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci] - warm Don't set the cold reboot flag - cold Set the cold reboot flag - bios Reboot by jumping through the BIOS (only for X86_32) - smp Reboot by executing reset on BSP or other CPU (only for X86_32) - triple Force a triple fault (init) - kbd Use the keyboard controller. cold reset (default) - acpi Use the RESET_REG in the FADT - efi Use efi reset_system runtime service - pci Use the so-called "PCI reset register", CF9 - force Avoid anything that could hang. - */ -static int __init reboot_setup(char *str) -{ - for (;;) { - /* Having anything passed on the command line via - * reboot= will cause us to disable DMI checking - * below. - */ - reboot_default = 0; - - switch (*str) { - case 'w': - reboot_mode = 0x1234; - break; - - case 'c': - reboot_mode = 0; - break; - -#ifdef CONFIG_X86_32 -#ifdef CONFIG_SMP - case 's': - if (isdigit(*(str+1))) { - reboot_cpu = (int) (*(str+1) - '0'); - if (isdigit(*(str+2))) - reboot_cpu = reboot_cpu*10 + (int)(*(str+2) - '0'); - } - /* we will leave sorting out the final value - when we are ready to reboot, since we might not - have detected BSP APIC ID or smp_num_cpu */ - break; -#endif /* CONFIG_SMP */ - - case 'b': -#endif - case 'a': - case 'k': - case 't': - case 'e': - case 'p': - reboot_type = *str; - break; - - case 'f': - reboot_force = 1; - break; - } - - str = strchr(str, ','); - if (str) - str++; - else - break; - } - return 1; -} - -__setup("reboot=", reboot_setup); - - -#ifdef CONFIG_X86_32 -/* - * Reboot options and system auto-detection code provided by - * Dell Inc. so their systems "just work". :-) - */ - -/* - * Some machines require the "reboot=b" or "reboot=k" commandline options, - * this quirk makes that automatic. - */ -static int __init set_bios_reboot(const struct dmi_system_id *d) -{ - if (reboot_type != BOOT_BIOS) { - reboot_type = BOOT_BIOS; - printk(KERN_INFO "%s series board detected. Selecting BIOS-method for reboots.\n", d->ident); - } - return 0; -} - -static int __init set_kbd_reboot(const struct dmi_system_id *d) -{ - if (reboot_type != BOOT_KBD) { - reboot_type = BOOT_KBD; - printk(KERN_INFO "%s series board detected. 
Selecting KBD-method for reboot.\n", d->ident); - } - return 0; -} - -static struct dmi_system_id __initdata reboot_dmi_table[] = { - { /* Handle problems with rebooting on Dell E520's */ - .callback = set_bios_reboot, - .ident = "Dell E520", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "Dell DM061"), - }, - }, - { /* Handle problems with rebooting on Dell 1300's */ - .callback = set_bios_reboot, - .ident = "Dell PowerEdge 1300", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 1300/"), - }, - }, - { /* Handle problems with rebooting on Dell 300's */ - .callback = set_bios_reboot, - .ident = "Dell PowerEdge 300", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 300/"), - }, - }, - { /* Handle problems with rebooting on Dell Optiplex 745's SFF*/ - .callback = set_bios_reboot, - .ident = "Dell OptiPlex 745", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"), - }, - }, - { /* Handle problems with rebooting on Dell Optiplex 745's DFF*/ - .callback = set_bios_reboot, - .ident = "Dell OptiPlex 745", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"), - DMI_MATCH(DMI_BOARD_NAME, "0MM599"), - }, - }, - { /* Handle problems with rebooting on Dell Optiplex 745 with 0KW626 */ - .callback = set_bios_reboot, - .ident = "Dell OptiPlex 745", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"), - DMI_MATCH(DMI_BOARD_NAME, "0KW626"), - }, - }, - { /* Handle problems with rebooting on Dell Optiplex 330 with 0KP561 */ - .callback = set_bios_reboot, - .ident = "Dell OptiPlex 330", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 330"), - DMI_MATCH(DMI_BOARD_NAME, "0KP561"), - }, - }, - { /* Handle problems with rebooting on Dell Optiplex 360 with 0T656F */ - .callback = set_bios_reboot, - .ident = "Dell OptiPlex 360", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 360"), - DMI_MATCH(DMI_BOARD_NAME, "0T656F"), - }, - }, - { /* Handle problems with rebooting on Dell OptiPlex 760 with 0G919G*/ - .callback = set_bios_reboot, - .ident = "Dell OptiPlex 760", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 760"), - DMI_MATCH(DMI_BOARD_NAME, "0G919G"), - }, - }, - { /* Handle problems with rebooting on Dell 2400's */ - .callback = set_bios_reboot, - .ident = "Dell PowerEdge 2400", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"), - }, - }, - { /* Handle problems with rebooting on Dell T5400's */ - .callback = set_bios_reboot, - .ident = "Dell Precision T5400", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T5400"), - }, - }, - { /* Handle problems with rebooting on Dell T7400's */ - .callback = set_bios_reboot, - .ident = "Dell Precision T7400", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T7400"), - }, - }, - { /* Handle problems with rebooting on HP laptops */ - .callback = set_bios_reboot, - .ident = "HP Compaq Laptop", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq"), - }, - }, - { /* Handle 
problems with rebooting on Dell XPS710 */ - .callback = set_bios_reboot, - .ident = "Dell XPS710", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "Dell XPS710"), - }, - }, - { /* Handle problems with rebooting on Dell DXP061 */ - .callback = set_bios_reboot, - .ident = "Dell DXP061", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "Dell DXP061"), - }, - }, - { /* Handle problems with rebooting on Sony VGN-Z540N */ - .callback = set_bios_reboot, - .ident = "Sony VGN-Z540N", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "VGN-Z540N"), - }, - }, - { /* Handle problems with rebooting on CompuLab SBC-FITPC2 */ - .callback = set_bios_reboot, - .ident = "CompuLab SBC-FITPC2", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "CompuLab"), - DMI_MATCH(DMI_PRODUCT_NAME, "SBC-FITPC2"), - }, - }, - { /* Handle problems with rebooting on ASUS P4S800 */ - .callback = set_bios_reboot, - .ident = "ASUS P4S800", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), - DMI_MATCH(DMI_BOARD_NAME, "P4S800"), - }, - }, - { /* Handle reboot issue on Acer Aspire one */ - .callback = set_kbd_reboot, - .ident = "Acer Aspire One A110", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Acer"), - DMI_MATCH(DMI_PRODUCT_NAME, "AOA110"), - }, - }, - { } -}; - -static int __init reboot_init(void) -{ - /* Only do the DMI check if reboot_type hasn't been overridden - * on the command line - */ - if (reboot_default) { - dmi_check_system(reboot_dmi_table); - } - return 0; -} -core_initcall(reboot_init); - -extern const unsigned char machine_real_restart_asm[]; -extern const u64 machine_real_restart_gdt[3]; - -void machine_real_restart(unsigned int type) -{ - void *restart_va; - unsigned long restart_pa; - void (*restart_lowmem)(unsigned int); - u64 *lowmem_gdt; - - local_irq_disable(); - - /* Write zero to CMOS register number 0x0f, which the BIOS POST - routine will recognize as telling it to do a proper reboot. (Well - that's what this book in front of me says -- it may only apply to - the Phoenix BIOS though, it's not clear). At the same time, - disable NMIs by setting the top bit in the CMOS address register, - as we're about to do peculiar things to the CPU. I'm not sure if - `outb_p' is needed instead of just `outb'. Use it to be on the - safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.) - */ - spin_lock(&rtc_lock); - CMOS_WRITE(0x00, 0x8f); - spin_unlock(&rtc_lock); - - /* - * Switch back to the initial page table. - */ - load_cr3(initial_page_table); - - /* Write 0x1234 to absolute memory location 0x472. The BIOS reads - this on booting to tell it to "Bypass memory test (also warm - boot)". This seems like a fairly standard thing that gets set by - REBOOT.COM programs, and the previous reset routine did this - too. 
*/ - *((unsigned short *)0x472) = reboot_mode; - - /* Patch the GDT in the low memory trampoline */ - lowmem_gdt = TRAMPOLINE_SYM(machine_real_restart_gdt); - - restart_va = TRAMPOLINE_SYM(machine_real_restart_asm); - restart_pa = virt_to_phys(restart_va); - restart_lowmem = (void (*)(unsigned int))restart_pa; - - /* GDT[0]: GDT self-pointer */ - lowmem_gdt[0] = - (u64)(sizeof(machine_real_restart_gdt) - 1) + - ((u64)virt_to_phys(lowmem_gdt) << 16); - /* GDT[1]: 64K real mode code segment */ - lowmem_gdt[1] = - GDT_ENTRY(0x009b, restart_pa, 0xffff); - - /* Jump to the identity-mapped low memory code */ - restart_lowmem(type); -} -#ifdef CONFIG_APM_MODULE -EXPORT_SYMBOL(machine_real_restart); -#endif - -#endif /* CONFIG_X86_32 */ - -/* - * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot - */ -static int __init set_pci_reboot(const struct dmi_system_id *d) -{ - if (reboot_type != BOOT_CF9) { - reboot_type = BOOT_CF9; - printk(KERN_INFO "%s series board detected. " - "Selecting PCI-method for reboots.\n", d->ident); - } - return 0; -} - -static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { - { /* Handle problems with rebooting on Apple MacBook5 */ - .callback = set_pci_reboot, - .ident = "Apple MacBook5", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"), - }, - }, - { /* Handle problems with rebooting on Apple MacBookPro5 */ - .callback = set_pci_reboot, - .ident = "Apple MacBookPro5", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"), - }, - }, - { /* Handle problems with rebooting on Apple Macmini3,1 */ - .callback = set_pci_reboot, - .ident = "Apple Macmini3,1", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "Macmini3,1"), - }, - }, - { /* Handle problems with rebooting on the iMac9,1. */ - .callback = set_pci_reboot, - .ident = "Apple iMac9,1", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"), - }, - }, - { /* Handle problems with rebooting on the Latitude E6320. */ - .callback = set_pci_reboot, - .ident = "Dell Latitude E6320", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6320"), - }, - }, - { /* Handle problems with rebooting on the Latitude E5420. */ - .callback = set_pci_reboot, - .ident = "Dell Latitude E5420", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E5420"), - }, - }, - { /* Handle problems with rebooting on the Latitude E6420. */ - .callback = set_pci_reboot, - .ident = "Dell Latitude E6420", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6420"), - }, - }, - { /* Handle problems with rebooting on the OptiPlex 990. */ - .callback = set_pci_reboot, - .ident = "Dell OptiPlex 990", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 990"), - }, - }, - { /* Handle problems with rebooting on the Precision M6600. 
*/ - .callback = set_pci_reboot, - .ident = "Dell OptiPlex 990", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"), - }, - }, - { } -}; - -static int __init pci_reboot_init(void) -{ - /* Only do the DMI check if reboot_type hasn't been overridden - * on the command line - */ - if (reboot_default) { - dmi_check_system(pci_reboot_dmi_table); - } - return 0; -} -core_initcall(pci_reboot_init); - -static inline void kb_wait(void) -{ - int i; - - for (i = 0; i < 0x10000; i++) { - if ((inb(0x64) & 0x02) == 0) - break; - udelay(2); - } -} - -static void vmxoff_nmi(int cpu, struct pt_regs *regs) -{ - cpu_emergency_vmxoff(); -} - -/* Use NMIs as IPIs to tell all CPUs to disable virtualization - */ -static void emergency_vmx_disable_all(void) -{ - /* Just make sure we won't change CPUs while doing this */ - local_irq_disable(); - - /* We need to disable VMX on all CPUs before rebooting, otherwise - * we risk hanging up the machine, because the CPU ignore INIT - * signals when VMX is enabled. - * - * We can't take any locks and we may be on an inconsistent - * state, so we use NMIs as IPIs to tell the other CPUs to disable - * VMX and halt. - * - * For safety, we will avoid running the nmi_shootdown_cpus() - * stuff unnecessarily, but we don't have a way to check - * if other CPUs have VMX enabled. So we will call it only if the - * CPU we are running on has VMX enabled. - * - * We will miss cases where VMX is not enabled on all CPUs. This - * shouldn't do much harm because KVM always enable VMX on all - * CPUs anyway. But we can miss it on the small window where KVM - * is still enabling VMX. - */ - if (cpu_has_vmx() && cpu_vmx_enabled()) { - /* Disable VMX on this CPU. - */ - cpu_vmxoff(); - - /* Halt and disable VMX on the other CPUs */ - nmi_shootdown_cpus(vmxoff_nmi); - - } -} - - -void __attribute__((weak)) mach_reboot_fixups(void) -{ -} - -/* - * Windows compatible x86 hardware expects the following on reboot: - * - * 1) If the FADT has the ACPI reboot register flag set, try it - * 2) If still alive, write to the keyboard controller - * 3) If still alive, write to the ACPI reboot register again - * 4) If still alive, write to the keyboard controller again - * - * If the machine is still alive at this stage, it gives up. 
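A tiny userspace simulation of that retry ordering (purely illustrative; the real native_machine_emergency_restart() further below rewrites reboot_type in place and loops until the hardware finally resets):

    #include <stdio.h>

    enum method { M_ACPI, M_KBD, M_TRIPLE };
    static const char *name[] = { "ACPI reset register", "keyboard controller", "triple fault" };

    int main(void)
    {
        enum method m = M_ACPI;

        for (int attempt = 0; attempt < 8; attempt++) {          /* the kernel never gives up */
            printf("attempt %d: try the %s\n", attempt, name[m]);
            switch (m) {
            case M_ACPI:   m = M_KBD; break;                              /* steps 1 and 3 */
            case M_KBD:    m = (attempt < 3) ? M_ACPI : M_TRIPLE; break;  /* steps 2 and 4 */
            case M_TRIPLE: m = M_KBD; break;                              /* then alternate */
            }
        }
        return 0;
    }
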
We default to - * following the same pattern, except that if we're still alive after (4) we'll - * try to force a triple fault and then cycle between hitting the keyboard - * controller and doing that - */ -static void native_machine_emergency_restart(void) -{ - int i; - int attempt = 0; - int orig_reboot_type = reboot_type; - - if (reboot_emergency) - emergency_vmx_disable_all(); - - tboot_shutdown(TB_SHUTDOWN_REBOOT); - - /* Tell the BIOS if we want cold or warm reboot */ - *((unsigned short *)__va(0x472)) = reboot_mode; - - for (;;) { - /* Could also try the reset bit in the Hammer NB */ - switch (reboot_type) { - case BOOT_KBD: - mach_reboot_fixups(); /* for board specific fixups */ - - for (i = 0; i < 10; i++) { - kb_wait(); - udelay(50); - outb(0xfe, 0x64); /* pulse reset low */ - udelay(50); - } - if (attempt == 0 && orig_reboot_type == BOOT_ACPI) { - attempt = 1; - reboot_type = BOOT_ACPI; - } else { - reboot_type = BOOT_TRIPLE; - } - break; - - case BOOT_TRIPLE: - load_idt(&no_idt); - __asm__ __volatile__("int3"); - - reboot_type = BOOT_KBD; - break; - -#ifdef CONFIG_X86_32 - case BOOT_BIOS: - machine_real_restart(MRR_BIOS); - - reboot_type = BOOT_KBD; - break; -#endif - - case BOOT_ACPI: - acpi_reboot(); - reboot_type = BOOT_KBD; - break; - - case BOOT_EFI: - if (efi_enabled) - efi.reset_system(reboot_mode ? - EFI_RESET_WARM : - EFI_RESET_COLD, - EFI_SUCCESS, 0, NULL); - reboot_type = BOOT_KBD; - break; - - case BOOT_CF9: - port_cf9_safe = true; - /* fall through */ - - case BOOT_CF9_COND: - if (port_cf9_safe) { - u8 cf9 = inb(0xcf9) & ~6; - outb(cf9|2, 0xcf9); /* Request hard reset */ - udelay(50); - outb(cf9|6, 0xcf9); /* Actually do the reset */ - udelay(50); - } - reboot_type = BOOT_KBD; - break; - } - } -} - -void native_machine_shutdown(void) -{ - /* Stop the cpus and apics */ -#ifdef CONFIG_SMP - - /* The boot cpu is always logical cpu 0 */ - int reboot_cpu_id = 0; - -#ifdef CONFIG_X86_32 - /* See if there has been given a command line override */ - if ((reboot_cpu != -1) && (reboot_cpu < nr_cpu_ids) && - cpu_online(reboot_cpu)) - reboot_cpu_id = reboot_cpu; -#endif - - /* Make certain the cpu I'm about to reboot on is online */ - if (!cpu_online(reboot_cpu_id)) - reboot_cpu_id = smp_processor_id(); - - /* Make certain I only run on the appropriate processor */ - set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id)); - - /* O.K Now that I'm on the appropriate processor, - * stop all of the others. 
- */ - stop_other_cpus(); -#endif - - lapic_shutdown(); - -#ifdef CONFIG_X86_IO_APIC - disable_IO_APIC(); -#endif - -#ifdef CONFIG_HPET_TIMER - hpet_disable(); -#endif - -#ifdef CONFIG_X86_64 - x86_platform.iommu_shutdown(); -#endif -} - -static void __machine_emergency_restart(int emergency) -{ - reboot_emergency = emergency; - machine_ops.emergency_restart(); -} - -static void native_machine_restart(char *__unused) -{ - printk("machine restart\n"); - - if (!reboot_force) - machine_shutdown(); - __machine_emergency_restart(0); -} - -static void native_machine_halt(void) -{ - /* stop other cpus and apics */ - machine_shutdown(); - - tboot_shutdown(TB_SHUTDOWN_HALT); - - /* stop this cpu */ - stop_this_cpu(NULL); -} - -static void native_machine_power_off(void) -{ - if (pm_power_off) { - if (!reboot_force) - machine_shutdown(); - pm_power_off(); - } - /* a fallback in case there is no PM info available */ - tboot_shutdown(TB_SHUTDOWN_HALT); -} - -struct machine_ops machine_ops = { - .power_off = native_machine_power_off, - .shutdown = native_machine_shutdown, - .emergency_restart = native_machine_emergency_restart, - .restart = native_machine_restart, - .halt = native_machine_halt, -#ifdef CONFIG_KEXEC - .crash_shutdown = native_machine_crash_shutdown, -#endif -}; - -void machine_power_off(void) -{ - machine_ops.power_off(); -} - -void machine_shutdown(void) -{ - machine_ops.shutdown(); -} - -void machine_emergency_restart(void) -{ - __machine_emergency_restart(1); -} - -void machine_restart(char *cmd) -{ - machine_ops.restart(cmd); -} - -void machine_halt(void) -{ - machine_ops.halt(); -} - -#ifdef CONFIG_KEXEC -void machine_crash_shutdown(struct pt_regs *regs) -{ - machine_ops.crash_shutdown(regs); -} -#endif - - -#if defined(CONFIG_SMP) - -/* This keeps a track of which one is crashing cpu. */ -static int crashing_cpu; -static nmi_shootdown_cb shootdown_callback; - -static atomic_t waiting_for_crash_ipi; - -static int crash_nmi_callback(unsigned int val, struct pt_regs *regs) -{ - int cpu; - - cpu = raw_smp_processor_id(); - - /* Don't do anything if this handler is invoked on crashing cpu. - * Otherwise, system will completely hang. Crashing cpu can get - * an NMI if system was initially booted with nmi_watchdog parameter. - */ - if (cpu == crashing_cpu) - return NMI_HANDLED; - local_irq_disable(); - - shootdown_callback(cpu, regs); - - atomic_dec(&waiting_for_crash_ipi); - /* Assume hlt works */ - halt(); - for (;;) - cpu_relax(); - - return NMI_HANDLED; -} - -static void smp_send_nmi_allbutself(void) -{ - apic->send_IPI_allbutself(NMI_VECTOR); -} - -/* Halt all other CPUs, calling the specified function on each of them - * - * This function can be used to halt all other CPUs on crash - * or emergency reboot time. The function passed as parameter - * will be called inside a NMI handler on all CPUs. - */ -void nmi_shootdown_cpus(nmi_shootdown_cb callback) -{ - unsigned long msecs; - local_irq_disable(); - - /* Make a note of crashing cpu. Will be used in NMI callback.*/ - crashing_cpu = safe_smp_processor_id(); - - shootdown_callback = callback; - - atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); - /* Would it be better to replace the trap vector here? */ - if (register_nmi_handler(NMI_LOCAL, crash_nmi_callback, - NMI_FLAG_FIRST, "crash")) - return; /* return what? 
*/ - /* Ensure the new callback function is set before sending - * out the NMI - */ - wmb(); - - smp_send_nmi_allbutself(); - - msecs = 1000; /* Wait at most a second for the other cpus to stop */ - while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { - mdelay(1); - msecs--; - } - - /* Leave the nmi callback set */ -} -#else /* !CONFIG_SMP */ -void nmi_shootdown_cpus(nmi_shootdown_cb callback) -{ - /* No other CPUs to shoot down */ -} -#endif diff --git a/ANDROID_3.4.5/arch/x86/kernel/reboot_32.S b/ANDROID_3.4.5/arch/x86/kernel/reboot_32.S deleted file mode 100644 index 1d5c46df..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/reboot_32.S +++ /dev/null @@ -1,135 +0,0 @@ -#include <linux/linkage.h> -#include <linux/init.h> -#include <asm/segment.h> -#include <asm/page_types.h> - -/* - * The following code and data reboots the machine by switching to real - * mode and jumping to the BIOS reset entry point, as if the CPU has - * really been reset. The previous version asked the keyboard - * controller to pulse the CPU reset line, which is more thorough, but - * doesn't work with at least one type of 486 motherboard. It is easy - * to stop this code working; hence the copious comments. - * - * This code is called with the restart type (0 = BIOS, 1 = APM) in %eax. - */ - .section ".x86_trampoline","a" - .balign 16 - .code32 -ENTRY(machine_real_restart_asm) -r_base = . - /* Get our own relocated address */ - call 1f -1: popl %ebx - subl $(1b - r_base), %ebx - - /* Compute the equivalent real-mode segment */ - movl %ebx, %ecx - shrl $4, %ecx - - /* Patch post-real-mode segment jump */ - movw (dispatch_table - r_base)(%ebx,%eax,2),%ax - movw %ax, (101f - r_base)(%ebx) - movw %cx, (102f - r_base)(%ebx) - - /* Set up the IDT for real mode. */ - lidtl (machine_real_restart_idt - r_base)(%ebx) - - /* - * Set up a GDT from which we can load segment descriptors for real - * mode. The GDT is not used in real mode; it is just needed here to - * prepare the descriptors. - */ - lgdtl (machine_real_restart_gdt - r_base)(%ebx) - - /* - * Load the data segment registers with 16-bit compatible values - */ - movl $16, %ecx - movl %ecx, %ds - movl %ecx, %es - movl %ecx, %fs - movl %ecx, %gs - movl %ecx, %ss - ljmpl $8, $1f - r_base - -/* - * This is 16-bit protected mode code to disable paging and the cache, - * switch to real mode and jump to the BIOS reset code. - * - * The instruction that switches to real mode by writing to CR0 must be - * followed immediately by a far jump instruction, which set CS to a - * valid value for real mode, and flushes the prefetch queue to avoid - * running instructions that have already been decoded in protected - * mode. - * - * Clears all the flags except ET, especially PG (paging), PE - * (protected-mode enable) and TS (task switch for coprocessor state - * save). Flushes the TLB after paging has been disabled. Sets CD and - * NW, to disable the cache on a 486, and invalidates the cache. This - * is more like the state of a 486 after reset. I don't know if - * something else should be done for other chips. - * - * More could be done here to set up the registers as if a CPU reset had - * occurred; hopefully real BIOSs don't assume much. This is not the - * actual BIOS entry point, anyway (that is at 0xfffffff0). - * - * Most of this work is probably excessive, but it is what is tested. 
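nmi_shootdown_cpus() above pairs an atomic countdown with a bounded poll: each CPU's NMI handler decrements the counter, and the initiator waits at most a second before carrying on regardless. A hedged userspace analogue of that pattern (hypothetical helper names; C11 atomics and nanosleep() in place of atomic_t and mdelay()):

    #include <stdatomic.h>
    #include <time.h>

    static atomic_int waiting_for_ack;

    /* Called once by each signalled worker (the NMI handler, in the kernel's case). */
    void ack_shootdown(void)
    {
        atomic_fetch_sub(&waiting_for_ack, 1);
    }

    /* Initiator: arm the counter, fire the signals, then poll with a one-second cap. */
    int wait_for_acks(int nr_others, void (*send_signals)(void))
    {
        const struct timespec one_ms = { 0, 1000000 };

        atomic_store(&waiting_for_ack, nr_others);
        send_signals();

        for (int msecs = 1000; msecs > 0 && atomic_load(&waiting_for_ack) > 0; msecs--)
            nanosleep(&one_ms, NULL);

        return atomic_load(&waiting_for_ack) > 0 ? -1 : 0;   /* -1: somebody never answered */
    }
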
- */ - .code16 -1: - xorl %ecx, %ecx - movl %cr0, %eax - andl $0x00000011, %eax - orl $0x60000000, %eax - movl %eax, %cr0 - movl %ecx, %cr3 - movl %cr0, %edx - andl $0x60000000, %edx /* If no cache bits -> no wbinvd */ - jz 2f - wbinvd -2: - andb $0x10, %al - movl %eax, %cr0 - .byte 0xea /* ljmpw */ -101: .word 0 /* Offset */ -102: .word 0 /* Segment */ - -bios: - ljmpw $0xf000, $0xfff0 - -apm: - movw $0x1000, %ax - movw %ax, %ss - movw $0xf000, %sp - movw $0x5307, %ax - movw $0x0001, %bx - movw $0x0003, %cx - int $0x15 - -END(machine_real_restart_asm) - - .balign 16 - /* These must match <asm/reboot.h */ -dispatch_table: - .word bios - r_base - .word apm - r_base -END(dispatch_table) - - .balign 16 -machine_real_restart_idt: - .word 0xffff /* Length - real mode default value */ - .long 0 /* Base - real mode default value */ -END(machine_real_restart_idt) - - .balign 16 -ENTRY(machine_real_restart_gdt) - .quad 0 /* Self-pointer, filled in by PM code */ - .quad 0 /* 16-bit code segment, filled in by PM code */ - /* - * 16-bit data segment with the selector value 16 = 0x10 and - * base value 0x100; since this is consistent with real mode - * semantics we don't have to reload the segments once CR0.PE = 0. - */ - .quad GDT_ENTRY(0x0093, 0x100, 0xffff) -END(machine_real_restart_gdt) diff --git a/ANDROID_3.4.5/arch/x86/kernel/reboot_fixups_32.c b/ANDROID_3.4.5/arch/x86/kernel/reboot_fixups_32.c deleted file mode 100644 index c8e41e90..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/reboot_fixups_32.c +++ /dev/null @@ -1,102 +0,0 @@ -/* - * This is a good place to put board specific reboot fixups. - * - * List of supported fixups: - * geode-gx1/cs5530a - Jaya Kumar <jayalk@intworks.biz> - * geode-gx/lx/cs5536 - Andres Salomon <dilinger@debian.org> - * - */ - -#include <asm/delay.h> -#include <linux/pci.h> -#include <linux/interrupt.h> -#include <asm/reboot_fixups.h> -#include <asm/msr.h> -#include <linux/cs5535.h> - -static void cs5530a_warm_reset(struct pci_dev *dev) -{ - /* writing 1 to the reset control register, 0x44 causes the - cs5530a to perform a system warm reset */ - pci_write_config_byte(dev, 0x44, 0x1); - udelay(50); /* shouldn't get here but be safe and spin-a-while */ - return; -} - -static void cs5536_warm_reset(struct pci_dev *dev) -{ - /* writing 1 to the LSB of this MSR causes a hard reset */ - wrmsrl(MSR_DIVIL_SOFT_RESET, 1ULL); - udelay(50); /* shouldn't get here but be safe and spin a while */ -} - -static void rdc321x_reset(struct pci_dev *dev) -{ - unsigned i; - /* Voluntary reset the watchdog timer */ - outl(0x80003840, 0xCF8); - /* Generate a CPU reset on next tick */ - i = inl(0xCFC); - /* Use the minimum timer resolution */ - i |= 0x1600; - outl(i, 0xCFC); - outb(1, 0x92); -} - -static void ce4100_reset(struct pci_dev *dev) -{ - int i; - - for (i = 0; i < 10; i++) { - outb(0x2, 0xcf9); - udelay(50); - } -} - -struct device_fixup { - unsigned int vendor; - unsigned int device; - void (*reboot_fixup)(struct pci_dev *); -}; - -/* - * PCI ids solely used for fixups_table go here - */ -#define PCI_DEVICE_ID_INTEL_CE4100 0x0708 - -static const struct device_fixup fixups_table[] = { -{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset }, -{ PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset }, -{ PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset }, -{ PCI_VENDOR_ID_RDC, PCI_DEVICE_ID_RDC_R6030, rdc321x_reset }, -{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CE4100, ce4100_reset }, -}; - -/* - * we see if any fixup is 
available for our current hardware. if there - * is a fixup, we call it and we expect to never return from it. if we - * do return, we keep looking and then eventually fall back to the - * standard mach_reboot on return. - */ -void mach_reboot_fixups(void) -{ - const struct device_fixup *cur; - struct pci_dev *dev; - int i; - - /* we can be called from sysrq-B code. In such a case it is - * prohibited to dig PCI */ - if (in_interrupt()) - return; - - for (i=0; i < ARRAY_SIZE(fixups_table); i++) { - cur = &(fixups_table[i]); - dev = pci_get_device(cur->vendor, cur->device, NULL); - if (!dev) - continue; - - cur->reboot_fixup(dev); - pci_dev_put(dev); - } -} - diff --git a/ANDROID_3.4.5/arch/x86/kernel/relocate_kernel_32.S b/ANDROID_3.4.5/arch/x86/kernel/relocate_kernel_32.S deleted file mode 100644 index 36818f8e..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/relocate_kernel_32.S +++ /dev/null @@ -1,277 +0,0 @@ -/* - * relocate_kernel.S - put the kernel image in place to boot - * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> - * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. - */ - -#include <linux/linkage.h> -#include <asm/page_types.h> -#include <asm/kexec.h> -#include <asm/processor-flags.h> - -/* - * Must be relocatable PIC code callable as a C function - */ - -#define PTR(x) (x << 2) - -/* - * control_page + KEXEC_CONTROL_CODE_MAX_SIZE - * ~ control_page + PAGE_SIZE are used as data storage and stack for - * jumping back - */ -#define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset)) - -/* Minimal CPU state */ -#define ESP DATA(0x0) -#define CR0 DATA(0x4) -#define CR3 DATA(0x8) -#define CR4 DATA(0xc) - -/* other data */ -#define CP_VA_CONTROL_PAGE DATA(0x10) -#define CP_PA_PGD DATA(0x14) -#define CP_PA_SWAP_PAGE DATA(0x18) -#define CP_PA_BACKUP_PAGES_MAP DATA(0x1c) - - .text - .globl relocate_kernel -relocate_kernel: - /* Save the CPU context, used for jumping back */ - - pushl %ebx - pushl %esi - pushl %edi - pushl %ebp - pushf - - movl 20+8(%esp), %ebp /* list of pages */ - movl PTR(VA_CONTROL_PAGE)(%ebp), %edi - movl %esp, ESP(%edi) - movl %cr0, %eax - movl %eax, CR0(%edi) - movl %cr3, %eax - movl %eax, CR3(%edi) - movl %cr4, %eax - movl %eax, CR4(%edi) - - /* read the arguments and say goodbye to the stack */ - movl 20+4(%esp), %ebx /* page_list */ - movl 20+8(%esp), %ebp /* list of pages */ - movl 20+12(%esp), %edx /* start address */ - movl 20+16(%esp), %ecx /* cpu_has_pae */ - movl 20+20(%esp), %esi /* preserve_context */ - - /* zero out flags, and disable interrupts */ - pushl $0 - popfl - - /* save some information for jumping back */ - movl PTR(VA_CONTROL_PAGE)(%ebp), %edi - movl %edi, CP_VA_CONTROL_PAGE(%edi) - movl PTR(PA_PGD)(%ebp), %eax - movl %eax, CP_PA_PGD(%edi) - movl PTR(PA_SWAP_PAGE)(%ebp), %eax - movl %eax, CP_PA_SWAP_PAGE(%edi) - movl %ebx, CP_PA_BACKUP_PAGES_MAP(%edi) - - /* - * get physical address of control page now - * this is impossible after page table switch - */ - movl PTR(PA_CONTROL_PAGE)(%ebp), %edi - - /* switch to new set of page tables */ - movl PTR(PA_PGD)(%ebp), %eax - movl %eax, %cr3 - - /* setup a new stack at the end of the physical control page */ - lea PAGE_SIZE(%edi), %esp - - /* jump to identity mapped page */ - movl %edi, %eax - addl $(identity_mapped - relocate_kernel), %eax - pushl %eax - ret - -identity_mapped: - /* set return address to 0 if not preserving context */ - pushl $0 - /* store the start address on the stack */ - pushl %edx - - /* 
- * Set cr0 to a known state: - * - Paging disabled - * - Alignment check disabled - * - Write protect disabled - * - No task switch - * - Don't do FP software emulation. - * - Proctected mode enabled - */ - movl %cr0, %eax - andl $~(X86_CR0_PG | X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %eax - orl $(X86_CR0_PE), %eax - movl %eax, %cr0 - - /* clear cr4 if applicable */ - testl %ecx, %ecx - jz 1f - /* - * Set cr4 to a known state: - * Setting everything to zero seems safe. - */ - xorl %eax, %eax - movl %eax, %cr4 - - jmp 1f -1: - - /* Flush the TLB (needed?) */ - xorl %eax, %eax - movl %eax, %cr3 - - movl CP_PA_SWAP_PAGE(%edi), %eax - pushl %eax - pushl %ebx - call swap_pages - addl $8, %esp - - /* - * To be certain of avoiding problems with self-modifying code - * I need to execute a serializing instruction here. - * So I flush the TLB, it's handy, and not processor dependent. - */ - xorl %eax, %eax - movl %eax, %cr3 - - /* - * set all of the registers to known values - * leave %esp alone - */ - - testl %esi, %esi - jnz 1f - xorl %edi, %edi - xorl %eax, %eax - xorl %ebx, %ebx - xorl %ecx, %ecx - xorl %edx, %edx - xorl %esi, %esi - xorl %ebp, %ebp - ret -1: - popl %edx - movl CP_PA_SWAP_PAGE(%edi), %esp - addl $PAGE_SIZE, %esp -2: - call *%edx - - /* get the re-entry point of the peer system */ - movl 0(%esp), %ebp - call 1f -1: - popl %ebx - subl $(1b - relocate_kernel), %ebx - movl CP_VA_CONTROL_PAGE(%ebx), %edi - lea PAGE_SIZE(%ebx), %esp - movl CP_PA_SWAP_PAGE(%ebx), %eax - movl CP_PA_BACKUP_PAGES_MAP(%ebx), %edx - pushl %eax - pushl %edx - call swap_pages - addl $8, %esp - movl CP_PA_PGD(%ebx), %eax - movl %eax, %cr3 - movl %cr0, %eax - orl $(1<<31), %eax - movl %eax, %cr0 - lea PAGE_SIZE(%edi), %esp - movl %edi, %eax - addl $(virtual_mapped - relocate_kernel), %eax - pushl %eax - ret - -virtual_mapped: - movl CR4(%edi), %eax - movl %eax, %cr4 - movl CR3(%edi), %eax - movl %eax, %cr3 - movl CR0(%edi), %eax - movl %eax, %cr0 - movl ESP(%edi), %esp - movl %ebp, %eax - - popf - popl %ebp - popl %edi - popl %esi - popl %ebx - ret - - /* Do the copies */ -swap_pages: - movl 8(%esp), %edx - movl 4(%esp), %ecx - pushl %ebp - pushl %ebx - pushl %edi - pushl %esi - movl %ecx, %ebx - jmp 1f - -0: /* top, read another word from the indirection page */ - movl (%ebx), %ecx - addl $4, %ebx -1: - testl $0x1, %ecx /* is it a destination page */ - jz 2f - movl %ecx, %edi - andl $0xfffff000, %edi - jmp 0b -2: - testl $0x2, %ecx /* is it an indirection page */ - jz 2f - movl %ecx, %ebx - andl $0xfffff000, %ebx - jmp 0b -2: - testl $0x4, %ecx /* is it the done indicator */ - jz 2f - jmp 3f -2: - testl $0x8, %ecx /* is it the source indicator */ - jz 0b /* Ignore it otherwise */ - movl %ecx, %esi /* For every source page do a copy */ - andl $0xfffff000, %esi - - movl %edi, %eax - movl %esi, %ebp - - movl %edx, %edi - movl $1024, %ecx - rep ; movsl - - movl %ebp, %edi - movl %eax, %esi - movl $1024, %ecx - rep ; movsl - - movl %eax, %edi - movl %edx, %esi - movl $1024, %ecx - rep ; movsl - - lea PAGE_SIZE(%ebp), %esi - jmp 0b -3: - popl %esi - popl %edi - popl %ebx - popl %ebp - ret - - .globl kexec_control_code_size -.set kexec_control_code_size, . 
- relocate_kernel diff --git a/ANDROID_3.4.5/arch/x86/kernel/relocate_kernel_64.S b/ANDROID_3.4.5/arch/x86/kernel/relocate_kernel_64.S deleted file mode 100644 index 7a6f3b3b..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/relocate_kernel_64.S +++ /dev/null @@ -1,268 +0,0 @@ -/* - * relocate_kernel.S - put the kernel image in place to boot - * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com> - * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. - */ - -#include <linux/linkage.h> -#include <asm/page_types.h> -#include <asm/kexec.h> -#include <asm/processor-flags.h> -#include <asm/pgtable_types.h> - -/* - * Must be relocatable PIC code callable as a C function - */ - -#define PTR(x) (x << 3) -#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) - -/* - * control_page + KEXEC_CONTROL_CODE_MAX_SIZE - * ~ control_page + PAGE_SIZE are used as data storage and stack for - * jumping back - */ -#define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset)) - -/* Minimal CPU state */ -#define RSP DATA(0x0) -#define CR0 DATA(0x8) -#define CR3 DATA(0x10) -#define CR4 DATA(0x18) - -/* other data */ -#define CP_PA_TABLE_PAGE DATA(0x20) -#define CP_PA_SWAP_PAGE DATA(0x28) -#define CP_PA_BACKUP_PAGES_MAP DATA(0x30) - - .text - .align PAGE_SIZE - .code64 - .globl relocate_kernel -relocate_kernel: - /* - * %rdi indirection_page - * %rsi page_list - * %rdx start address - * %rcx preserve_context - */ - - /* Save the CPU context, used for jumping back */ - pushq %rbx - pushq %rbp - pushq %r12 - pushq %r13 - pushq %r14 - pushq %r15 - pushf - - movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 - movq %rsp, RSP(%r11) - movq %cr0, %rax - movq %rax, CR0(%r11) - movq %cr3, %rax - movq %rax, CR3(%r11) - movq %cr4, %rax - movq %rax, CR4(%r11) - - /* zero out flags, and disable interrupts */ - pushq $0 - popfq - - /* - * get physical address of control page now - * this is impossible after page table switch - */ - movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 - - /* get physical address of page table now too */ - movq PTR(PA_TABLE_PAGE)(%rsi), %r9 - - /* get physical address of swap page now */ - movq PTR(PA_SWAP_PAGE)(%rsi), %r10 - - /* save some information for jumping back */ - movq %r9, CP_PA_TABLE_PAGE(%r11) - movq %r10, CP_PA_SWAP_PAGE(%r11) - movq %rdi, CP_PA_BACKUP_PAGES_MAP(%r11) - - /* Switch to the identity mapped page tables */ - movq %r9, %cr3 - - /* setup a new stack at the end of the physical control page */ - lea PAGE_SIZE(%r8), %rsp - - /* jump to identity mapped page */ - addq $(identity_mapped - relocate_kernel), %r8 - pushq %r8 - ret - -identity_mapped: - /* set return address to 0 if not preserving context */ - pushq $0 - /* store the start address on the stack */ - pushq %rdx - - /* - * Set cr0 to a known state: - * - Paging enabled - * - Alignment check disabled - * - Write protect disabled - * - No task switch - * - Don't do FP software emulation. - * - Proctected mode enabled - */ - movq %cr0, %rax - andq $~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %rax - orl $(X86_CR0_PG | X86_CR0_PE), %eax - movq %rax, %cr0 - - /* - * Set cr4 to a known state: - * - physical address extension enabled - */ - movq $X86_CR4_PAE, %rax - movq %rax, %cr4 - - jmp 1f -1: - - /* Flush the TLB (needed?) */ - movq %r9, %cr3 - - movq %rcx, %r11 - call swap_pages - - /* - * To be certain of avoiding problems with self-modifying code - * I need to execute a serializing instruction here. 
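As an aside for readers following the assembly: the swap_pages routine called just above (its 32-bit form appears earlier in this diff, the 64-bit form follows below) walks kexec's chain of "indirection pages", where each word is a physical page address with flag bits in the low bits: 0x1 sets the destination, 0x2 switches to another indirection page, 0x4 ends the walk, 0x8 copies one source page. A rough standalone C rendering of that walk is sketched here; the function name is invented for the example, the flag values match the testl/testq checks in the assembly, and the real routine additionally bounces each copy through a scratch page so the old destination contents survive for the preserve_context case.

#include <string.h>

#define PAGE_SIZE  4096UL
#define PAGE_MASK  (~(PAGE_SIZE - 1))

/* Flag bits carried in the low bits of each indirection-page entry. */
#define IND_DESTINATION 0x1
#define IND_INDIRECTION 0x2
#define IND_DONE        0x4
#define IND_SOURCE      0x8

/* Illustrative walk, assuming the identity mapping in force when the
 * assembly runs, so a physical address can be used directly as a pointer. */
static void walk_indirection_pages(unsigned long *ind)
{
    unsigned long dest = 0;

    for (;;) {
        unsigned long entry = *ind++;
        unsigned long addr  = entry & PAGE_MASK;

        if (entry & IND_DONE)
            break;                               /* end of the chain */
        if (entry & IND_DESTINATION)
            dest = addr;                         /* where the next source goes */
        else if (entry & IND_INDIRECTION)
            ind = (unsigned long *)addr;         /* continue in another page */
        else if (entry & IND_SOURCE) {
            memcpy((void *)dest, (void *)addr, PAGE_SIZE);
            dest += PAGE_SIZE;                   /* destinations are consecutive */
        }
        /* entries with no recognised flag are simply skipped */
    }
}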
- * So I flush the TLB by reloading %cr3 here, it's handy, - * and not processor dependent. - */ - movq %cr3, %rax - movq %rax, %cr3 - - /* - * set all of the registers to known values - * leave %rsp alone - */ - - testq %r11, %r11 - jnz 1f - xorq %rax, %rax - xorq %rbx, %rbx - xorq %rcx, %rcx - xorq %rdx, %rdx - xorq %rsi, %rsi - xorq %rdi, %rdi - xorq %rbp, %rbp - xorq %r8, %r8 - xorq %r9, %r9 - xorq %r10, %r9 - xorq %r11, %r11 - xorq %r12, %r12 - xorq %r13, %r13 - xorq %r14, %r14 - xorq %r15, %r15 - - ret - -1: - popq %rdx - leaq PAGE_SIZE(%r10), %rsp - call *%rdx - - /* get the re-entry point of the peer system */ - movq 0(%rsp), %rbp - call 1f -1: - popq %r8 - subq $(1b - relocate_kernel), %r8 - movq CP_PA_SWAP_PAGE(%r8), %r10 - movq CP_PA_BACKUP_PAGES_MAP(%r8), %rdi - movq CP_PA_TABLE_PAGE(%r8), %rax - movq %rax, %cr3 - lea PAGE_SIZE(%r8), %rsp - call swap_pages - movq $virtual_mapped, %rax - pushq %rax - ret - -virtual_mapped: - movq RSP(%r8), %rsp - movq CR4(%r8), %rax - movq %rax, %cr4 - movq CR3(%r8), %rax - movq CR0(%r8), %r8 - movq %rax, %cr3 - movq %r8, %cr0 - movq %rbp, %rax - - popf - popq %r15 - popq %r14 - popq %r13 - popq %r12 - popq %rbp - popq %rbx - ret - - /* Do the copies */ -swap_pages: - movq %rdi, %rcx /* Put the page_list in %rcx */ - xorq %rdi, %rdi - xorq %rsi, %rsi - jmp 1f - -0: /* top, read another word for the indirection page */ - - movq (%rbx), %rcx - addq $8, %rbx -1: - testq $0x1, %rcx /* is it a destination page? */ - jz 2f - movq %rcx, %rdi - andq $0xfffffffffffff000, %rdi - jmp 0b -2: - testq $0x2, %rcx /* is it an indirection page? */ - jz 2f - movq %rcx, %rbx - andq $0xfffffffffffff000, %rbx - jmp 0b -2: - testq $0x4, %rcx /* is it the done indicator? */ - jz 2f - jmp 3f -2: - testq $0x8, %rcx /* is it the source indicator? */ - jz 0b /* Ignore it otherwise */ - movq %rcx, %rsi /* For ever source page do a copy */ - andq $0xfffffffffffff000, %rsi - - movq %rdi, %rdx - movq %rsi, %rax - - movq %r10, %rdi - movq $512, %rcx - rep ; movsq - - movq %rax, %rdi - movq %rdx, %rsi - movq $512, %rcx - rep ; movsq - - movq %rdx, %rdi - movq %r10, %rsi - movq $512, %rcx - rep ; movsq - - lea PAGE_SIZE(%rax), %rsi - jmp 0b -3: - ret - - .globl kexec_control_code_size -.set kexec_control_code_size, . 
- relocate_kernel diff --git a/ANDROID_3.4.5/arch/x86/kernel/resource.c b/ANDROID_3.4.5/arch/x86/kernel/resource.c deleted file mode 100644 index 2a26819b..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/resource.c +++ /dev/null @@ -1,48 +0,0 @@ -#include <linux/ioport.h> -#include <asm/e820.h> - -static void resource_clip(struct resource *res, resource_size_t start, - resource_size_t end) -{ - resource_size_t low = 0, high = 0; - - if (res->end < start || res->start > end) - return; /* no conflict */ - - if (res->start < start) - low = start - res->start; - - if (res->end > end) - high = res->end - end; - - /* Keep the area above or below the conflict, whichever is larger */ - if (low > high) - res->end = start - 1; - else - res->start = end + 1; -} - -static void remove_e820_regions(struct resource *avail) -{ - int i; - struct e820entry *entry; - - for (i = 0; i < e820.nr_map; i++) { - entry = &e820.map[i]; - - resource_clip(avail, entry->addr, - entry->addr + entry->size - 1); - } -} - -void arch_remove_reservations(struct resource *avail) -{ - /* Trim out BIOS areas (low 1MB and high 2MB) and E820 regions */ - if (avail->flags & IORESOURCE_MEM) { - if (avail->start < BIOS_END) - avail->start = BIOS_END; - resource_clip(avail, BIOS_ROM_BASE, BIOS_ROM_END); - - remove_e820_regions(avail); - } -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/rtc.c b/ANDROID_3.4.5/arch/x86/kernel/rtc.c deleted file mode 100644 index af6db6ec..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/rtc.c +++ /dev/null @@ -1,256 +0,0 @@ -/* - * RTC related functions - */ -#include <linux/platform_device.h> -#include <linux/mc146818rtc.h> -#include <linux/acpi.h> -#include <linux/bcd.h> -#include <linux/export.h> -#include <linux/pnp.h> -#include <linux/of.h> - -#include <asm/vsyscall.h> -#include <asm/x86_init.h> -#include <asm/time.h> -#include <asm/mrst.h> - -#ifdef CONFIG_X86_32 -/* - * This is a special lock that is owned by the CPU and holds the index - * register we are working with. It is required for NMI access to the - * CMOS/RTC registers. See include/asm-i386/mc146818rtc.h for details. - */ -volatile unsigned long cmos_lock; -EXPORT_SYMBOL(cmos_lock); -#endif /* CONFIG_X86_32 */ - -/* For two digit years assume time is always after that */ -#define CMOS_YEARS_OFFS 2000 - -DEFINE_SPINLOCK(rtc_lock); -EXPORT_SYMBOL(rtc_lock); - -/* - * In order to set the CMOS clock precisely, set_rtc_mmss has to be - * called 500 ms after the second nowtime has started, because when - * nowtime is written into the registers of the CMOS clock, it will - * jump to the next second precisely 500 ms later. Check the Motorola - * MC146818A or Dallas DS12887 data sheet for details. - * - * BUG: This routine does not handle hour overflow properly; it just - * sets the minutes. Usually you'll only notice that after reboot! 
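Before the RTC code continues: the resource_clip() helper shown above keeps whichever side of a conflicting range is larger. A small standalone demonstration of the same rule follows; the clip() function and the example addresses are illustrative only (the 0xa0000-0xbffff range is simply a familiar legacy VGA window to clip against).

#include <stdio.h>

/* Same rule as resource_clip(): trim [*res_start, *res_end] against a
 * conflicting [start, end] range, keeping the larger remaining side. */
static void clip(unsigned long *res_start, unsigned long *res_end,
                 unsigned long start, unsigned long end)
{
    unsigned long low = 0, high = 0;

    if (*res_end < start || *res_start > end)
        return;                          /* no overlap, nothing to do */
    if (*res_start < start)
        low = start - *res_start;        /* room left below the conflict */
    if (*res_end > end)
        high = *res_end - end;           /* room left above the conflict */

    if (low > high)
        *res_end = start - 1;            /* keep the lower part */
    else
        *res_start = end + 1;            /* keep the upper part */
}

int main(void)
{
    unsigned long s = 0x90000, e = 0xfffff;

    /* 0x10000 bytes remain below the conflict, 0x40000 above, so the
     * upper part wins and the range becomes 0xc0000-0xfffff. */
    clip(&s, &e, 0xa0000, 0xbffff);
    printf("clipped to %#lx-%#lx\n", s, e);
    return 0;
}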
- */ -int mach_set_rtc_mmss(unsigned long nowtime) -{ - int real_seconds, real_minutes, cmos_minutes; - unsigned char save_control, save_freq_select; - unsigned long flags; - int retval = 0; - - spin_lock_irqsave(&rtc_lock, flags); - - /* tell the clock it's being set */ - save_control = CMOS_READ(RTC_CONTROL); - CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); - - /* stop and reset prescaler */ - save_freq_select = CMOS_READ(RTC_FREQ_SELECT); - CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); - - cmos_minutes = CMOS_READ(RTC_MINUTES); - if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) - cmos_minutes = bcd2bin(cmos_minutes); - - /* - * since we're only adjusting minutes and seconds, - * don't interfere with hour overflow. This avoids - * messing with unknown time zones but requires your - * RTC not to be off by more than 15 minutes - */ - real_seconds = nowtime % 60; - real_minutes = nowtime / 60; - /* correct for half hour time zone */ - if (((abs(real_minutes - cmos_minutes) + 15)/30) & 1) - real_minutes += 30; - real_minutes %= 60; - - if (abs(real_minutes - cmos_minutes) < 30) { - if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { - real_seconds = bin2bcd(real_seconds); - real_minutes = bin2bcd(real_minutes); - } - CMOS_WRITE(real_seconds, RTC_SECONDS); - CMOS_WRITE(real_minutes, RTC_MINUTES); - } else { - printk_once(KERN_NOTICE - "set_rtc_mmss: can't update from %d to %d\n", - cmos_minutes, real_minutes); - retval = -1; - } - - /* The following flags have to be released exactly in this order, - * otherwise the DS12887 (popular MC146818A clone with integrated - * battery and quartz) will not reset the oscillator and will not - * update precisely 500 ms later. You won't find this mentioned in - * the Dallas Semiconductor data sheets, but who believes data - * sheets anyway ... -- Markus Kuhn - */ - CMOS_WRITE(save_control, RTC_CONTROL); - CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); - - spin_unlock_irqrestore(&rtc_lock, flags); - - return retval; -} - -unsigned long mach_get_cmos_time(void) -{ - unsigned int status, year, mon, day, hour, min, sec, century = 0; - unsigned long flags; - - spin_lock_irqsave(&rtc_lock, flags); - - /* - * If UIP is clear, then we have >= 244 microseconds before - * RTC registers will be updated. Spec sheet says that this - * is the reliable way to read RTC - registers. If UIP is set - * then the register access might be invalid. - */ - while ((CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)) - cpu_relax(); - - sec = CMOS_READ(RTC_SECONDS); - min = CMOS_READ(RTC_MINUTES); - hour = CMOS_READ(RTC_HOURS); - day = CMOS_READ(RTC_DAY_OF_MONTH); - mon = CMOS_READ(RTC_MONTH); - year = CMOS_READ(RTC_YEAR); - -#ifdef CONFIG_ACPI - if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID && - acpi_gbl_FADT.century) - century = CMOS_READ(acpi_gbl_FADT.century); -#endif - - status = CMOS_READ(RTC_CONTROL); - WARN_ON_ONCE(RTC_ALWAYS_BCD && (status & RTC_DM_BINARY)); - - spin_unlock_irqrestore(&rtc_lock, flags); - - if (RTC_ALWAYS_BCD || !(status & RTC_DM_BINARY)) { - sec = bcd2bin(sec); - min = bcd2bin(min); - hour = bcd2bin(hour); - day = bcd2bin(day); - mon = bcd2bin(mon); - year = bcd2bin(year); - } - - if (century) { - century = bcd2bin(century); - year += century * 100; - printk(KERN_INFO "Extended CMOS year: %d\n", century * 100); - } else - year += CMOS_YEARS_OFFS; - - return mktime(year, mon, day, hour, min, sec); -} - -/* Routines for accessing the CMOS RAM/RTC. 
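Both routines above convert between binary and the BCD encoding most CMOS clocks use unless RTC_DM_BINARY is set. A minimal standalone sketch of the two conversions, equivalent in spirit to the kernel's bcd2bin()/bin2bcd() helpers (the function names here are the sketch's own):

#include <stdio.h>

/* BCD stores one decimal digit per nibble: 0x59 means fifty-nine. */
static unsigned int bcd_to_bin(unsigned int bcd)
{
    return (bcd & 0x0f) + (bcd >> 4) * 10;
}

static unsigned int bin_to_bcd(unsigned int bin)
{
    return ((bin / 10) << 4) | (bin % 10);
}

int main(void)
{
    printf("0x%02x -> %u\n", 0x59, bcd_to_bin(0x59));   /* prints 0x59 -> 59 */
    printf("%u -> 0x%02x\n", 23, bin_to_bcd(23));       /* prints 23 -> 0x23 */
    return 0;
}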
*/ -unsigned char rtc_cmos_read(unsigned char addr) -{ - unsigned char val; - - lock_cmos_prefix(addr); - outb(addr, RTC_PORT(0)); - val = inb(RTC_PORT(1)); - lock_cmos_suffix(addr); - - return val; -} -EXPORT_SYMBOL(rtc_cmos_read); - -void rtc_cmos_write(unsigned char val, unsigned char addr) -{ - lock_cmos_prefix(addr); - outb(addr, RTC_PORT(0)); - outb(val, RTC_PORT(1)); - lock_cmos_suffix(addr); -} -EXPORT_SYMBOL(rtc_cmos_write); - -int update_persistent_clock(struct timespec now) -{ - return x86_platform.set_wallclock(now.tv_sec); -} - -/* not static: needed by APM */ -void read_persistent_clock(struct timespec *ts) -{ - unsigned long retval; - - retval = x86_platform.get_wallclock(); - - ts->tv_sec = retval; - ts->tv_nsec = 0; -} - -unsigned long long native_read_tsc(void) -{ - return __native_read_tsc(); -} -EXPORT_SYMBOL(native_read_tsc); - - -static struct resource rtc_resources[] = { - [0] = { - .start = RTC_PORT(0), - .end = RTC_PORT(1), - .flags = IORESOURCE_IO, - }, - [1] = { - .start = RTC_IRQ, - .end = RTC_IRQ, - .flags = IORESOURCE_IRQ, - } -}; - -static struct platform_device rtc_device = { - .name = "rtc_cmos", - .id = -1, - .resource = rtc_resources, - .num_resources = ARRAY_SIZE(rtc_resources), -}; - -static __init int add_rtc_cmos(void) -{ -#ifdef CONFIG_PNP - static const char *ids[] __initconst = - { "PNP0b00", "PNP0b01", "PNP0b02", }; - struct pnp_dev *dev; - struct pnp_id *id; - int i; - - pnp_for_each_dev(dev) { - for (id = dev->id; id; id = id->next) { - for (i = 0; i < ARRAY_SIZE(ids); i++) { - if (compare_pnp_id(id, ids[i]) != 0) - return 0; - } - } - } -#endif - if (of_have_populated_dt()) - return 0; - - /* Intel MID platforms don't have ioport rtc */ - if (mrst_identify_cpu()) - return -ENODEV; - - platform_device_register(&rtc_device); - dev_info(&rtc_device.dev, - "registered platform RTC device (no PNP device found)\n"); - - return 0; -} -device_initcall(add_rtc_cmos); diff --git a/ANDROID_3.4.5/arch/x86/kernel/setup.c b/ANDROID_3.4.5/arch/x86/kernel/setup.c deleted file mode 100644 index 1a290156..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/setup.c +++ /dev/null @@ -1,1060 +0,0 @@ -/* - * Copyright (C) 1995 Linus Torvalds - * - * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 - * - * Memory region support - * David Parsons <orc@pell.chi.il.us>, July-August 1999 - * - * Added E820 sanitization routine (removes overlapping memory regions); - * Brian Moyle <bmoyle@mvista.com>, February 2001 - * - * Moved CPU detection code to cpu/${cpu}.c - * Patrick Mochel <mochel@osdl.org>, March 2002 - * - * Provisions for empty E820 memory regions (reported by certain BIOSes). - * Alex Achenbach <xela@slit.de>, December 2002. 
- * - */ - -/* - * This file handles the architecture-dependent parts of initialization - */ - -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/mmzone.h> -#include <linux/screen_info.h> -#include <linux/ioport.h> -#include <linux/acpi.h> -#include <linux/sfi.h> -#include <linux/apm_bios.h> -#include <linux/initrd.h> -#include <linux/bootmem.h> -#include <linux/memblock.h> -#include <linux/seq_file.h> -#include <linux/console.h> -#include <linux/mca.h> -#include <linux/root_dev.h> -#include <linux/highmem.h> -#include <linux/module.h> -#include <linux/efi.h> -#include <linux/init.h> -#include <linux/edd.h> -#include <linux/iscsi_ibft.h> -#include <linux/nodemask.h> -#include <linux/kexec.h> -#include <linux/dmi.h> -#include <linux/pfn.h> -#include <linux/pci.h> -#include <asm/pci-direct.h> -#include <linux/init_ohci1394_dma.h> -#include <linux/kvm_para.h> - -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/stddef.h> -#include <linux/unistd.h> -#include <linux/ptrace.h> -#include <linux/user.h> -#include <linux/delay.h> - -#include <linux/kallsyms.h> -#include <linux/cpufreq.h> -#include <linux/dma-mapping.h> -#include <linux/ctype.h> -#include <linux/uaccess.h> - -#include <linux/percpu.h> -#include <linux/crash_dump.h> -#include <linux/tboot.h> - -#include <video/edid.h> - -#include <asm/mtrr.h> -#include <asm/apic.h> -#include <asm/trampoline.h> -#include <asm/e820.h> -#include <asm/mpspec.h> -#include <asm/setup.h> -#include <asm/efi.h> -#include <asm/timer.h> -#include <asm/i8259.h> -#include <asm/sections.h> -#include <asm/dmi.h> -#include <asm/io_apic.h> -#include <asm/ist.h> -#include <asm/setup_arch.h> -#include <asm/bios_ebda.h> -#include <asm/cacheflush.h> -#include <asm/processor.h> -#include <asm/bugs.h> - -#include <asm/vsyscall.h> -#include <asm/cpu.h> -#include <asm/desc.h> -#include <asm/dma.h> -#include <asm/iommu.h> -#include <asm/gart.h> -#include <asm/mmu_context.h> -#include <asm/proto.h> - -#include <asm/paravirt.h> -#include <asm/hypervisor.h> -#include <asm/olpc_ofw.h> - -#include <asm/percpu.h> -#include <asm/topology.h> -#include <asm/apicdef.h> -#include <asm/amd_nb.h> -#ifdef CONFIG_X86_64 -#include <asm/numa_64.h> -#endif -#include <asm/mce.h> -#include <asm/alternative.h> -#include <asm/prom.h> - -/* - * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. - * The direct mapping extends to max_pfn_mapped, so that we can directly access - * apertures, ACPI and other tables without having to play with fixmaps. - */ -unsigned long max_low_pfn_mapped; -unsigned long max_pfn_mapped; - -#ifdef CONFIG_DMI -RESERVE_BRK(dmi_alloc, 65536); -#endif - - -static __initdata unsigned long _brk_start = (unsigned long)__brk_base; -unsigned long _brk_end = (unsigned long)__brk_base; - -#ifdef CONFIG_X86_64 -int default_cpu_present_to_apicid(int mps_cpu) -{ - return __default_cpu_present_to_apicid(mps_cpu); -} - -int default_check_phys_apicid_present(int phys_apicid) -{ - return __default_check_phys_apicid_present(phys_apicid); -} -#endif - -#ifndef CONFIG_DEBUG_BOOT_PARAMS -struct boot_params __initdata boot_params; -#else -struct boot_params boot_params; -#endif - -/* - * Machine setup.. 
- */ -static struct resource data_resource = { - .name = "Kernel data", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -static struct resource code_resource = { - .name = "Kernel code", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -static struct resource bss_resource = { - .name = "Kernel bss", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - - -#ifdef CONFIG_X86_32 -/* cpu data as detected by the assembly code in head.S */ -struct cpuinfo_x86 new_cpu_data __cpuinitdata = {0, 0, 0, 0, -1, 1, 0, 0, -1}; -/* common cpu data for all cpus */ -struct cpuinfo_x86 boot_cpu_data __read_mostly = {0, 0, 0, 0, -1, 1, 0, 0, -1}; -EXPORT_SYMBOL(boot_cpu_data); -static void set_mca_bus(int x) -{ -#ifdef CONFIG_MCA - MCA_bus = x; -#endif -} - -unsigned int def_to_bigsmp; - -/* for MCA, but anyone else can use it if they want */ -unsigned int machine_id; -unsigned int machine_submodel_id; -unsigned int BIOS_revision; - -struct apm_info apm_info; -EXPORT_SYMBOL(apm_info); - -#if defined(CONFIG_X86_SPEEDSTEP_SMI) || \ - defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE) -struct ist_info ist_info; -EXPORT_SYMBOL(ist_info); -#else -struct ist_info ist_info; -#endif - -#else -struct cpuinfo_x86 boot_cpu_data __read_mostly = { - .x86_phys_bits = MAX_PHYSMEM_BITS, -}; -EXPORT_SYMBOL(boot_cpu_data); -#endif - - -#if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64) -unsigned long mmu_cr4_features; -#else -unsigned long mmu_cr4_features = X86_CR4_PAE; -#endif - -/* Boot loader ID and version as integers, for the benefit of proc_dointvec */ -int bootloader_type, bootloader_version; - -/* - * Setup options - */ -struct screen_info screen_info; -EXPORT_SYMBOL(screen_info); -struct edid_info edid_info; -EXPORT_SYMBOL_GPL(edid_info); - -extern int root_mountflags; - -unsigned long saved_video_mode; - -#define RAMDISK_IMAGE_START_MASK 0x07FF -#define RAMDISK_PROMPT_FLAG 0x8000 -#define RAMDISK_LOAD_FLAG 0x4000 - -static char __initdata command_line[COMMAND_LINE_SIZE]; -#ifdef CONFIG_CMDLINE_BOOL -static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; -#endif - -#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) -struct edd edd; -#ifdef CONFIG_EDD_MODULE -EXPORT_SYMBOL(edd); -#endif -/** - * copy_edd() - Copy the BIOS EDD information - * from boot_params into a safe place. 
- * - */ -static inline void __init copy_edd(void) -{ - memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer, - sizeof(edd.mbr_signature)); - memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info)); - edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries; - edd.edd_info_nr = boot_params.eddbuf_entries; -} -#else -static inline void __init copy_edd(void) -{ -} -#endif - -void * __init extend_brk(size_t size, size_t align) -{ - size_t mask = align - 1; - void *ret; - - BUG_ON(_brk_start == 0); - BUG_ON(align & mask); - - _brk_end = (_brk_end + mask) & ~mask; - BUG_ON((char *)(_brk_end + size) > __brk_limit); - - ret = (void *)_brk_end; - _brk_end += size; - - memset(ret, 0, size); - - return ret; -} - -#ifdef CONFIG_X86_64 -static void __init init_gbpages(void) -{ - if (direct_gbpages && cpu_has_gbpages) - printk(KERN_INFO "Using GB pages for direct mapping\n"); - else - direct_gbpages = 0; -} -#else -static inline void init_gbpages(void) -{ -} -static void __init cleanup_highmap(void) -{ -} -#endif - -static void __init reserve_brk(void) -{ - if (_brk_end > _brk_start) - memblock_reserve(__pa(_brk_start), - __pa(_brk_end) - __pa(_brk_start)); - - /* Mark brk area as locked down and no longer taking any - new allocations */ - _brk_start = 0; -} - -#ifdef CONFIG_BLK_DEV_INITRD - -#define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) -static void __init relocate_initrd(void) -{ - /* Assume only end is not page aligned */ - u64 ramdisk_image = boot_params.hdr.ramdisk_image; - u64 ramdisk_size = boot_params.hdr.ramdisk_size; - u64 area_size = PAGE_ALIGN(ramdisk_size); - u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; - u64 ramdisk_here; - unsigned long slop, clen, mapaddr; - char *p, *q; - - /* We need to move the initrd down into lowmem */ - ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size, - PAGE_SIZE); - - if (!ramdisk_here) - panic("Cannot find place for new RAMDISK of size %lld\n", - ramdisk_size); - - /* Note: this includes all the lowmem currently occupied by - the initrd, we rely on that fact to keep the data intact. 
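The extend_brk() routine shown above is a simple early bump allocator: round the current break up to the requested power-of-two alignment, hand out the next size bytes zeroed, and move the break forward. The same align-and-bump pattern in standalone form, with illustrative names (early_pool and early_alloc are not kernel symbols):

#include <assert.h>
#include <stddef.h>
#include <string.h>

static char   early_pool[16 * 1024];    /* stand-in for the brk area */
static size_t early_brk;                /* current break offset */

static void *early_alloc(size_t size, size_t align)
{
    size_t mask = align - 1;

    assert((align & mask) == 0);                   /* power-of-two alignment only,
                                                      like BUG_ON(align & mask) above */
    early_brk = (early_brk + mask) & ~mask;        /* round the break up */
    assert(early_brk + size <= sizeof(early_pool));

    void *ret = early_pool + early_brk;
    early_brk += size;
    memset(ret, 0, size);                          /* extend_brk() also zeroes */
    return ret;
}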
*/ - memblock_reserve(ramdisk_here, area_size); - initrd_start = ramdisk_here + PAGE_OFFSET; - initrd_end = initrd_start + ramdisk_size; - printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n", - ramdisk_here, ramdisk_here + ramdisk_size); - - q = (char *)initrd_start; - - /* Copy any lowmem portion of the initrd */ - if (ramdisk_image < end_of_lowmem) { - clen = end_of_lowmem - ramdisk_image; - p = (char *)__va(ramdisk_image); - memcpy(q, p, clen); - q += clen; - ramdisk_image += clen; - ramdisk_size -= clen; - } - - /* Copy the highmem portion of the initrd */ - while (ramdisk_size) { - slop = ramdisk_image & ~PAGE_MASK; - clen = ramdisk_size; - if (clen > MAX_MAP_CHUNK-slop) - clen = MAX_MAP_CHUNK-slop; - mapaddr = ramdisk_image & PAGE_MASK; - p = early_memremap(mapaddr, clen+slop); - memcpy(q, p+slop, clen); - early_iounmap(p, clen+slop); - q += clen; - ramdisk_image += clen; - ramdisk_size -= clen; - } - /* high pages is not converted by early_res_to_bootmem */ - ramdisk_image = boot_params.hdr.ramdisk_image; - ramdisk_size = boot_params.hdr.ramdisk_size; - printk(KERN_INFO "Move RAMDISK from %016llx - %016llx to" - " %08llx - %08llx\n", - ramdisk_image, ramdisk_image + ramdisk_size - 1, - ramdisk_here, ramdisk_here + ramdisk_size - 1); -} - -static void __init reserve_initrd(void) -{ - /* Assume only end is not page aligned */ - u64 ramdisk_image = boot_params.hdr.ramdisk_image; - u64 ramdisk_size = boot_params.hdr.ramdisk_size; - u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); - u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; - - if (!boot_params.hdr.type_of_loader || - !ramdisk_image || !ramdisk_size) - return; /* No initrd provided by bootloader */ - - initrd_start = 0; - - if (ramdisk_size >= (end_of_lowmem>>1)) { - memblock_free(ramdisk_image, ramdisk_end - ramdisk_image); - printk(KERN_ERR "initrd too large to handle, " - "disabling initrd\n"); - return; - } - - printk(KERN_INFO "RAMDISK: %08llx - %08llx\n", ramdisk_image, - ramdisk_end); - - - if (ramdisk_end <= end_of_lowmem) { - /* All in lowmem, easy case */ - /* - * don't need to reserve again, already reserved early - * in i386_start_kernel - */ - initrd_start = ramdisk_image + PAGE_OFFSET; - initrd_end = initrd_start + ramdisk_size; - return; - } - - relocate_initrd(); - - memblock_free(ramdisk_image, ramdisk_end - ramdisk_image); -} -#else -static void __init reserve_initrd(void) -{ -} -#endif /* CONFIG_BLK_DEV_INITRD */ - -static void __init parse_setup_data(void) -{ - struct setup_data *data; - u64 pa_data; - - if (boot_params.hdr.version < 0x0209) - return; - pa_data = boot_params.hdr.setup_data; - while (pa_data) { - u32 data_len, map_len; - - map_len = max(PAGE_SIZE - (pa_data & ~PAGE_MASK), - (u64)sizeof(struct setup_data)); - data = early_memremap(pa_data, map_len); - data_len = data->len + sizeof(struct setup_data); - if (data_len > map_len) { - early_iounmap(data, map_len); - data = early_memremap(pa_data, data_len); - map_len = data_len; - } - - switch (data->type) { - case SETUP_E820_EXT: - parse_e820_ext(data); - break; - case SETUP_DTB: - add_dtb(pa_data); - break; - default: - break; - } - pa_data = data->next; - early_iounmap(data, map_len); - } -} - -static void __init e820_reserve_setup_data(void) -{ - struct setup_data *data; - u64 pa_data; - int found = 0; - - if (boot_params.hdr.version < 0x0209) - return; - pa_data = boot_params.hdr.setup_data; - while (pa_data) { - data = early_memremap(pa_data, sizeof(*data)); - e820_update_range(pa_data, sizeof(*data)+data->len, - 
E820_RAM, E820_RESERVED_KERN); - found = 1; - pa_data = data->next; - early_iounmap(data, sizeof(*data)); - } - if (!found) - return; - - sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); - memcpy(&e820_saved, &e820, sizeof(struct e820map)); - printk(KERN_INFO "extended physical RAM map:\n"); - e820_print_map("reserve setup_data"); -} - -static void __init memblock_x86_reserve_range_setup_data(void) -{ - struct setup_data *data; - u64 pa_data; - - if (boot_params.hdr.version < 0x0209) - return; - pa_data = boot_params.hdr.setup_data; - while (pa_data) { - data = early_memremap(pa_data, sizeof(*data)); - memblock_reserve(pa_data, sizeof(*data) + data->len); - pa_data = data->next; - early_iounmap(data, sizeof(*data)); - } -} - -/* - * --------- Crashkernel reservation ------------------------------ - */ - -#ifdef CONFIG_KEXEC - -/* - * Keep the crash kernel below this limit. On 32 bits earlier kernels - * would limit the kernel to the low 512 MiB due to mapping restrictions. - * On 64 bits, kexec-tools currently limits us to 896 MiB; increase this - * limit once kexec-tools are fixed. - */ -#ifdef CONFIG_X86_32 -# define CRASH_KERNEL_ADDR_MAX (512 << 20) -#else -# define CRASH_KERNEL_ADDR_MAX (896 << 20) -#endif - -static void __init reserve_crashkernel(void) -{ - unsigned long long total_mem; - unsigned long long crash_size, crash_base; - int ret; - - total_mem = memblock_phys_mem_size(); - - ret = parse_crashkernel(boot_command_line, total_mem, - &crash_size, &crash_base); - if (ret != 0 || crash_size <= 0) - return; - - /* 0 means: find the address automatically */ - if (crash_base <= 0) { - const unsigned long long alignment = 16<<20; /* 16M */ - - /* - * kexec want bzImage is below CRASH_KERNEL_ADDR_MAX - */ - crash_base = memblock_find_in_range(alignment, - CRASH_KERNEL_ADDR_MAX, crash_size, alignment); - - if (!crash_base) { - pr_info("crashkernel reservation failed - No suitable area found.\n"); - return; - } - } else { - unsigned long long start; - - start = memblock_find_in_range(crash_base, - crash_base + crash_size, crash_size, 1<<20); - if (start != crash_base) { - pr_info("crashkernel reservation failed - memory is in use.\n"); - return; - } - } - memblock_reserve(crash_base, crash_size); - - printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " - "for crashkernel (System RAM: %ldMB)\n", - (unsigned long)(crash_size >> 20), - (unsigned long)(crash_base >> 20), - (unsigned long)(total_mem >> 20)); - - crashk_res.start = crash_base; - crashk_res.end = crash_base + crash_size - 1; - insert_resource(&iomem_resource, &crashk_res); -} -#else -static void __init reserve_crashkernel(void) -{ -} -#endif - -static struct resource standard_io_resources[] = { - { .name = "dma1", .start = 0x00, .end = 0x1f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO }, - { .name = "pic1", .start = 0x20, .end = 0x21, - .flags = IORESOURCE_BUSY | IORESOURCE_IO }, - { .name = "timer0", .start = 0x40, .end = 0x43, - .flags = IORESOURCE_BUSY | IORESOURCE_IO }, - { .name = "timer1", .start = 0x50, .end = 0x53, - .flags = IORESOURCE_BUSY | IORESOURCE_IO }, - { .name = "keyboard", .start = 0x60, .end = 0x60, - .flags = IORESOURCE_BUSY | IORESOURCE_IO }, - { .name = "keyboard", .start = 0x64, .end = 0x64, - .flags = IORESOURCE_BUSY | IORESOURCE_IO }, - { .name = "dma page reg", .start = 0x80, .end = 0x8f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO }, - { .name = "pic2", .start = 0xa0, .end = 0xa1, - .flags = IORESOURCE_BUSY | IORESOURCE_IO }, - { .name = "dma2", .start = 0xc0, .end = 0xdf, - .flags 
= IORESOURCE_BUSY | IORESOURCE_IO }, - { .name = "fpu", .start = 0xf0, .end = 0xff, - .flags = IORESOURCE_BUSY | IORESOURCE_IO } -}; - -void __init reserve_standard_io_resources(void) -{ - int i; - - /* request I/O space for devices used on all i[345]86 PCs */ - for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) - request_resource(&ioport_resource, &standard_io_resources[i]); - -} - -static __init void reserve_ibft_region(void) -{ - unsigned long addr, size = 0; - - addr = find_ibft_region(&size); - - if (size) - memblock_reserve(addr, size); -} - -static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10; - -static void __init trim_bios_range(void) -{ - /* - * A special case is the first 4Kb of memory; - * This is a BIOS owned area, not kernel ram, but generally - * not listed as such in the E820 table. - * - * This typically reserves additional memory (64KiB by default) - * since some BIOSes are known to corrupt low memory. See the - * Kconfig help text for X86_RESERVE_LOW. - */ - e820_update_range(0, ALIGN(reserve_low, PAGE_SIZE), - E820_RAM, E820_RESERVED); - - /* - * special case: Some BIOSen report the PC BIOS - * area (640->1Mb) as ram even though it is not. - * take them out. - */ - e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1); - sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); -} - -static int __init parse_reservelow(char *p) -{ - unsigned long long size; - - if (!p) - return -EINVAL; - - size = memparse(p, &p); - - if (size < 4096) - size = 4096; - - if (size > 640*1024) - size = 640*1024; - - reserve_low = size; - - return 0; -} - -early_param("reservelow", parse_reservelow); - -/* - * Determine if we were loaded by an EFI loader. If so, then we have also been - * passed the efi memmap, systab, etc., so we should use these data structures - * for initialization. Note, the efi init code path is determined by the - * global efi_enabled. This allows the same kernel image to be used on existing - * systems (with a traditional BIOS) as well as on EFI systems. - */ -/* - * setup_arch - architecture-specific boot-time initializations - * - * Note: On x86_64, fixmaps are ready for use even before this is called. - */ - -void __init setup_arch(char **cmdline_p) -{ -#ifdef CONFIG_X86_32 - memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); - visws_early_detect(); - - /* - * copy kernel address range established so far and switch - * to the proper swapper page table - */ - clone_pgd_range(swapper_pg_dir + KERNEL_PGD_BOUNDARY, - initial_page_table + KERNEL_PGD_BOUNDARY, - KERNEL_PGD_PTRS); - - load_cr3(swapper_pg_dir); - __flush_tlb_all(); -#else - printk(KERN_INFO "Command line: %s\n", boot_command_line); -#endif - - /* - * If we have OLPC OFW, we might end up relocating the fixmap due to - * reserve_top(), so do this before touching the ioremap area. 
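On the reservelow= handling a little earlier: memparse() accepts size suffixes such as K and M, and the parsed value is then clamped before trim_bios_range() marks that much low memory reserved. The clamp in isolation looks like this (a sketch, not the kernel function):

/* Clamp a requested low-memory reservation to [4 KiB, 640 KiB],
 * mirroring parse_reservelow() above. */
static unsigned long clamp_reservelow(unsigned long long size)
{
    if (size < 4096)
        size = 4096;            /* always keep at least the first page reserved */
    if (size > 640 * 1024)
        size = 640 * 1024;      /* never reserve past the 640K boundary */
    return (unsigned long)size;
}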
- */ - olpc_ofw_detect(); - - early_trap_init(); - early_cpu_init(); - early_ioremap_init(); - - setup_olpc_ofw_pgd(); - - ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); - screen_info = boot_params.screen_info; - edid_info = boot_params.edid_info; -#ifdef CONFIG_X86_32 - apm_info.bios = boot_params.apm_bios_info; - ist_info = boot_params.ist_info; - if (boot_params.sys_desc_table.length != 0) { - set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2); - machine_id = boot_params.sys_desc_table.table[0]; - machine_submodel_id = boot_params.sys_desc_table.table[1]; - BIOS_revision = boot_params.sys_desc_table.table[2]; - } -#endif - saved_video_mode = boot_params.hdr.vid_mode; - bootloader_type = boot_params.hdr.type_of_loader; - if ((bootloader_type >> 4) == 0xe) { - bootloader_type &= 0xf; - bootloader_type |= (boot_params.hdr.ext_loader_type+0x10) << 4; - } - bootloader_version = bootloader_type & 0xf; - bootloader_version |= boot_params.hdr.ext_loader_ver << 4; - -#ifdef CONFIG_BLK_DEV_RAM - rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK; - rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0); - rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0); -#endif -#ifdef CONFIG_EFI - if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, - "EL32", 4)) { - efi_enabled = 1; - efi_64bit = false; - } else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, - "EL64", 4)) { - efi_enabled = 1; - efi_64bit = true; - } - if (efi_enabled && efi_memblock_x86_reserve_range()) - efi_enabled = 0; -#endif - - x86_init.oem.arch_setup(); - - iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1; - setup_memory_map(); - parse_setup_data(); - /* update the e820_saved too */ - e820_reserve_setup_data(); - - copy_edd(); - - if (!boot_params.hdr.root_flags) - root_mountflags &= ~MS_RDONLY; - init_mm.start_code = (unsigned long) _text; - init_mm.end_code = (unsigned long) _etext; - init_mm.end_data = (unsigned long) _edata; - init_mm.brk = _brk_end; - - code_resource.start = virt_to_phys(_text); - code_resource.end = virt_to_phys(_etext)-1; - data_resource.start = virt_to_phys(_etext); - data_resource.end = virt_to_phys(_edata)-1; - bss_resource.start = virt_to_phys(&__bss_start); - bss_resource.end = virt_to_phys(&__bss_stop)-1; - -#ifdef CONFIG_CMDLINE_BOOL -#ifdef CONFIG_CMDLINE_OVERRIDE - strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); -#else - if (builtin_cmdline[0]) { - /* append boot loader cmdline to builtin */ - strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE); - strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE); - strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); - } -#endif -#endif - - strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); - *cmdline_p = command_line; - - /* - * x86_configure_nx() is called before parse_early_param() to detect - * whether hardware doesn't support NX (so that the early EHCI debug - * console setup can safely call set_fixmap()). It may then be called - * again from within noexec_setup() during parsing early parameters - * to honor the respective command line option. 
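The loader ID decoding near the top of setup_arch() above packs two values: type_of_loader keeps the vendor in its high nibble, with 0xE meaning "use the extended type byte", and the version is rebuilt from the low nibble plus ext_loader_ver. A small sketch of the same decoding; the struct and function are invented for the example, while the parameter names mirror the boot_params.hdr fields:

struct loader_id {
    int type;        /* bootloader vendor ID */
    int version;     /* bootloader version */
};

static struct loader_id decode_loader(unsigned char type_of_loader,
                                      unsigned char ext_loader_type,
                                      unsigned char ext_loader_ver)
{
    struct loader_id id;

    id.type = type_of_loader;
    if ((id.type >> 4) == 0xe) {
        /* 0xE in the high nibble redirects to the extended type byte. */
        id.type &= 0xf;
        id.type |= (ext_loader_type + 0x10) << 4;
    }
    id.version = id.type & 0xf;
    id.version |= ext_loader_ver << 4;

    return id;
}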
- */ - x86_configure_nx(); - - parse_early_param(); - - x86_report_nx(); - - /* after early param, so could get panic from serial */ - memblock_x86_reserve_range_setup_data(); - - if (acpi_mps_check()) { -#ifdef CONFIG_X86_LOCAL_APIC - disable_apic = 1; -#endif - setup_clear_cpu_cap(X86_FEATURE_APIC); - } - -#ifdef CONFIG_PCI - if (pci_early_dump_regs) - early_dump_pci_devices(); -#endif - - finish_e820_parsing(); - - if (efi_enabled) - efi_init(); - - dmi_scan_machine(); - - /* - * VMware detection requires dmi to be available, so this - * needs to be done after dmi_scan_machine, for the BP. - */ - init_hypervisor_platform(); - - x86_init.resources.probe_roms(); - - /* after parse_early_param, so could debug it */ - insert_resource(&iomem_resource, &code_resource); - insert_resource(&iomem_resource, &data_resource); - insert_resource(&iomem_resource, &bss_resource); - - trim_bios_range(); -#ifdef CONFIG_X86_32 - if (ppro_with_ram_bug()) { - e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM, - E820_RESERVED); - sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); - printk(KERN_INFO "fixed physical RAM map:\n"); - e820_print_map("bad_ppro"); - } -#else - early_gart_iommu_check(); -#endif - - /* - * partially used pages are not usable - thus - * we are rounding upwards: - */ - max_pfn = e820_end_of_ram_pfn(); - - /* update e820 for memory not covered by WB MTRRs */ - mtrr_bp_init(); - if (mtrr_trim_uncached_memory(max_pfn)) - max_pfn = e820_end_of_ram_pfn(); - -#ifdef CONFIG_X86_32 - /* max_low_pfn get updated here */ - find_low_pfn_range(); -#else - num_physpages = max_pfn; - - check_x2apic(); - - /* How many end-of-memory variables you have, grandma! */ - /* need this before calling reserve_initrd */ - if (max_pfn > (1UL<<(32 - PAGE_SHIFT))) - max_low_pfn = e820_end_of_low_ram_pfn(); - else - max_low_pfn = max_pfn; - - high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; -#endif - - /* - * Find and reserve possible boot-time SMP configuration: - */ - find_smp_config(); - - reserve_ibft_region(); - - /* - * Need to conclude brk, before memblock_x86_fill() - * it could use memblock_find_in_range, could overlap with - * brk area. - */ - reserve_brk(); - - cleanup_highmap(); - - memblock.current_limit = get_max_mapped(); - memblock_x86_fill(); - - /* - * The EFI specification says that boot service code won't be called - * after ExitBootServices(). This is, in fact, a lie. - */ - if (efi_enabled) - efi_reserve_boot_services(); - - /* preallocate 4k for mptable mpc */ - early_reserve_e820_mpc_new(); - -#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION - setup_bios_corruption_check(); -#endif - - printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n", - max_pfn_mapped<<PAGE_SHIFT); - - setup_trampolines(); - - init_gbpages(); - - /* max_pfn_mapped is updated here */ - max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT); - max_pfn_mapped = max_low_pfn_mapped; - -#ifdef CONFIG_X86_64 - if (max_pfn > max_low_pfn) { - max_pfn_mapped = init_memory_mapping(1UL<<32, - max_pfn<<PAGE_SHIFT); - /* can we preseve max_low_pfn ?*/ - max_low_pfn = max_pfn; - } -#endif - memblock.current_limit = get_max_mapped(); - - /* - * NOTE: On x86-32, only from this point on, fixmaps are ready for use. 
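A note on the max_low_pfn computation earlier in setup_arch(): with 4 KiB pages, 1UL << (32 - PAGE_SHIFT) is 0x100000 page frames, i.e. the 4 GiB mark, so a separate low-memory limit is only computed when RAM extends past 4 GiB. A tiny check of that arithmetic (PAGE_SHIFT is assumed to be 12 here):

#include <stdio.h>

#define PAGE_SHIFT 12    /* 4 KiB pages, as on x86 */

int main(void)
{
    unsigned long long four_gb_pfn = 1ULL << (32 - PAGE_SHIFT);

    /* 0x100000 page frames * 4 KiB per frame = 4 GiB. */
    printf("pfn limit %#llx = %llu GiB\n", four_gb_pfn,
           (four_gb_pfn << PAGE_SHIFT) >> 30);
    return 0;
}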
- */ - -#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT - if (init_ohci1394_dma_early) - init_ohci1394_dma_on_all_controllers(); -#endif - /* Allocate bigger log buffer */ - setup_log_buf(1); - - reserve_initrd(); - - reserve_crashkernel(); - - vsmp_init(); - - io_delay_init(); - - /* - * Parse the ACPI tables for possible boot-time SMP configuration. - */ - acpi_boot_table_init(); - - early_acpi_boot_init(); - - initmem_init(); - memblock_find_dma_reserve(); - -#ifdef CONFIG_KVM_CLOCK - kvmclock_init(); -#endif - - x86_init.paging.pagetable_setup_start(swapper_pg_dir); - paging_init(); - x86_init.paging.pagetable_setup_done(swapper_pg_dir); - - if (boot_cpu_data.cpuid_level >= 0) { - /* A CPU has %cr4 if and only if it has CPUID */ - mmu_cr4_features = read_cr4(); - } - -#ifdef CONFIG_X86_32 - /* sync back kernel address range */ - clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, - swapper_pg_dir + KERNEL_PGD_BOUNDARY, - KERNEL_PGD_PTRS); -#endif - - tboot_probe(); - -#ifdef CONFIG_X86_64 - map_vsyscall(); -#endif - - generic_apic_probe(); - - early_quirks(); - - /* - * Read APIC and some other early information from ACPI tables. - */ - acpi_boot_init(); - sfi_init(); - x86_dtb_init(); - - /* - * get boot-time SMP configuration: - */ - if (smp_found_config) - get_smp_config(); - - prefill_possible_map(); - - init_cpu_to_node(); - - init_apic_mappings(); - ioapic_and_gsi_init(); - - kvm_guest_init(); - - e820_reserve_resources(); - e820_mark_nosave_regions(max_low_pfn); - - x86_init.resources.reserve_resources(); - - e820_setup_gap(); - -#ifdef CONFIG_VT -#if defined(CONFIG_VGA_CONSOLE) - if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY)) - conswitchp = &vga_con; -#elif defined(CONFIG_DUMMY_CONSOLE) - conswitchp = &dummy_con; -#endif -#endif - x86_init.oem.banner(); - - x86_init.timers.wallclock_init(); - - x86_platform.wallclock_init(); - - mcheck_init(); - - arch_init_ideal_nops(); -} - -#ifdef CONFIG_X86_32 - -static struct resource video_ram_resource = { - .name = "Video RAM area", - .start = 0xa0000, - .end = 0xbffff, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -void __init i386_reserve_resources(void) -{ - request_resource(&iomem_resource, &video_ram_resource); - reserve_standard_io_resources(); -} - -#endif /* CONFIG_X86_32 */ diff --git a/ANDROID_3.4.5/arch/x86/kernel/setup_percpu.c b/ANDROID_3.4.5/arch/x86/kernel/setup_percpu.c deleted file mode 100644 index 5a98aa27..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/setup_percpu.c +++ /dev/null @@ -1,287 +0,0 @@ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/bootmem.h> -#include <linux/percpu.h> -#include <linux/kexec.h> -#include <linux/crash_dump.h> -#include <linux/smp.h> -#include <linux/topology.h> -#include <linux/pfn.h> -#include <asm/sections.h> -#include <asm/processor.h> -#include <asm/setup.h> -#include <asm/mpspec.h> -#include <asm/apicdef.h> -#include <asm/highmem.h> -#include <asm/proto.h> -#include <asm/cpumask.h> -#include <asm/cpu.h> -#include <asm/stackprotector.h> - -DEFINE_PER_CPU(int, cpu_number); -EXPORT_PER_CPU_SYMBOL(cpu_number); - -#ifdef CONFIG_X86_64 -#define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load) -#else -#define BOOT_PERCPU_OFFSET 0 -#endif - -DEFINE_PER_CPU(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET; -EXPORT_PER_CPU_SYMBOL(this_cpu_off); - -unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { - [0 ... 
NR_CPUS-1] = BOOT_PERCPU_OFFSET, -}; -EXPORT_SYMBOL(__per_cpu_offset); - -/* - * On x86_64 symbols referenced from code should be reachable using - * 32bit relocations. Reserve space for static percpu variables in - * modules so that they are always served from the first chunk which - * is located at the percpu segment base. On x86_32, anything can - * address anywhere. No need to reserve space in the first chunk. - */ -#ifdef CONFIG_X86_64 -#define PERCPU_FIRST_CHUNK_RESERVE PERCPU_MODULE_RESERVE -#else -#define PERCPU_FIRST_CHUNK_RESERVE 0 -#endif - -#ifdef CONFIG_X86_32 -/** - * pcpu_need_numa - determine percpu allocation needs to consider NUMA - * - * If NUMA is not configured or there is only one NUMA node available, - * there is no reason to consider NUMA. This function determines - * whether percpu allocation should consider NUMA or not. - * - * RETURNS: - * true if NUMA should be considered; otherwise, false. - */ -static bool __init pcpu_need_numa(void) -{ -#ifdef CONFIG_NEED_MULTIPLE_NODES - pg_data_t *last = NULL; - unsigned int cpu; - - for_each_possible_cpu(cpu) { - int node = early_cpu_to_node(cpu); - - if (node_online(node) && NODE_DATA(node) && - last && last != NODE_DATA(node)) - return true; - - last = NODE_DATA(node); - } -#endif - return false; -} -#endif - -/** - * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu - * @cpu: cpu to allocate for - * @size: size allocation in bytes - * @align: alignment - * - * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper - * does the right thing for NUMA regardless of the current - * configuration. - * - * RETURNS: - * Pointer to the allocated area on success, NULL on failure. - */ -static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, - unsigned long align) -{ - const unsigned long goal = __pa(MAX_DMA_ADDRESS); -#ifdef CONFIG_NEED_MULTIPLE_NODES - int node = early_cpu_to_node(cpu); - void *ptr; - - if (!node_online(node) || !NODE_DATA(node)) { - ptr = __alloc_bootmem_nopanic(size, align, goal); - pr_info("cpu %d has no node %d or node-local memory\n", - cpu, node); - pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n", - cpu, size, __pa(ptr)); - } else { - ptr = __alloc_bootmem_node_nopanic(NODE_DATA(node), - size, align, goal); - pr_debug("per cpu data for cpu%d %lu bytes on node%d at %016lx\n", - cpu, size, node, __pa(ptr)); - } - return ptr; -#else - return __alloc_bootmem_nopanic(size, align, goal); -#endif -} - -/* - * Helpers for first chunk memory allocation - */ -static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) -{ - return pcpu_alloc_bootmem(cpu, size, align); -} - -static void __init pcpu_fc_free(void *ptr, size_t size) -{ - free_bootmem(__pa(ptr), size); -} - -static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) -{ -#ifdef CONFIG_NEED_MULTIPLE_NODES - if (early_cpu_to_node(from) == early_cpu_to_node(to)) - return LOCAL_DISTANCE; - else - return REMOTE_DISTANCE; -#else - return LOCAL_DISTANCE; -#endif -} - -static void __init pcpup_populate_pte(unsigned long addr) -{ - populate_extra_pte(addr); -} - -static inline void setup_percpu_segment(int cpu) -{ -#ifdef CONFIG_X86_32 - struct desc_struct gdt; - - pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF, - 0x2 | DESCTYPE_S, 0x8); - gdt.s = 1; - write_gdt_entry(get_cpu_gdt_table(cpu), - GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); -#endif -} - -void __init setup_per_cpu_areas(void) -{ - unsigned int cpu; - unsigned long delta; - int rc; - - pr_info("NR_CPUS:%d 
nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", - NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); - - /* - * Allocate percpu area. Embedding allocator is our favorite; - * however, on NUMA configurations, it can result in very - * sparse unit mapping and vmalloc area isn't spacious enough - * on 32bit. Use page in that case. - */ -#ifdef CONFIG_X86_32 - if (pcpu_chosen_fc == PCPU_FC_AUTO && pcpu_need_numa()) - pcpu_chosen_fc = PCPU_FC_PAGE; -#endif - rc = -EINVAL; - if (pcpu_chosen_fc != PCPU_FC_PAGE) { - const size_t dyn_size = PERCPU_MODULE_RESERVE + - PERCPU_DYNAMIC_RESERVE - PERCPU_FIRST_CHUNK_RESERVE; - size_t atom_size; - - /* - * On 64bit, use PMD_SIZE for atom_size so that embedded - * percpu areas are aligned to PMD. This, in the future, - * can also allow using PMD mappings in vmalloc area. Use - * PAGE_SIZE on 32bit as vmalloc space is highly contended - * and large vmalloc area allocs can easily fail. - */ -#ifdef CONFIG_X86_64 - atom_size = PMD_SIZE; -#else - atom_size = PAGE_SIZE; -#endif - rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, - dyn_size, atom_size, - pcpu_cpu_distance, - pcpu_fc_alloc, pcpu_fc_free); - if (rc < 0) - pr_warning("%s allocator failed (%d), falling back to page size\n", - pcpu_fc_names[pcpu_chosen_fc], rc); - } - if (rc < 0) - rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, - pcpu_fc_alloc, pcpu_fc_free, - pcpup_populate_pte); - if (rc < 0) - panic("cannot initialize percpu area (err=%d)", rc); - - /* alrighty, percpu areas up and running */ - delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; - for_each_possible_cpu(cpu) { - per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu]; - per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); - per_cpu(cpu_number, cpu) = cpu; - setup_percpu_segment(cpu); - setup_stack_canary_segment(cpu); - /* - * Copy data used in early init routines from the - * initial arrays to the per cpu data areas. These - * arrays then become expendable and the *_early_ptr's - * are zeroed indicating that the static arrays are - * gone. - */ -#ifdef CONFIG_X86_LOCAL_APIC - per_cpu(x86_cpu_to_apicid, cpu) = - early_per_cpu_map(x86_cpu_to_apicid, cpu); - per_cpu(x86_bios_cpu_apicid, cpu) = - early_per_cpu_map(x86_bios_cpu_apicid, cpu); -#endif -#ifdef CONFIG_X86_32 - per_cpu(x86_cpu_to_logical_apicid, cpu) = - early_per_cpu_map(x86_cpu_to_logical_apicid, cpu); -#endif -#ifdef CONFIG_X86_64 - per_cpu(irq_stack_ptr, cpu) = - per_cpu(irq_stack_union.irq_stack, cpu) + - IRQ_STACK_SIZE - 64; -#endif -#ifdef CONFIG_NUMA - per_cpu(x86_cpu_to_node_map, cpu) = - early_per_cpu_map(x86_cpu_to_node_map, cpu); - /* - * Ensure that the boot cpu numa_node is correct when the boot - * cpu is on a node that doesn't have memory installed. - * Also cpu_up() will call cpu_to_node() for APs when - * MEMORY_HOTPLUG is defined, before per_cpu(numa_node) is set - * up later with c_init aka intel_init/amd_init. - * So set them all (boot cpu and all APs). - */ - set_cpu_numa_node(cpu, early_cpu_to_node(cpu)); -#endif - /* - * Up to this point, the boot CPU has been using .init.data - * area. Reload any changed state for the boot CPU. 
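The loop above rebases every CPU onto its own copy of the per-CPU image: each CPU's offset is the distance from the link-time per-CPU section to that CPU's unit inside the allocated chunk, and a per-CPU variable's address is simply its link-time address plus that offset. A toy illustration of the addressing scheme; all names here (cpu_number_proto, chunk, per_cpu_ptr) are invented for the example and the pointer arithmetic is only meant to show the idea:

#include <stdint.h>
#include <stdio.h>

#define NCPUS 4

static int  cpu_number_proto;        /* link-time "prototype" of a per-CPU variable */
static int  chunk[NCPUS][16];        /* the allocator's chunk: one unit per CPU */
static long cpu_offset[NCPUS];       /* plays the role of __per_cpu_offset[] */

/* per_cpu(var, cpu): the prototype's address plus that CPU's offset. */
#define per_cpu_ptr(proto, cpu) \
    ((int *)((uintptr_t)&(proto) + cpu_offset[(cpu)]))

int main(void)
{
    for (int cpu = 0; cpu < NCPUS; cpu++) {
        /* offset = this CPU's unit address minus the link-time address,
         * mirroring delta + pcpu_unit_offsets[cpu] above. */
        cpu_offset[cpu] = (long)((uintptr_t)chunk[cpu] -
                                 (uintptr_t)&cpu_number_proto);
        *per_cpu_ptr(cpu_number_proto, cpu) = cpu;
    }
    for (int cpu = 0; cpu < NCPUS; cpu++)
        printf("cpu %d at offset %ld\n",
               *per_cpu_ptr(cpu_number_proto, cpu), cpu_offset[cpu]);
    return 0;
}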
- */ - if (!cpu) - switch_to_new_gdt(cpu); - } - - /* indicate the early static arrays will soon be gone */ -#ifdef CONFIG_X86_LOCAL_APIC - early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; - early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; -#endif -#ifdef CONFIG_X86_32 - early_per_cpu_ptr(x86_cpu_to_logical_apicid) = NULL; -#endif -#ifdef CONFIG_NUMA - early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; -#endif - - /* Setup node to cpumask map */ - setup_node_to_cpumask_map(); - - /* Setup cpu initialized, callin, callout masks */ - setup_cpu_local_masks(); -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/signal.c b/ANDROID_3.4.5/arch/x86/kernel/signal.c deleted file mode 100644 index 115eac43..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/signal.c +++ /dev/null @@ -1,959 +0,0 @@ -/* - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs - * - * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson - * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes - * 2000-2002 x86-64 support by Andi Kleen - */ -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/smp.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/wait.h> -#include <linux/tracehook.h> -#include <linux/unistd.h> -#include <linux/stddef.h> -#include <linux/personality.h> -#include <linux/uaccess.h> -#include <linux/user-return-notifier.h> - -#include <asm/processor.h> -#include <asm/ucontext.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> -#include <asm/vdso.h> -#include <asm/mce.h> -#include <asm/sighandling.h> - -#ifdef CONFIG_X86_64 -#include <asm/proto.h> -#include <asm/ia32_unistd.h> -#include <asm/sys_ia32.h> -#endif /* CONFIG_X86_64 */ - -#include <asm/syscall.h> -#include <asm/syscalls.h> - -#include <asm/sigframe.h> - -#ifdef CONFIG_X86_32 -# define FIX_EFLAGS (__FIX_EFLAGS | X86_EFLAGS_RF) -#else -# define FIX_EFLAGS __FIX_EFLAGS -#endif - -#define COPY(x) do { \ - get_user_ex(regs->x, &sc->x); \ -} while (0) - -#define GET_SEG(seg) ({ \ - unsigned short tmp; \ - get_user_ex(tmp, &sc->seg); \ - tmp; \ -}) - -#define COPY_SEG(seg) do { \ - regs->seg = GET_SEG(seg); \ -} while (0) - -#define COPY_SEG_CPL3(seg) do { \ - regs->seg = GET_SEG(seg) | 3; \ -} while (0) - -int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, - unsigned long *pax) -{ - void __user *buf; - unsigned int tmpflags; - unsigned int err = 0; - - /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; - - get_user_try { - -#ifdef CONFIG_X86_32 - set_user_gs(regs, GET_SEG(gs)); - COPY_SEG(fs); - COPY_SEG(es); - COPY_SEG(ds); -#endif /* CONFIG_X86_32 */ - - COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); - COPY(dx); COPY(cx); COPY(ip); - -#ifdef CONFIG_X86_64 - COPY(r8); - COPY(r9); - COPY(r10); - COPY(r11); - COPY(r12); - COPY(r13); - COPY(r14); - COPY(r15); -#endif /* CONFIG_X86_64 */ - -#ifdef CONFIG_X86_32 - COPY_SEG_CPL3(cs); - COPY_SEG_CPL3(ss); -#else /* !CONFIG_X86_32 */ - /* Kernel saves and restores only the CS segment register on signals, - * which is the bare minimum needed to allow mixed 32/64-bit code. - * App's signal handler can save/restore other segments if needed. 
*/ - COPY_SEG_CPL3(cs); -#endif /* CONFIG_X86_32 */ - - get_user_ex(tmpflags, &sc->flags); - regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); - regs->orig_ax = -1; /* disable syscall checks */ - - get_user_ex(buf, &sc->fpstate); - err |= restore_i387_xstate(buf); - - get_user_ex(*pax, &sc->ax); - } get_user_catch(err); - - return err; -} - -int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, - struct pt_regs *regs, unsigned long mask) -{ - int err = 0; - - put_user_try { - -#ifdef CONFIG_X86_32 - put_user_ex(get_user_gs(regs), (unsigned int __user *)&sc->gs); - put_user_ex(regs->fs, (unsigned int __user *)&sc->fs); - put_user_ex(regs->es, (unsigned int __user *)&sc->es); - put_user_ex(regs->ds, (unsigned int __user *)&sc->ds); -#endif /* CONFIG_X86_32 */ - - put_user_ex(regs->di, &sc->di); - put_user_ex(regs->si, &sc->si); - put_user_ex(regs->bp, &sc->bp); - put_user_ex(regs->sp, &sc->sp); - put_user_ex(regs->bx, &sc->bx); - put_user_ex(regs->dx, &sc->dx); - put_user_ex(regs->cx, &sc->cx); - put_user_ex(regs->ax, &sc->ax); -#ifdef CONFIG_X86_64 - put_user_ex(regs->r8, &sc->r8); - put_user_ex(regs->r9, &sc->r9); - put_user_ex(regs->r10, &sc->r10); - put_user_ex(regs->r11, &sc->r11); - put_user_ex(regs->r12, &sc->r12); - put_user_ex(regs->r13, &sc->r13); - put_user_ex(regs->r14, &sc->r14); - put_user_ex(regs->r15, &sc->r15); -#endif /* CONFIG_X86_64 */ - - put_user_ex(current->thread.trap_nr, &sc->trapno); - put_user_ex(current->thread.error_code, &sc->err); - put_user_ex(regs->ip, &sc->ip); -#ifdef CONFIG_X86_32 - put_user_ex(regs->cs, (unsigned int __user *)&sc->cs); - put_user_ex(regs->flags, &sc->flags); - put_user_ex(regs->sp, &sc->sp_at_signal); - put_user_ex(regs->ss, (unsigned int __user *)&sc->ss); -#else /* !CONFIG_X86_32 */ - put_user_ex(regs->flags, &sc->flags); - put_user_ex(regs->cs, &sc->cs); - put_user_ex(0, &sc->gs); - put_user_ex(0, &sc->fs); -#endif /* CONFIG_X86_32 */ - - put_user_ex(fpstate, &sc->fpstate); - - /* non-iBCS2 extensions.. */ - put_user_ex(mask, &sc->oldmask); - put_user_ex(current->thread.cr2, &sc->cr2); - } put_user_catch(err); - - return err; -} - -/* - * Set up a signal frame. - */ - -/* - * Determine which stack to use.. - */ -static unsigned long align_sigframe(unsigned long sp) -{ -#ifdef CONFIG_X86_32 - /* - * Align the stack pointer according to the i386 ABI, - * i.e. so that on function entry ((sp + 4) & 15) == 0. - */ - sp = ((sp + 4) & -16ul) - 4; -#else /* !CONFIG_X86_32 */ - sp = round_down(sp, 16) - 8; -#endif - return sp; -} - -static inline void __user * -get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, - void __user **fpstate) -{ - /* Default to using normal stack */ - unsigned long sp = regs->sp; - int onsigstack = on_sig_stack(sp); - -#ifdef CONFIG_X86_64 - /* redzone */ - sp -= 128; -#endif /* CONFIG_X86_64 */ - - if (!onsigstack) { - /* This is the X/Open sanctioned signal stack switching. */ - if (ka->sa.sa_flags & SA_ONSTACK) { - if (current->sas_ss_size) - sp = current->sas_ss_sp + current->sas_ss_size; - } else { -#ifdef CONFIG_X86_32 - /* This is the legacy signal stack switching. 
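The alignment rule in align_sigframe() above deserves a word: on i386 the ABI wants ((sp + 4) & 15) == 0 at function entry, the +4 standing in for the return address a call would have pushed, while on x86-64 the frame is dropped to a 16-byte boundary minus 8 for the same reason. A quick standalone check of that arithmetic (the helper names are the sketch's own):

#include <assert.h>
#include <stdio.h>

static unsigned long align_sig_sp_32(unsigned long sp)
{
    /* ((sp + 4) & -16) - 4: afterwards (sp + 4) is 16-byte aligned. */
    return ((sp + 4) & ~15UL) - 4;
}

static unsigned long align_sig_sp_64(unsigned long sp)
{
    /* round_down(sp, 16) - 8: the handler sees sp % 16 == 8, exactly as
     * after a call made from 16-byte-aligned code. */
    return (sp & ~15UL) - 8;
}

int main(void)
{
    for (unsigned long sp = 0xbfff0000UL; sp < 0xbfff0020UL; sp++)
        assert((align_sig_sp_32(sp) + 4) % 16 == 0);
    for (unsigned long sp = 0x7fff0000UL; sp < 0x7fff0020UL; sp++)
        assert(align_sig_sp_64(sp) % 16 == 8);
    printf("signal stack alignment holds\n");
    return 0;
}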
*/ - if ((regs->ss & 0xffff) != __USER_DS && - !(ka->sa.sa_flags & SA_RESTORER) && - ka->sa.sa_restorer) - sp = (unsigned long) ka->sa.sa_restorer; -#endif /* CONFIG_X86_32 */ - } - } - - if (used_math()) { - sp -= sig_xstate_size; -#ifdef CONFIG_X86_64 - sp = round_down(sp, 64); -#endif /* CONFIG_X86_64 */ - *fpstate = (void __user *)sp; - } - - sp = align_sigframe(sp - frame_size); - - /* - * If we are on the alternate signal stack and would overflow it, don't. - * Return an always-bogus address instead so we will die with SIGSEGV. - */ - if (onsigstack && !likely(on_sig_stack(sp))) - return (void __user *)-1L; - - /* save i387 state */ - if (used_math() && save_i387_xstate(*fpstate) < 0) - return (void __user *)-1L; - - return (void __user *)sp; -} - -#ifdef CONFIG_X86_32 -static const struct { - u16 poplmovl; - u32 val; - u16 int80; -} __attribute__((packed)) retcode = { - 0xb858, /* popl %eax; movl $..., %eax */ - __NR_sigreturn, - 0x80cd, /* int $0x80 */ -}; - -static const struct { - u8 movl; - u32 val; - u16 int80; - u8 pad; -} __attribute__((packed)) rt_retcode = { - 0xb8, /* movl $..., %eax */ - __NR_rt_sigreturn, - 0x80cd, /* int $0x80 */ - 0 -}; - -static int -__setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, - struct pt_regs *regs) -{ - struct sigframe __user *frame; - void __user *restorer; - int err = 0; - void __user *fpstate = NULL; - - frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); - - if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) - return -EFAULT; - - if (__put_user(sig, &frame->sig)) - return -EFAULT; - - if (setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0])) - return -EFAULT; - - if (_NSIG_WORDS > 1) { - if (__copy_to_user(&frame->extramask, &set->sig[1], - sizeof(frame->extramask))) - return -EFAULT; - } - - if (current->mm->context.vdso) - restorer = VDSO32_SYMBOL(current->mm->context.vdso, sigreturn); - else - restorer = &frame->retcode; - if (ka->sa.sa_flags & SA_RESTORER) - restorer = ka->sa.sa_restorer; - - /* Set up to return from userspace. */ - err |= __put_user(restorer, &frame->pretcode); - - /* - * This is popl %eax ; movl $__NR_sigreturn, %eax ; int $0x80 - * - * WE DO NOT USE IT ANY MORE! It's only left here for historical - * reasons and because gdb uses it as a signature to notice - * signal handler stack frames. - */ - err |= __put_user(*((u64 *)&retcode), (u64 *)frame->retcode); - - if (err) - return -EFAULT; - - /* Set up registers for signal handler */ - regs->sp = (unsigned long)frame; - regs->ip = (unsigned long)ka->sa.sa_handler; - regs->ax = (unsigned long)sig; - regs->dx = 0; - regs->cx = 0; - - regs->ds = __USER_DS; - regs->es = __USER_DS; - regs->ss = __USER_DS; - regs->cs = __USER_CS; - - return 0; -} - -static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs *regs) -{ - struct rt_sigframe __user *frame; - void __user *restorer; - int err = 0; - void __user *fpstate = NULL; - - frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); - - if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) - return -EFAULT; - - put_user_try { - put_user_ex(sig, &frame->sig); - put_user_ex(&frame->info, &frame->pinfo); - put_user_ex(&frame->uc, &frame->puc); - err |= copy_siginfo_to_user(&frame->info, info); - - /* Create the ucontext. 
*/ - if (cpu_has_xsave) - put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); - else - put_user_ex(0, &frame->uc.uc_flags); - put_user_ex(0, &frame->uc.uc_link); - put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - put_user_ex(sas_ss_flags(regs->sp), - &frame->uc.uc_stack.ss_flags); - put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, - regs, set->sig[0]); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - - /* Set up to return from userspace. */ - restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); - if (ka->sa.sa_flags & SA_RESTORER) - restorer = ka->sa.sa_restorer; - put_user_ex(restorer, &frame->pretcode); - - /* - * This is movl $__NR_rt_sigreturn, %ax ; int $0x80 - * - * WE DO NOT USE IT ANY MORE! It's only left here for historical - * reasons and because gdb uses it as a signature to notice - * signal handler stack frames. - */ - put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode); - } put_user_catch(err); - - if (err) - return -EFAULT; - - /* Set up registers for signal handler */ - regs->sp = (unsigned long)frame; - regs->ip = (unsigned long)ka->sa.sa_handler; - regs->ax = (unsigned long)sig; - regs->dx = (unsigned long)&frame->info; - regs->cx = (unsigned long)&frame->uc; - - regs->ds = __USER_DS; - regs->es = __USER_DS; - regs->ss = __USER_DS; - regs->cs = __USER_CS; - - return 0; -} -#else /* !CONFIG_X86_32 */ -static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs *regs) -{ - struct rt_sigframe __user *frame; - void __user *fp = NULL; - int err = 0; - struct task_struct *me = current; - - frame = get_sigframe(ka, regs, sizeof(struct rt_sigframe), &fp); - - if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) - return -EFAULT; - - if (ka->sa.sa_flags & SA_SIGINFO) { - if (copy_siginfo_to_user(&frame->info, info)) - return -EFAULT; - } - - put_user_try { - /* Create the ucontext. */ - if (cpu_has_xsave) - put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); - else - put_user_ex(0, &frame->uc.uc_flags); - put_user_ex(0, &frame->uc.uc_link); - put_user_ex(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - put_user_ex(sas_ss_flags(regs->sp), - &frame->uc.uc_stack.ss_flags); - put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - - /* Set up to return from userspace. If provided, use a stub - already in userspace. */ - /* x86-64 should always use SA_RESTORER. */ - if (ka->sa.sa_flags & SA_RESTORER) { - put_user_ex(ka->sa.sa_restorer, &frame->pretcode); - } else { - /* could use a vstub here */ - err |= -EFAULT; - } - } put_user_catch(err); - - if (err) - return -EFAULT; - - /* Set up registers for signal handler */ - regs->di = sig; - /* In case the signal handler was declared without prototypes */ - regs->ax = 0; - - /* This also works for non SA_SIGINFO handlers because they expect the - next argument after the signal number on the stack. */ - regs->si = (unsigned long)&frame->info; - regs->dx = (unsigned long)&frame->uc; - regs->ip = (unsigned long) ka->sa.sa_handler; - - regs->sp = (unsigned long)frame; - - /* Set up the CS register to run signal handlers in 64-bit mode, - even if the handler happens to be interrupting 32-bit code. 
*/ - regs->cs = __USER_CS; - - return 0; -} -#endif /* CONFIG_X86_32 */ - -#ifdef CONFIG_X86_32 -/* - * Atomically swap in the new signal mask, and wait for a signal. - */ -asmlinkage int -sys_sigsuspend(int history0, int history1, old_sigset_t mask) -{ - sigset_t blocked; - - current->saved_sigmask = current->blocked; - - mask &= _BLOCKABLE; - siginitset(&blocked, mask); - set_current_blocked(&blocked); - - current->state = TASK_INTERRUPTIBLE; - schedule(); - - set_restore_sigmask(); - return -ERESTARTNOHAND; -} - -asmlinkage int -sys_sigaction(int sig, const struct old_sigaction __user *act, - struct old_sigaction __user *oact) -{ - struct k_sigaction new_ka, old_ka; - int ret = 0; - - if (act) { - old_sigset_t mask; - - if (!access_ok(VERIFY_READ, act, sizeof(*act))) - return -EFAULT; - - get_user_try { - get_user_ex(new_ka.sa.sa_handler, &act->sa_handler); - get_user_ex(new_ka.sa.sa_flags, &act->sa_flags); - get_user_ex(mask, &act->sa_mask); - get_user_ex(new_ka.sa.sa_restorer, &act->sa_restorer); - } get_user_catch(ret); - - if (ret) - return -EFAULT; - siginitset(&new_ka.sa.sa_mask, mask); - } - - ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); - - if (!ret && oact) { - if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact))) - return -EFAULT; - - put_user_try { - put_user_ex(old_ka.sa.sa_handler, &oact->sa_handler); - put_user_ex(old_ka.sa.sa_flags, &oact->sa_flags); - put_user_ex(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); - put_user_ex(old_ka.sa.sa_restorer, &oact->sa_restorer); - } put_user_catch(ret); - - if (ret) - return -EFAULT; - } - - return ret; -} -#endif /* CONFIG_X86_32 */ - -long -sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, - struct pt_regs *regs) -{ - return do_sigaltstack(uss, uoss, regs->sp); -} - -/* - * Do a signal return; undo the signal stack. 
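The frame-construction code above (get_sigframe(), setup_sigcontext(), __setup_rt_frame()) has a direct userspace counterpart: the rt frame it writes is what a SA_SIGINFO handler receives as its siginfo_t and ucontext_t arguments, and sigaltstack(2) is what fills the sas_ss_* fields consulted when SA_ONSTACK is set. A minimal illustrative sketch, not taken from the deleted file, assuming x86-64 Linux with glibc (REG_RIP and the 64 KiB alternate-stack size are arbitrary choices for the example):

#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <ucontext.h>

static void handler(int sig, siginfo_t *info, void *ucontext)
{
	ucontext_t *uc = ucontext;	/* the &frame->uc laid out by the kernel */

	printf("sig %d at %p, RIP 0x%llx\n", sig, info->si_addr,
	       (unsigned long long)uc->uc_mcontext.gregs[REG_RIP]);
	exit(0);
}

int main(void)
{
	static char altstack[64 * 1024];
	stack_t ss = { .ss_sp = altstack, .ss_size = sizeof(altstack) };
	struct sigaction sa = {
		.sa_sigaction	= handler,
		.sa_flags	= SA_SIGINFO | SA_ONSTACK,
	};

	sigaltstack(&ss, NULL);		/* populates current->sas_ss_sp/size */
	sigemptyset(&sa.sa_mask);
	sigaction(SIGSEGV, &sa, NULL);
	*(volatile int *)0 = 0;		/* fault; handler runs on the alt stack */
	return 1;
}

When the handler returns, control passes through the restorer to rt_sigreturn, which is the path the code below unwinds.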
- */ -#ifdef CONFIG_X86_32 -unsigned long sys_sigreturn(struct pt_regs *regs) -{ - struct sigframe __user *frame; - unsigned long ax; - sigset_t set; - - frame = (struct sigframe __user *)(regs->sp - 8); - - if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) - goto badframe; - if (__get_user(set.sig[0], &frame->sc.oldmask) || (_NSIG_WORDS > 1 - && __copy_from_user(&set.sig[1], &frame->extramask, - sizeof(frame->extramask)))) - goto badframe; - - sigdelsetmask(&set, ~_BLOCKABLE); - set_current_blocked(&set); - - if (restore_sigcontext(regs, &frame->sc, &ax)) - goto badframe; - return ax; - -badframe: - signal_fault(regs, frame, "sigreturn"); - - return 0; -} -#endif /* CONFIG_X86_32 */ - -long sys_rt_sigreturn(struct pt_regs *regs) -{ - struct rt_sigframe __user *frame; - unsigned long ax; - sigset_t set; - - frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); - if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) - goto badframe; - if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) - goto badframe; - - sigdelsetmask(&set, ~_BLOCKABLE); - set_current_blocked(&set); - - if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) - goto badframe; - - if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT) - goto badframe; - - return ax; - -badframe: - signal_fault(regs, frame, "rt_sigreturn"); - return 0; -} - -/* - * OK, we're invoking a handler: - */ -static int signr_convert(int sig) -{ -#ifdef CONFIG_X86_32 - struct thread_info *info = current_thread_info(); - - if (info->exec_domain && info->exec_domain->signal_invmap && sig < 32) - return info->exec_domain->signal_invmap[sig]; -#endif /* CONFIG_X86_32 */ - return sig; -} - -#ifdef CONFIG_X86_32 - -#define is_ia32 1 -#define ia32_setup_frame __setup_frame -#define ia32_setup_rt_frame __setup_rt_frame - -#else /* !CONFIG_X86_32 */ - -#ifdef CONFIG_IA32_EMULATION -#define is_ia32 test_thread_flag(TIF_IA32) -#else /* !CONFIG_IA32_EMULATION */ -#define is_ia32 0 -#endif /* CONFIG_IA32_EMULATION */ - -#ifdef CONFIG_X86_X32_ABI -#define is_x32 test_thread_flag(TIF_X32) - -static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, - siginfo_t *info, compat_sigset_t *set, - struct pt_regs *regs); -#else /* !CONFIG_X86_X32_ABI */ -#define is_x32 0 -#endif /* CONFIG_X86_X32_ABI */ - -int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs *regs); -int ia32_setup_frame(int sig, struct k_sigaction *ka, - sigset_t *set, struct pt_regs *regs); - -#endif /* CONFIG_X86_32 */ - -static int -setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - struct pt_regs *regs) -{ - int usig = signr_convert(sig); - sigset_t *set = &current->blocked; - int ret; - - if (current_thread_info()->status & TS_RESTORE_SIGMASK) - set = &current->saved_sigmask; - - /* Set up the stack frame */ - if (is_ia32) { - if (ka->sa.sa_flags & SA_SIGINFO) - ret = ia32_setup_rt_frame(usig, ka, info, set, regs); - else - ret = ia32_setup_frame(usig, ka, set, regs); -#ifdef CONFIG_X86_X32_ABI - } else if (is_x32) { - ret = x32_setup_rt_frame(usig, ka, info, - (compat_sigset_t *)set, regs); -#endif - } else { - ret = __setup_rt_frame(sig, ka, info, set, regs); - } - - if (ret) { - force_sigsegv(sig, current); - return -EFAULT; - } - - current_thread_info()->status &= ~TS_RESTORE_SIGMASK; - return ret; -} - -static int -handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, - struct pt_regs *regs) -{ - int ret; - - /* Are we from a system call?
*/ - if (syscall_get_nr(current, regs) >= 0) { - /* If so, check system call restarting.. */ - switch (syscall_get_error(current, regs)) { - case -ERESTART_RESTARTBLOCK: - case -ERESTARTNOHAND: - regs->ax = -EINTR; - break; - - case -ERESTARTSYS: - if (!(ka->sa.sa_flags & SA_RESTART)) { - regs->ax = -EINTR; - break; - } - /* fallthrough */ - case -ERESTARTNOINTR: - regs->ax = regs->orig_ax; - regs->ip -= 2; - break; - } - } - - /* - * If TF is set due to a debugger (TIF_FORCED_TF), clear the TF - * flag so that register information in the sigcontext is correct. - */ - if (unlikely(regs->flags & X86_EFLAGS_TF) && - likely(test_and_clear_thread_flag(TIF_FORCED_TF))) - regs->flags &= ~X86_EFLAGS_TF; - - ret = setup_rt_frame(sig, ka, info, regs); - - if (ret) - return ret; - - /* - * Clear the direction flag as per the ABI for function entry. - */ - regs->flags &= ~X86_EFLAGS_DF; - - /* - * Clear TF when entering the signal handler, but - * notify any tracer that was single-stepping it. - * The tracer may want to single-step inside the - * handler too. - */ - regs->flags &= ~X86_EFLAGS_TF; - - block_sigmask(ka, sig); - - tracehook_signal_handler(sig, info, ka, regs, - test_thread_flag(TIF_SINGLESTEP)); - - return 0; -} - -#ifdef CONFIG_X86_32 -#define NR_restart_syscall __NR_restart_syscall -#else /* !CONFIG_X86_32 */ -#define NR_restart_syscall \ - test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall -#endif /* CONFIG_X86_32 */ - -/* - * Note that 'init' is a special process: it doesn't get signals it doesn't - * want to handle. Thus you cannot kill init even with a SIGKILL even by - * mistake. - */ -static void do_signal(struct pt_regs *regs) -{ - struct k_sigaction ka; - siginfo_t info; - int signr; - - /* - * We want the common case to go fast, which is why we may in certain - * cases get here from kernel mode. Just return without doing anything - * if so. - * X86_32: vm86 regs switched out by assembly code before reaching - * here, so testing against kernel CS suffices. - */ - if (!user_mode(regs)) - return; - - signr = get_signal_to_deliver(&info, &ka, regs, NULL); - if (signr > 0) { - /* Whee! Actually deliver the signal. */ - handle_signal(signr, &info, &ka, regs); - return; - } - - /* Did we come from a system call? */ - if (syscall_get_nr(current, regs) >= 0) { - /* Restart the system call - no handlers present */ - switch (syscall_get_error(current, regs)) { - case -ERESTARTNOHAND: - case -ERESTARTSYS: - case -ERESTARTNOINTR: - regs->ax = regs->orig_ax; - regs->ip -= 2; - break; - - case -ERESTART_RESTARTBLOCK: - regs->ax = NR_restart_syscall; - regs->ip -= 2; - break; - } - } - - /* - * If there's no signal to deliver, we just put the saved sigmask - * back. 
- */ - if (current_thread_info()->status & TS_RESTORE_SIGMASK) { - current_thread_info()->status &= ~TS_RESTORE_SIGMASK; - set_current_blocked(&current->saved_sigmask); - } -} - -/* - * notification of userspace execution resumption - * - triggered by the TIF_WORK_MASK flags - */ -void -do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) -{ -#ifdef CONFIG_X86_MCE - /* notify userspace of pending MCEs */ - if (thread_info_flags & _TIF_MCE_NOTIFY) - mce_notify_process(); -#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ - - /* deal with pending signal delivery */ - if (thread_info_flags & _TIF_SIGPENDING) - do_signal(regs); - - if (thread_info_flags & _TIF_NOTIFY_RESUME) { - clear_thread_flag(TIF_NOTIFY_RESUME); - tracehook_notify_resume(regs); - if (current->replacement_session_keyring) - key_replace_session_keyring(); - } - if (thread_info_flags & _TIF_USER_RETURN_NOTIFY) - fire_user_return_notifiers(); - -#ifdef CONFIG_X86_32 - clear_thread_flag(TIF_IRET); -#endif /* CONFIG_X86_32 */ -} - -void signal_fault(struct pt_regs *regs, void __user *frame, char *where) -{ - struct task_struct *me = current; - - if (show_unhandled_signals && printk_ratelimit()) { - printk("%s" - "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", - task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG, - me->comm, me->pid, where, frame, - regs->ip, regs->sp, regs->orig_ax); - print_vma_addr(" in ", regs->ip); - printk(KERN_CONT "\n"); - } - - force_sig(SIGSEGV, me); -} - -#ifdef CONFIG_X86_X32_ABI -static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, - siginfo_t *info, compat_sigset_t *set, - struct pt_regs *regs) -{ - struct rt_sigframe_x32 __user *frame; - void __user *restorer; - int err = 0; - void __user *fpstate = NULL; - - frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); - - if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) - return -EFAULT; - - if (ka->sa.sa_flags & SA_SIGINFO) { - if (copy_siginfo_to_user32(&frame->info, info)) - return -EFAULT; - } - - put_user_try { - /* Create the ucontext. */ - if (cpu_has_xsave) - put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); - else - put_user_ex(0, &frame->uc.uc_flags); - put_user_ex(0, &frame->uc.uc_link); - put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - put_user_ex(sas_ss_flags(regs->sp), - &frame->uc.uc_stack.ss_flags); - put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - put_user_ex(0, &frame->uc.uc__pad0); - err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, - regs, set->sig[0]); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - - if (ka->sa.sa_flags & SA_RESTORER) { - restorer = ka->sa.sa_restorer; - } else { - /* could use a vstub here */ - restorer = NULL; - err |= -EFAULT; - } - put_user_ex(restorer, &frame->pretcode); - } put_user_catch(err); - - if (err) - return -EFAULT; - - /* Set up registers for signal handler */ - regs->sp = (unsigned long) frame; - regs->ip = (unsigned long) ka->sa.sa_handler; - - /* We use the x32 calling convention here...
*/ - regs->di = sig; - regs->si = (unsigned long) &frame->info; - regs->dx = (unsigned long) &frame->uc; - - loadsegment(ds, __USER_DS); - loadsegment(es, __USER_DS); - - regs->cs = __USER_CS; - regs->ss = __USER_DS; - - return 0; -} - -asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs) -{ - struct rt_sigframe_x32 __user *frame; - sigset_t set; - unsigned long ax; - struct pt_regs tregs; - - frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8); - - if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) - goto badframe; - if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) - goto badframe; - - sigdelsetmask(&set, ~_BLOCKABLE); - set_current_blocked(&set); - - if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) - goto badframe; - - tregs = *regs; - if (sys32_sigaltstack(&frame->uc.uc_stack, NULL, &tregs) == -EFAULT) - goto badframe; - - return ax; - -badframe: - signal_fault(regs, frame, "x32 rt_sigreturn"); - return 0; -} -#endif diff --git a/ANDROID_3.4.5/arch/x86/kernel/smp.c b/ANDROID_3.4.5/arch/x86/kernel/smp.c deleted file mode 100644 index 66c74f48..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/smp.c +++ /dev/null @@ -1,312 +0,0 @@ -/* - * Intel SMP support routines. - * - * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> - * (c) 1998-99, 2000, 2009 Ingo Molnar <mingo@redhat.com> - * (c) 2002,2003 Andi Kleen, SuSE Labs. - * - * i386 and x86_64 integration by Glauber Costa <gcosta@redhat.com> - * - * This code is released under the GNU General Public License version 2 or - * later. - */ - -#include <linux/init.h> - -#include <linux/mm.h> -#include <linux/delay.h> -#include <linux/spinlock.h> -#include <linux/export.h> -#include <linux/kernel_stat.h> -#include <linux/mc146818rtc.h> -#include <linux/cache.h> -#include <linux/interrupt.h> -#include <linux/cpu.h> -#include <linux/gfp.h> - -#include <asm/mtrr.h> -#include <asm/tlbflush.h> -#include <asm/mmu_context.h> -#include <asm/proto.h> -#include <asm/apic.h> -#include <asm/nmi.h> -/* - * Some notes on x86 processor bugs affecting SMP operation: - * - * Pentium, Pentium Pro, II, III (and all CPUs) have bugs. - * The Linux implications for SMP are handled as follows: - * - * Pentium III / [Xeon] - * None of the E1AP-E3AP errata are visible to the user. - * - * E1AP. see PII A1AP - * E2AP. see PII A2AP - * E3AP. see PII A3AP - * - * Pentium II / [Xeon] - * None of the A1AP-A3AP errata are visible to the user. - * - * A1AP. see PPro 1AP - * A2AP. see PPro 2AP - * A3AP. see PPro 7AP - * - * Pentium Pro - * None of 1AP-9AP errata are visible to the normal user, - * except occasional delivery of 'spurious interrupt' as trap #15. - * This is very rare and a non-problem. - * - * 1AP. Linux maps APIC as non-cacheable - * 2AP. worked around in hardware - * 3AP. fixed in C0 and above steppings microcode update. - * Linux does not use excessive STARTUP_IPIs. - * 4AP. worked around in hardware - * 5AP. symmetric IO mode (normal Linux operation) not affected. - * 'noapic' mode has vector 0xf filled out properly. - * 6AP. 'noapic' mode might be affected - fixed in later steppings - * 7AP. We do not assume writes to the LVT deassering IRQs - * 8AP. We do not enable low power mode (deep sleep) during MP bootup - * 9AP. We do not use mixed mode - * - * Pentium - * There is a marginal case where REP MOVS on 100MHz SMP - * machines with B stepping processors can fail. XXX should provide - * an L1cache=Writethrough or L1cache=off option. - * - * B stepping CPUs may hang. There are hardware work arounds - * for this. 
We warn about it in case your board doesn't have the work - * arounds. Basically that's so I can tell anyone with a B stepping - * CPU and SMP problems "tough". - * - * Specific items [From Pentium Processor Specification Update] - * - * 1AP. Linux doesn't use remote read - * 2AP. Linux doesn't trust APIC errors - * 3AP. We work around this - * 4AP. Linux never generated 3 interrupts of the same priority - * to cause a lost local interrupt. - * 5AP. Remote read is never used - * 6AP. not affected - worked around in hardware - * 7AP. not affected - worked around in hardware - * 8AP. worked around in hardware - we get explicit CS errors if not - * 9AP. only 'noapic' mode affected. Might generate spurious - * interrupts, we log only the first one and count the - * rest silently. - * 10AP. not affected - worked around in hardware - * 11AP. Linux reads the APIC between writes to avoid this, as per - * the documentation. Make sure you preserve this as it affects - * the C stepping chips too. - * 12AP. not affected - worked around in hardware - * 13AP. not affected - worked around in hardware - * 14AP. we always deassert INIT during bootup - * 15AP. not affected - worked around in hardware - * 16AP. not affected - worked around in hardware - * 17AP. not affected - worked around in hardware - * 18AP. not affected - worked around in hardware - * 19AP. not affected - worked around in BIOS - * - * If this sounds worrying believe me these bugs are either ___RARE___, - * or are signal timing bugs worked around in hardware and there's - * about nothing of note with C stepping upwards. - */ - -/* - * this function sends a 'reschedule' IPI to another CPU. - * it goes straight through and wastes no time serializing - * anything. Worst case is that we lose a reschedule ... - */ -static void native_smp_send_reschedule(int cpu) -{ - if (unlikely(cpu_is_offline(cpu))) { - WARN_ON(1); - return; - } - apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); -} - -void native_send_call_func_single_ipi(int cpu) -{ - apic->send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR); -} - -void native_send_call_func_ipi(const struct cpumask *mask) -{ - cpumask_var_t allbutself; - - if (!alloc_cpumask_var(&allbutself, GFP_ATOMIC)) { - apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR); - return; - } - - cpumask_copy(allbutself, cpu_online_mask); - cpumask_clear_cpu(smp_processor_id(), allbutself); - - if (cpumask_equal(mask, allbutself) && - cpumask_equal(cpu_online_mask, cpu_callout_mask)) - apic->send_IPI_allbutself(CALL_FUNCTION_VECTOR); - else - apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR); - - free_cpumask_var(allbutself); -} - -static atomic_t stopping_cpu = ATOMIC_INIT(-1); - -static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) -{ - /* We are registered on stopping cpu too, avoid spurious NMI */ - if (raw_smp_processor_id() == atomic_read(&stopping_cpu)) - return NMI_HANDLED; - - stop_this_cpu(NULL); - - return NMI_HANDLED; -} - -static void native_nmi_stop_other_cpus(int wait) -{ - unsigned long flags; - unsigned long timeout; - - if (reboot_force) - return; - - /* - * Use an own vector here because smp_call_function - * does lots of things not suitable in a panic situation. - */ - if (num_online_cpus() > 1) { - /* did someone beat us here? 
*/ - if (atomic_cmpxchg(&stopping_cpu, -1, safe_smp_processor_id()) != -1) - return; - - if (register_nmi_handler(NMI_LOCAL, smp_stop_nmi_callback, - NMI_FLAG_FIRST, "smp_stop")) - /* Note: we ignore failures here */ - return; - - /* sync above data before sending NMI */ - wmb(); - - apic->send_IPI_allbutself(NMI_VECTOR); - - /* - * Don't wait longer than a second if the caller - * didn't ask us to wait. - */ - timeout = USEC_PER_SEC; - while (num_online_cpus() > 1 && (wait || timeout--)) - udelay(1); - } - - local_irq_save(flags); - disable_local_APIC(); - local_irq_restore(flags); -} - -/* - * this function calls the 'stop' function on all other CPUs in the system. - */ - -asmlinkage void smp_reboot_interrupt(void) -{ - ack_APIC_irq(); - irq_enter(); - stop_this_cpu(NULL); - irq_exit(); -} - -static void native_irq_stop_other_cpus(int wait) -{ - unsigned long flags; - unsigned long timeout; - - if (reboot_force) - return; - - /* - * Use an own vector here because smp_call_function - * does lots of things not suitable in a panic situation. - * On most systems we could also use an NMI here, - * but there are a few systems around where NMI - * is problematic so stay with an non NMI for now - * (this implies we cannot stop CPUs spinning with irq off - * currently) - */ - if (num_online_cpus() > 1) { - apic->send_IPI_allbutself(REBOOT_VECTOR); - - /* - * Don't wait longer than a second if the caller - * didn't ask us to wait. - */ - timeout = USEC_PER_SEC; - while (num_online_cpus() > 1 && (wait || timeout--)) - udelay(1); - } - - local_irq_save(flags); - disable_local_APIC(); - local_irq_restore(flags); -} - -static void native_smp_disable_nmi_ipi(void) -{ - smp_ops.stop_other_cpus = native_irq_stop_other_cpus; -} - -/* - * Reschedule call back. - */ -void smp_reschedule_interrupt(struct pt_regs *regs) -{ - ack_APIC_irq(); - inc_irq_stat(irq_resched_count); - scheduler_ipi(); - /* - * KVM uses this interrupt to force a cpu out of guest mode - */ -} - -void smp_call_function_interrupt(struct pt_regs *regs) -{ - ack_APIC_irq(); - irq_enter(); - generic_smp_call_function_interrupt(); - inc_irq_stat(irq_call_count); - irq_exit(); -} - -void smp_call_function_single_interrupt(struct pt_regs *regs) -{ - ack_APIC_irq(); - irq_enter(); - generic_smp_call_function_single_interrupt(); - inc_irq_stat(irq_call_count); - irq_exit(); -} - -static int __init nonmi_ipi_setup(char *str) -{ - native_smp_disable_nmi_ipi(); - return 1; -} - -__setup("nonmi_ipi", nonmi_ipi_setup); - -struct smp_ops smp_ops = { - .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, - .smp_prepare_cpus = native_smp_prepare_cpus, - .smp_cpus_done = native_smp_cpus_done, - - .stop_other_cpus = native_nmi_stop_other_cpus, - .smp_send_reschedule = native_smp_send_reschedule, - - .cpu_up = native_cpu_up, - .cpu_die = native_cpu_die, - .cpu_disable = native_cpu_disable, - .play_dead = native_play_dead, - - .send_call_func_ipi = native_send_call_func_ipi, - .send_call_func_single_ipi = native_send_call_func_single_ipi, -}; -EXPORT_SYMBOL_GPL(smp_ops); diff --git a/ANDROID_3.4.5/arch/x86/kernel/smpboot.c b/ANDROID_3.4.5/arch/x86/kernel/smpboot.c deleted file mode 100644 index 6e1e4060..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/smpboot.c +++ /dev/null @@ -1,1429 +0,0 @@ -/* - * x86 SMP booting functions - * - * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> - * (c) 1998, 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com> - * Copyright 2001 Andi Kleen, SuSE Labs. 
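For context on the smp.c handlers above: smp_call_function_interrupt() and smp_call_function_single_interrupt() are the receiving end of the kernel's generic cross-CPU call API. A minimal sketch of the sending side as it looked in this era, not taken from the deleted files and assuming a module context where CPU 1 is online (the function and module names are invented for illustration):

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/smp.h>

static void remote_whoami(void *info)
{
	/* runs on the target CPU, in IPI context with interrupts disabled */
	*(unsigned int *)info = raw_smp_processor_id();
}

static int __init ipi_demo_init(void)
{
	unsigned int where = 0;

	/* wait = 1: block until the target CPU has run the function */
	smp_call_function_single(1, remote_whoami, &where, 1);
	pr_info("ipi_demo: remote_whoami ran on CPU %u\n", where);
	return 0;
}
module_init(ipi_demo_init);
MODULE_LICENSE("GPL");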
- * - * Much of the core SMP work is based on previous work by Thomas Radke, to - * whom a great many thanks are extended. - * - * Thanks to Intel for making available several different Pentium, - * Pentium Pro and Pentium-II/Xeon MP machines. - * Original development of Linux SMP code supported by Caldera. - * - * This code is released under the GNU General Public License version 2 or - * later. - * - * Fixes - * Felix Koop : NR_CPUS used properly - * Jose Renau : Handle single CPU case. - * Alan Cox : By repeated request 8) - Total BogoMIPS report. - * Greg Wright : Fix for kernel stacks panic. - * Erich Boleyn : MP v1.4 and additional changes. - * Matthias Sattler : Changes for 2.1 kernel map. - * Michel Lespinasse : Changes for 2.1 kernel map. - * Michael Chastain : Change trampoline.S to gnu as. - * Alan Cox : Dumb bug: 'B' step PPro's are fine - * Ingo Molnar : Added APIC timers, based on code - * from Jose Renau - * Ingo Molnar : various cleanups and rewrites - * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug. - * Maciej W. Rozycki : Bits for genuine 82489DX APICs - * Andi Kleen : Changed for SMP boot into long mode. - * Martin J. Bligh : Added support for multi-quad systems - * Dave Jones : Report invalid combinations of Athlon CPUs. - * Rusty Russell : Hacked into shape for new "hotplug" boot process. - * Andi Kleen : Converted to new state machine. - * Ashok Raj : CPU hotplug support - * Glauber Costa : i386 and x86_64 integration - */ - -#include <linux/init.h> -#include <linux/smp.h> -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/percpu.h> -#include <linux/bootmem.h> -#include <linux/err.h> -#include <linux/nmi.h> -#include <linux/tboot.h> -#include <linux/stackprotector.h> -#include <linux/gfp.h> -#include <linux/cpuidle.h> - -#include <asm/acpi.h> -#include <asm/desc.h> -#include <asm/nmi.h> -#include <asm/irq.h> -#include <asm/idle.h> -#include <asm/trampoline.h> -#include <asm/cpu.h> -#include <asm/numa.h> -#include <asm/pgtable.h> -#include <asm/tlbflush.h> -#include <asm/mtrr.h> -#include <asm/mwait.h> -#include <asm/apic.h> -#include <asm/io_apic.h> -#include <asm/setup.h> -#include <asm/uv/uv.h> -#include <linux/mc146818rtc.h> - -#include <asm/smpboot_hooks.h> -#include <asm/i8259.h> - -/* State of each CPU */ -DEFINE_PER_CPU(int, cpu_state) = { 0 }; - -/* Store all idle threads, this can be reused instead of creating -* a new thread. Also avoids complicated thread destroy functionality -* for idle threads. -*/ -#ifdef CONFIG_HOTPLUG_CPU -/* - * Needed only for CONFIG_HOTPLUG_CPU because __cpuinitdata is - * removed after init for !CONFIG_HOTPLUG_CPU. - */ -static DEFINE_PER_CPU(struct task_struct *, idle_thread_array); -#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x)) -#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p)) - -/* - * We need this for trampoline_base protection from concurrent accesses when - * off- and onlining cores wildly. 
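Several of the declarations earlier in this file (cpu_state, cpu_llc_id, cpu_sibling_map) rely on the kernel's per-CPU variable machinery. A minimal sketch of that idiom with hypothetical names, in case it is unfamiliar; it is illustrative only, not part of the deleted source:

#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, demo_count);

static void demo_touch(void)
{
	int cpu;

	this_cpu_inc(demo_count);		/* the running CPU's instance */
	for_each_online_cpu(cpu)		/* any CPU's instance, by index */
		pr_info("cpu%d: demo_count=%lu\n", cpu, per_cpu(demo_count, cpu));
}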
- */ -static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex); - -void cpu_hotplug_driver_lock(void) -{ - mutex_lock(&x86_cpu_hotplug_driver_mutex); -} - -void cpu_hotplug_driver_unlock(void) -{ - mutex_unlock(&x86_cpu_hotplug_driver_mutex); -} - -ssize_t arch_cpu_probe(const char *buf, size_t count) { return -1; } -ssize_t arch_cpu_release(const char *buf, size_t count) { return -1; } -#else -static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; -#define get_idle_for_cpu(x) (idle_thread_array[(x)]) -#define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p)) -#endif - -/* Number of siblings per CPU package */ -int smp_num_siblings = 1; -EXPORT_SYMBOL(smp_num_siblings); - -/* Last level cache ID of each logical CPU */ -DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; - -/* representing HT siblings of each logical CPU */ -DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); -EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); - -/* representing HT and core siblings of each logical CPU */ -DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); -EXPORT_PER_CPU_SYMBOL(cpu_core_map); - -DEFINE_PER_CPU(cpumask_var_t, cpu_llc_shared_map); - -/* Per CPU bogomips and other parameters */ -DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); -EXPORT_PER_CPU_SYMBOL(cpu_info); - -atomic_t init_deasserted; - -/* - * Report back to the Boot Processor. - * Running on AP. - */ -static void __cpuinit smp_callin(void) -{ - int cpuid, phys_id; - unsigned long timeout; - - /* - * If waken up by an INIT in an 82489DX configuration - * we may get here before an INIT-deassert IPI reaches - * our local APIC. We have to wait for the IPI or we'll - * lock up on an APIC access. - */ - if (apic->wait_for_init_deassert) - apic->wait_for_init_deassert(&init_deasserted); - - /* - * (This works even if the APIC is not enabled.) - */ - phys_id = read_apic_id(); - cpuid = smp_processor_id(); - if (cpumask_test_cpu(cpuid, cpu_callin_mask)) { - panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, - phys_id, cpuid); - } - pr_debug("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id); - - /* - * STARTUP IPIs are fragile beasts as they might sometimes - * trigger some glue motherboard logic. Complete APIC bus - * silence for 1 second, this overestimates the time the - * boot CPU is spending to send the up to 2 STARTUP IPIs - * by a factor of two. This should be enough. - */ - - /* - * Waiting 2s total for startup (udelay is not yet working) - */ - timeout = jiffies + 2*HZ; - while (time_before(jiffies, timeout)) { - /* - * Has the boot CPU finished it's STARTUP sequence? - */ - if (cpumask_test_cpu(cpuid, cpu_callout_mask)) - break; - cpu_relax(); - } - - if (!time_before(jiffies, timeout)) { - panic("%s: CPU%d started up but did not get a callout!\n", - __func__, cpuid); - } - - /* - * the boot CPU has finished the init stage and is spinning - * on callin_map until we finish. We are free to set up this - * CPU, first the APIC. (this is probably redundant on most - * boards) - */ - - pr_debug("CALLIN, before setup_local_APIC().\n"); - if (apic->smp_callin_clear_local_apic) - apic->smp_callin_clear_local_apic(); - setup_local_APIC(); - end_local_APIC_setup(); - - /* - * Need to setup vector mappings before we enable interrupts. - */ - setup_vector_irq(smp_processor_id()); - - /* - * Save our processor parameters. Note: this information - * is needed for clock calibration. - */ - smp_store_cpu_info(cpuid); - - /* - * Get our bogomips. - * Update loops_per_jiffy in cpu_data. 
Previous call to - * smp_store_cpu_info() stored a value that is close but not as - * accurate as the value just calculated. - */ - calibrate_delay(); - cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy; - pr_debug("Stack at about %p\n", &cpuid); - - /* - * This must be done before setting cpu_online_mask - * or calling notify_cpu_starting. - */ - set_cpu_sibling_map(raw_smp_processor_id()); - wmb(); - - notify_cpu_starting(cpuid); - - /* - * Allow the master to continue. - */ - cpumask_set_cpu(cpuid, cpu_callin_mask); -} - -/* - * Activate a secondary processor. - */ -notrace static void __cpuinit start_secondary(void *unused) -{ - /* - * Don't put *anything* before cpu_init(), SMP booting is too - * fragile that we want to limit the things done here to the - * most necessary things. - */ - cpu_init(); - x86_cpuinit.early_percpu_clock_init(); - preempt_disable(); - smp_callin(); - -#ifdef CONFIG_X86_32 - /* switch away from the initial page table */ - load_cr3(swapper_pg_dir); - __flush_tlb_all(); -#endif - - /* otherwise gcc will move up smp_processor_id before the cpu_init */ - barrier(); - /* - * Check TSC synchronization with the BP: - */ - check_tsc_sync_target(); - - /* - * We need to hold call_lock, so there is no inconsistency - * between the time smp_call_function() determines number of - * IPI recipients, and the time when the determination is made - * for which cpus receive the IPI. Holding this - * lock helps us to not include this cpu in a currently in progress - * smp_call_function(). - * - * We need to hold vector_lock so there the set of online cpus - * does not change while we are assigning vectors to cpus. Holding - * this lock ensures we don't half assign or remove an irq from a cpu. - */ - ipi_call_lock(); - lock_vector_lock(); - set_cpu_online(smp_processor_id(), true); - unlock_vector_lock(); - ipi_call_unlock(); - per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; - x86_platform.nmi_init(); - - /* enable local interrupts */ - local_irq_enable(); - - /* to prevent fake stack check failure in clock setup */ - boot_init_stack_canary(); - - x86_cpuinit.setup_percpu_clockev(); - - wmb(); - cpu_idle(); -} - -/* - * The bootstrap kernel entry code has set these up. 
Save them for - * a given CPU - */ - -void __cpuinit smp_store_cpu_info(int id) -{ - struct cpuinfo_x86 *c = &cpu_data(id); - - *c = boot_cpu_data; - c->cpu_index = id; - if (id != 0) - identify_secondary_cpu(c); -} - -static void __cpuinit link_thread_siblings(int cpu1, int cpu2) -{ - cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2)); - cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1)); - cpumask_set_cpu(cpu1, cpu_core_mask(cpu2)); - cpumask_set_cpu(cpu2, cpu_core_mask(cpu1)); - cpumask_set_cpu(cpu1, cpu_llc_shared_mask(cpu2)); - cpumask_set_cpu(cpu2, cpu_llc_shared_mask(cpu1)); -} - - -void __cpuinit set_cpu_sibling_map(int cpu) -{ - int i; - struct cpuinfo_x86 *c = &cpu_data(cpu); - - cpumask_set_cpu(cpu, cpu_sibling_setup_mask); - - if (smp_num_siblings > 1) { - for_each_cpu(i, cpu_sibling_setup_mask) { - struct cpuinfo_x86 *o = &cpu_data(i); - - if (cpu_has(c, X86_FEATURE_TOPOEXT)) { - if (c->phys_proc_id == o->phys_proc_id && - per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i) && - c->compute_unit_id == o->compute_unit_id) - link_thread_siblings(cpu, i); - } else if (c->phys_proc_id == o->phys_proc_id && - c->cpu_core_id == o->cpu_core_id) { - link_thread_siblings(cpu, i); - } - } - } else { - cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); - } - - cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu)); - - if (__this_cpu_read(cpu_info.x86_max_cores) == 1) { - cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu)); - c->booted_cores = 1; - return; - } - - for_each_cpu(i, cpu_sibling_setup_mask) { - if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && - per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { - cpumask_set_cpu(i, cpu_llc_shared_mask(cpu)); - cpumask_set_cpu(cpu, cpu_llc_shared_mask(i)); - } - if (c->phys_proc_id == cpu_data(i).phys_proc_id) { - cpumask_set_cpu(i, cpu_core_mask(cpu)); - cpumask_set_cpu(cpu, cpu_core_mask(i)); - /* - * Does this new cpu bringup a new core? - */ - if (cpumask_weight(cpu_sibling_mask(cpu)) == 1) { - /* - * for each core in package, increment - * the booted_cores for this new cpu - */ - if (cpumask_first(cpu_sibling_mask(i)) == i) - c->booted_cores++; - /* - * increment the core count for all - * the other cpus in this package - */ - if (i != cpu) - cpu_data(i).booted_cores++; - } else if (i != cpu && !c->booted_cores) - c->booted_cores = cpu_data(i).booted_cores; - } - } -} - -/* maps the cpu to the sched domain representing multi-core */ -const struct cpumask *cpu_coregroup_mask(int cpu) -{ - struct cpuinfo_x86 *c = &cpu_data(cpu); - /* - * For perf, we return last level cache shared map. - * And for power savings, we return cpu_core_map - */ - if ((sched_mc_power_savings || sched_smt_power_savings) && - !(cpu_has(c, X86_FEATURE_AMD_DCM))) - return cpu_core_mask(cpu); - else - return cpu_llc_shared_mask(cpu); -} - -static void impress_friends(void) -{ - int cpu; - unsigned long bogosum = 0; - /* - * Allow the user to impress friends. 
- */ - pr_debug("Before bogomips.\n"); - for_each_possible_cpu(cpu) - if (cpumask_test_cpu(cpu, cpu_callout_mask)) - bogosum += cpu_data(cpu).loops_per_jiffy; - printk(KERN_INFO - "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", - num_online_cpus(), - bogosum/(500000/HZ), - (bogosum/(5000/HZ))%100); - - pr_debug("Before bogocount - setting activated=1.\n"); -} - -void __inquire_remote_apic(int apicid) -{ - unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; - const char * const names[] = { "ID", "VERSION", "SPIV" }; - int timeout; - u32 status; - - printk(KERN_INFO "Inquiring remote APIC 0x%x...\n", apicid); - - for (i = 0; i < ARRAY_SIZE(regs); i++) { - printk(KERN_INFO "... APIC 0x%x %s: ", apicid, names[i]); - - /* - * Wait for idle. - */ - status = safe_apic_wait_icr_idle(); - if (status) - printk(KERN_CONT - "a previous APIC delivery may have failed\n"); - - apic_icr_write(APIC_DM_REMRD | regs[i], apicid); - - timeout = 0; - do { - udelay(100); - status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK; - } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000); - - switch (status) { - case APIC_ICR_RR_VALID: - status = apic_read(APIC_RRR); - printk(KERN_CONT "%08x\n", status); - break; - default: - printk(KERN_CONT "failed\n"); - } - } -} - -/* - * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal - * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this - * won't ... remember to clear down the APIC, etc later. - */ -int __cpuinit -wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) -{ - unsigned long send_status, accept_status = 0; - int maxlvt; - - /* Target chip */ - /* Boot on the stack */ - /* Kick the second */ - apic_icr_write(APIC_DM_NMI | apic->dest_logical, logical_apicid); - - pr_debug("Waiting for send to finish...\n"); - send_status = safe_apic_wait_icr_idle(); - - /* - * Give the other CPU some time to accept the IPI. - */ - udelay(200); - if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { - maxlvt = lapic_get_maxlvt(); - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - accept_status = (apic_read(APIC_ESR) & 0xEF); - } - pr_debug("NMI sent.\n"); - - if (send_status) - printk(KERN_ERR "APIC never delivered???\n"); - if (accept_status) - printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status); - - return (send_status | accept_status); -} - -static int __cpuinit -wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) -{ - unsigned long send_status, accept_status = 0; - int maxlvt, num_starts, j; - - maxlvt = lapic_get_maxlvt(); - - /* - * Be paranoid about clearing APIC errors. - */ - if (APIC_INTEGRATED(apic_version[phys_apicid])) { - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - } - - pr_debug("Asserting INIT.\n"); - - /* - * Turn INIT on target chip - */ - /* - * Send IPI - */ - apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT, - phys_apicid); - - pr_debug("Waiting for send to finish...\n"); - send_status = safe_apic_wait_icr_idle(); - - mdelay(10); - - pr_debug("Deasserting INIT.\n"); - - /* Target chip */ - /* Send IPI */ - apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid); - - pr_debug("Waiting for send to finish...\n"); - send_status = safe_apic_wait_icr_idle(); - - mb(); - atomic_set(&init_deasserted, 1); - - /* - * Should we send STARTUP IPIs ? - * - * Determine this based on the APIC version. 
- * If we don't have an integrated APIC, don't send the STARTUP IPIs. - */ - if (APIC_INTEGRATED(apic_version[phys_apicid])) - num_starts = 2; - else - num_starts = 0; - - /* - * Paravirt / VMI wants a startup IPI hook here to set up the - * target processor state. - */ - startup_ipi_hook(phys_apicid, (unsigned long) start_secondary, - stack_start); - - /* - * Run STARTUP IPI loop. - */ - pr_debug("#startup loops: %d.\n", num_starts); - - for (j = 1; j <= num_starts; j++) { - pr_debug("Sending STARTUP #%d.\n", j); - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - pr_debug("After apic_write.\n"); - - /* - * STARTUP IPI - */ - - /* Target chip */ - /* Boot on the stack */ - /* Kick the second */ - apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12), - phys_apicid); - - /* - * Give the other CPU some time to accept the IPI. - */ - udelay(300); - - pr_debug("Startup point 1.\n"); - - pr_debug("Waiting for send to finish...\n"); - send_status = safe_apic_wait_icr_idle(); - - /* - * Give the other CPU some time to accept the IPI. - */ - udelay(200); - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - accept_status = (apic_read(APIC_ESR) & 0xEF); - if (send_status || accept_status) - break; - } - pr_debug("After Startup.\n"); - - if (send_status) - printk(KERN_ERR "APIC never delivered???\n"); - if (accept_status) - printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status); - - return (send_status | accept_status); -} - -struct create_idle { - struct work_struct work; - struct task_struct *idle; - struct completion done; - int cpu; -}; - -static void __cpuinit do_fork_idle(struct work_struct *work) -{ - struct create_idle *c_idle = - container_of(work, struct create_idle, work); - - c_idle->idle = fork_idle(c_idle->cpu); - complete(&c_idle->done); -} - -/* reduce the number of lines printed when booting a large cpu count system */ -static void __cpuinit announce_cpu(int cpu, int apicid) -{ - static int current_node = -1; - int node = early_cpu_to_node(cpu); - - if (system_state == SYSTEM_BOOTING) { - if (node != current_node) { - if (current_node > (-1)) - pr_cont(" Ok.\n"); - current_node = node; - pr_info("Booting Node %3d, Processors ", node); - } - pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " Ok.\n" : ""); - return; - } else - pr_info("Booting Node %d Processor %d APIC 0x%x\n", - node, cpu, apicid); -} - -/* - * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad - * (ie clustered apic addressing mode), this is a LOGICAL apic ID. - * Returns zero if CPU booted OK, else error code from - * ->wakeup_secondary_cpu. - */ -static int __cpuinit do_boot_cpu(int apicid, int cpu) -{ - unsigned long boot_error = 0; - unsigned long start_ip; - int timeout; - struct create_idle c_idle = { - .cpu = cpu, - .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), - }; - - INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle); - - alternatives_smp_switch(1); - - c_idle.idle = get_idle_for_cpu(cpu); - - /* - * We can't use kernel_thread since we must avoid to - * reschedule the child. 
- */ - if (c_idle.idle) { - c_idle.idle->thread.sp = (unsigned long) (((struct pt_regs *) - (THREAD_SIZE + task_stack_page(c_idle.idle))) - 1); - init_idle(c_idle.idle, cpu); - goto do_rest; - } - - schedule_work(&c_idle.work); - wait_for_completion(&c_idle.done); - - if (IS_ERR(c_idle.idle)) { - printk("failed fork for CPU %d\n", cpu); - destroy_work_on_stack(&c_idle.work); - return PTR_ERR(c_idle.idle); - } - - set_idle_for_cpu(cpu, c_idle.idle); -do_rest: - per_cpu(current_task, cpu) = c_idle.idle; -#ifdef CONFIG_X86_32 - /* Stack for startup_32 can be just as for start_secondary onwards */ - irq_ctx_init(cpu); -#else - clear_tsk_thread_flag(c_idle.idle, TIF_FORK); - initial_gs = per_cpu_offset(cpu); - per_cpu(kernel_stack, cpu) = - (unsigned long)task_stack_page(c_idle.idle) - - KERNEL_STACK_OFFSET + THREAD_SIZE; -#endif - early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); - initial_code = (unsigned long)start_secondary; - stack_start = c_idle.idle->thread.sp; - - /* start_ip had better be page-aligned! */ - start_ip = trampoline_address(); - - /* So we see what's up */ - announce_cpu(cpu, apicid); - - /* - * This grunge runs the startup process for - * the targeted processor. - */ - - atomic_set(&init_deasserted, 0); - - if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { - - pr_debug("Setting warm reset code and vector.\n"); - - smpboot_setup_warm_reset_vector(start_ip); - /* - * Be paranoid about clearing APIC errors. - */ - if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - } - } - - /* - * Kick the secondary CPU. Use the method in the APIC driver - * if it's defined - or use an INIT boot APIC message otherwise: - */ - if (apic->wakeup_secondary_cpu) - boot_error = apic->wakeup_secondary_cpu(apicid, start_ip); - else - boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip); - - if (!boot_error) { - /* - * allow APs to start initializing. - */ - pr_debug("Before Callout %d.\n", cpu); - cpumask_set_cpu(cpu, cpu_callout_mask); - pr_debug("After Callout %d.\n", cpu); - - /* - * Wait 5s total for a response - */ - for (timeout = 0; timeout < 50000; timeout++) { - if (cpumask_test_cpu(cpu, cpu_callin_mask)) - break; /* It has booted */ - udelay(100); - /* - * Allow other tasks to run while we wait for the - * AP to come online. This also gives a chance - * for the MTRR work(triggered by the AP coming online) - * to be completed in the stop machine context. - */ - schedule(); - } - - if (cpumask_test_cpu(cpu, cpu_callin_mask)) { - print_cpu_msr(&cpu_data(cpu)); - pr_debug("CPU%d: has booted.\n", cpu); - } else { - boot_error = 1; - if (*(volatile u32 *)TRAMPOLINE_SYM(trampoline_status) - == 0xA5A5A5A5) - /* trampoline started but...? */ - pr_err("CPU%d: Stuck ??\n", cpu); - else - /* trampoline code not run */ - pr_err("CPU%d: Not responding.\n", cpu); - if (apic->inquire_remote_apic) - apic->inquire_remote_apic(apicid); - } - } - - if (boot_error) { - /* Try to put things back the way they were before ... */ - numa_remove_cpu(cpu); /* was set by numa_add_cpu */ - - /* was set by do_boot_cpu() */ - cpumask_clear_cpu(cpu, cpu_callout_mask); - - /* was set by cpu_init() */ - cpumask_clear_cpu(cpu, cpu_initialized_mask); - - set_cpu_present(cpu, false); - per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID; - } - - /* mark "stuck" area as not stuck */ - *(volatile u32 *)TRAMPOLINE_SYM(trampoline_status) = 0; - - if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { - /* - * Cleanup possible dangling ends... 
- */ - smpboot_restore_warm_reset_vector(); - } - - destroy_work_on_stack(&c_idle.work); - return boot_error; -} - -int __cpuinit native_cpu_up(unsigned int cpu) -{ - int apicid = apic->cpu_present_to_apicid(cpu); - unsigned long flags; - int err; - - WARN_ON(irqs_disabled()); - - pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu); - - if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || - !physid_isset(apicid, phys_cpu_present_map) || - !apic->apic_id_valid(apicid)) { - printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu); - return -EINVAL; - } - - /* - * Already booted CPU? - */ - if (cpumask_test_cpu(cpu, cpu_callin_mask)) { - pr_debug("do_boot_cpu %d Already started\n", cpu); - return -ENOSYS; - } - - /* - * Save current MTRR state in case it was changed since early boot - * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync: - */ - mtrr_save_state(); - - per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; - - err = do_boot_cpu(apicid, cpu); - if (err) { - pr_debug("do_boot_cpu failed %d\n", err); - return -EIO; - } - - /* - * Check TSC synchronization with the AP (keep irqs disabled - * while doing so): - */ - local_irq_save(flags); - check_tsc_sync_source(cpu); - local_irq_restore(flags); - - while (!cpu_online(cpu)) { - cpu_relax(); - touch_nmi_watchdog(); - } - - return 0; -} - -/** - * arch_disable_smp_support() - disables SMP support for x86 at runtime - */ -void arch_disable_smp_support(void) -{ - disable_ioapic_support(); -} - -/* - * Fall back to non SMP mode after errors. - * - * RED-PEN audit/test this more. I bet there is more state messed up here. - */ -static __init void disable_smp(void) -{ - init_cpu_present(cpumask_of(0)); - init_cpu_possible(cpumask_of(0)); - smpboot_clear_io_apic_irqs(); - - if (smp_found_config) - physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); - else - physid_set_mask_of_physid(0, &phys_cpu_present_map); - cpumask_set_cpu(0, cpu_sibling_mask(0)); - cpumask_set_cpu(0, cpu_core_mask(0)); -} - -/* - * Various sanity checks. - */ -static int __init smp_sanity_check(unsigned max_cpus) -{ - preempt_disable(); - -#if !defined(CONFIG_X86_BIGSMP) && defined(CONFIG_X86_32) - if (def_to_bigsmp && nr_cpu_ids > 8) { - unsigned int cpu; - unsigned nr; - - printk(KERN_WARNING - "More than 8 CPUs detected - skipping them.\n" - "Use CONFIG_X86_BIGSMP.\n"); - - nr = 0; - for_each_present_cpu(cpu) { - if (nr >= 8) - set_cpu_present(cpu, false); - nr++; - } - - nr = 0; - for_each_possible_cpu(cpu) { - if (nr >= 8) - set_cpu_possible(cpu, false); - nr++; - } - - nr_cpu_ids = 8; - } -#endif - - if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { - printk(KERN_WARNING - "weird, boot CPU (#%d) not listed by the BIOS.\n", - hard_smp_processor_id()); - - physid_set(hard_smp_processor_id(), phys_cpu_present_map); - } - - /* - * If we couldn't find an SMP configuration at boot time, - * get out of here now! - */ - if (!smp_found_config && !acpi_lapic) { - preempt_enable(); - printk(KERN_NOTICE "SMP motherboard not detected.\n"); - disable_smp(); - if (APIC_init_uniprocessor()) - printk(KERN_NOTICE "Local APIC not detected." - " Using dummy APIC emulation.\n"); - return -1; - } - - /* - * Should not be necessary because the MP table should list the boot - * CPU too, but we do it for the sake of robustness anyway. 
- */ - if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) { - printk(KERN_NOTICE - "weird, boot CPU (#%d) not listed by the BIOS.\n", - boot_cpu_physical_apicid); - physid_set(hard_smp_processor_id(), phys_cpu_present_map); - } - preempt_enable(); - - /* - * If we couldn't find a local APIC, then get out of here now! - */ - if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && - !cpu_has_apic) { - if (!disable_apic) { - pr_err("BIOS bug, local APIC #%d not detected!...\n", - boot_cpu_physical_apicid); - pr_err("... forcing use of dummy APIC emulation." - "(tell your hw vendor)\n"); - } - smpboot_clear_io_apic(); - disable_ioapic_support(); - return -1; - } - - verify_local_APIC(); - - /* - * If SMP should be disabled, then really disable it! - */ - if (!max_cpus) { - printk(KERN_INFO "SMP mode deactivated.\n"); - smpboot_clear_io_apic(); - - connect_bsp_APIC(); - setup_local_APIC(); - bsp_end_local_APIC_setup(); - return -1; - } - - return 0; -} - -static void __init smp_cpu_index_default(void) -{ - int i; - struct cpuinfo_x86 *c; - - for_each_possible_cpu(i) { - c = &cpu_data(i); - /* mark all to hotplug */ - c->cpu_index = nr_cpu_ids; - } -} - -/* - * Prepare for SMP bootup. The MP table or ACPI has been read - * earlier. Just do some sanity checking here and enable APIC mode. - */ -void __init native_smp_prepare_cpus(unsigned int max_cpus) -{ - unsigned int i; - - preempt_disable(); - smp_cpu_index_default(); - - /* - * Setup boot CPU information - */ - smp_store_cpu_info(0); /* Final full version of the data */ - cpumask_copy(cpu_callin_mask, cpumask_of(0)); - mb(); - - current_thread_info()->cpu = 0; /* needed? */ - for_each_possible_cpu(i) { - zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); - zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); - zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL); - } - set_cpu_sibling_map(0); - - - if (smp_sanity_check(max_cpus) < 0) { - printk(KERN_INFO "SMP disabled\n"); - disable_smp(); - goto out; - } - - default_setup_apic_routing(); - - preempt_disable(); - if (read_apic_id() != boot_cpu_physical_apicid) { - panic("Boot APIC ID in local APIC unexpected (%d vs %d)", - read_apic_id(), boot_cpu_physical_apicid); - /* Or can we switch back to PIC here? */ - } - preempt_enable(); - - connect_bsp_APIC(); - - /* - * Switch from PIC to APIC mode. - */ - setup_local_APIC(); - - /* - * Enable IO APIC before setting up error vector - */ - if (!skip_ioapic_setup && nr_ioapics) - enable_IO_APIC(); - - bsp_end_local_APIC_setup(); - - if (apic->setup_portio_remap) - apic->setup_portio_remap(); - - smpboot_setup_io_apic(); - /* - * Set up local APIC timer on boot CPU. - */ - - printk(KERN_INFO "CPU%d: ", 0); - print_cpu_info(&cpu_data(0)); - x86_init.timers.setup_percpu_clockev(); - - if (is_uv_system()) - uv_system_init(); - - set_mtrr_aps_delayed_init(); -out: - preempt_enable(); -} - -void arch_disable_nonboot_cpus_begin(void) -{ - /* - * Avoid the smp alternatives switch during the disable_nonboot_cpus(). - * In the suspend path, we will be back in the SMP mode shortly anyways. - */ - skip_smp_alternatives = true; -} - -void arch_disable_nonboot_cpus_end(void) -{ - skip_smp_alternatives = false; -} - -void arch_enable_nonboot_cpus_begin(void) -{ - set_mtrr_aps_delayed_init(); -} - -void arch_enable_nonboot_cpus_end(void) -{ - mtrr_aps_init(); -} - -/* - * Early setup to make printk work. 
- */ -void __init native_smp_prepare_boot_cpu(void) -{ - int me = smp_processor_id(); - switch_to_new_gdt(me); - /* already set me in cpu_online_mask in boot_cpu_init() */ - cpumask_set_cpu(me, cpu_callout_mask); - per_cpu(cpu_state, me) = CPU_ONLINE; -} - -void __init native_smp_cpus_done(unsigned int max_cpus) -{ - pr_debug("Boot done.\n"); - - nmi_selftest(); - impress_friends(); -#ifdef CONFIG_X86_IO_APIC - setup_ioapic_dest(); -#endif - mtrr_aps_init(); -} - -static int __initdata setup_possible_cpus = -1; -static int __init _setup_possible_cpus(char *str) -{ - get_option(&str, &setup_possible_cpus); - return 0; -} -early_param("possible_cpus", _setup_possible_cpus); - - -/* - * cpu_possible_mask should be static, it cannot change as cpu's - * are onlined, or offlined. The reason is per-cpu data-structures - * are allocated by some modules at init time, and dont expect to - * do this dynamically on cpu arrival/departure. - * cpu_present_mask on the other hand can change dynamically. - * In case when cpu_hotplug is not compiled, then we resort to current - * behaviour, which is cpu_possible == cpu_present. - * - Ashok Raj - * - * Three ways to find out the number of additional hotplug CPUs: - * - If the BIOS specified disabled CPUs in ACPI/mptables use that. - * - The user can overwrite it with possible_cpus=NUM - * - Otherwise don't reserve additional CPUs. - * We do this because additional CPUs waste a lot of memory. - * -AK - */ -__init void prefill_possible_map(void) -{ - int i, possible; - - /* no processor from mptable or madt */ - if (!num_processors) - num_processors = 1; - - i = setup_max_cpus ?: 1; - if (setup_possible_cpus == -1) { - possible = num_processors; -#ifdef CONFIG_HOTPLUG_CPU - if (setup_max_cpus) - possible += disabled_cpus; -#else - if (possible > i) - possible = i; -#endif - } else - possible = setup_possible_cpus; - - total_cpus = max_t(int, possible, num_processors + disabled_cpus); - - /* nr_cpu_ids could be reduced via nr_cpus= */ - if (possible > nr_cpu_ids) { - printk(KERN_WARNING - "%d Processors exceeds NR_CPUS limit of %d\n", - possible, nr_cpu_ids); - possible = nr_cpu_ids; - } - -#ifdef CONFIG_HOTPLUG_CPU - if (!setup_max_cpus) -#endif - if (possible > i) { - printk(KERN_WARNING - "%d Processors exceeds max_cpus limit of %u\n", - possible, setup_max_cpus); - possible = i; - } - - printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", - possible, max_t(int, possible - num_processors, 0)); - - for (i = 0; i < possible; i++) - set_cpu_possible(i, true); - for (; i < NR_CPUS; i++) - set_cpu_possible(i, false); - - nr_cpu_ids = possible; -} - -#ifdef CONFIG_HOTPLUG_CPU - -static void remove_siblinginfo(int cpu) -{ - int sibling; - struct cpuinfo_x86 *c = &cpu_data(cpu); - - for_each_cpu(sibling, cpu_core_mask(cpu)) { - cpumask_clear_cpu(cpu, cpu_core_mask(sibling)); - /*/ - * last thread sibling in this cpu core going down - */ - if (cpumask_weight(cpu_sibling_mask(cpu)) == 1) - cpu_data(sibling).booted_cores--; - } - - for_each_cpu(sibling, cpu_sibling_mask(cpu)) - cpumask_clear_cpu(cpu, cpu_sibling_mask(sibling)); - cpumask_clear(cpu_sibling_mask(cpu)); - cpumask_clear(cpu_core_mask(cpu)); - c->phys_proc_id = 0; - c->cpu_core_id = 0; - cpumask_clear_cpu(cpu, cpu_sibling_setup_mask); -} - -static void __ref remove_cpu_from_maps(int cpu) -{ - set_cpu_online(cpu, false); - cpumask_clear_cpu(cpu, cpu_callout_mask); - cpumask_clear_cpu(cpu, cpu_callin_mask); - /* was set by cpu_init() */ - cpumask_clear_cpu(cpu, cpu_initialized_mask); - 
numa_remove_cpu(cpu); -} - -void cpu_disable_common(void) -{ - int cpu = smp_processor_id(); - - remove_siblinginfo(cpu); - - /* It's now safe to remove this processor from the online map */ - lock_vector_lock(); - remove_cpu_from_maps(cpu); - unlock_vector_lock(); - fixup_irqs(); -} - -int native_cpu_disable(void) -{ - int cpu = smp_processor_id(); - - /* - * Perhaps use cpufreq to drop frequency, but that could go - * into generic code. - * - * We won't take down the boot processor on i386 due to some - * interrupts only being able to be serviced by the BSP. - * Especially so if we're not using an IOAPIC -zwane - */ - if (cpu == 0) - return -EBUSY; - - clear_local_APIC(); - - cpu_disable_common(); - return 0; -} - -void native_cpu_die(unsigned int cpu) -{ - /* We don't do anything here: idle task is faking death itself. */ - unsigned int i; - - for (i = 0; i < 10; i++) { - /* They ack this in play_dead by setting CPU_DEAD */ - if (per_cpu(cpu_state, cpu) == CPU_DEAD) { - if (system_state == SYSTEM_RUNNING) - pr_info("CPU %u is now offline\n", cpu); - - if (1 == num_online_cpus()) - alternatives_smp_switch(0); - return; - } - msleep(100); - } - pr_err("CPU %u didn't die...\n", cpu); -} - -void play_dead_common(void) -{ - idle_task_exit(); - reset_lazy_tlbstate(); - amd_e400_remove_cpu(raw_smp_processor_id()); - - mb(); - /* Ack it */ - __this_cpu_write(cpu_state, CPU_DEAD); - - /* - * With physical CPU hotplug, we should halt the cpu - */ - local_irq_disable(); -} - -/* - * We need to flush the caches before going to sleep, lest we have - * dirty data in our caches when we come back up. - */ -static inline void mwait_play_dead(void) -{ - unsigned int eax, ebx, ecx, edx; - unsigned int highest_cstate = 0; - unsigned int highest_subcstate = 0; - int i; - void *mwait_ptr; - struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info); - - if (!(this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c))) - return; - if (!this_cpu_has(X86_FEATURE_CLFLSH)) - return; - if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF) - return; - - eax = CPUID_MWAIT_LEAF; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - - /* - * eax will be 0 if EDX enumeration is not valid. - * Initialized below to cstate, sub_cstate value when EDX is valid. - */ - if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) { - eax = 0; - } else { - edx >>= MWAIT_SUBSTATE_SIZE; - for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) { - if (edx & MWAIT_SUBSTATE_MASK) { - highest_cstate = i; - highest_subcstate = edx & MWAIT_SUBSTATE_MASK; - } - } - eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) | - (highest_subcstate - 1); - } - - /* - * This should be a memory location in a cache line which is - * unlikely to be touched by other processors. The actual - * content is immaterial as it is not actually modified in any way. - */ - mwait_ptr = ¤t_thread_info()->flags; - - wbinvd(); - - while (1) { - /* - * The CLFLUSH is a workaround for erratum AAI65 for - * the Xeon 7400 series. It's not clear it is actually - * needed, but it should be harmless in either case. - * The WBINVD is insufficient due to the spurious-wakeup - * case where we return around the loop. 
- */ - clflush(mwait_ptr); - __monitor(mwait_ptr, 0, 0); - mb(); - __mwait(eax, 0); - } -} - -static inline void hlt_play_dead(void) -{ - if (__this_cpu_read(cpu_info.x86) >= 4) - wbinvd(); - - while (1) { - native_halt(); - } -} - -void native_play_dead(void) -{ - play_dead_common(); - tboot_shutdown(TB_SHUTDOWN_WFS); - - mwait_play_dead(); /* Only returns on failure */ - if (cpuidle_play_dead()) - hlt_play_dead(); -} - -#else /* ... !CONFIG_HOTPLUG_CPU */ -int native_cpu_disable(void) -{ - return -ENOSYS; -} - -void native_cpu_die(unsigned int cpu) -{ - /* We said "no" in __cpu_disable */ - BUG(); -} - -void native_play_dead(void) -{ - BUG(); -} - -#endif diff --git a/ANDROID_3.4.5/arch/x86/kernel/stacktrace.c b/ANDROID_3.4.5/arch/x86/kernel/stacktrace.c deleted file mode 100644 index fdd0c643..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/stacktrace.c +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Stack trace management functions - * - * Copyright (C) 2006-2009 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> - */ -#include <linux/sched.h> -#include <linux/stacktrace.h> -#include <linux/module.h> -#include <linux/uaccess.h> -#include <asm/stacktrace.h> - -static int save_stack_stack(void *data, char *name) -{ - return 0; -} - -static void -__save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched) -{ - struct stack_trace *trace = data; -#ifdef CONFIG_FRAME_POINTER - if (!reliable) - return; -#endif - if (nosched && in_sched_functions(addr)) - return; - if (trace->skip > 0) { - trace->skip--; - return; - } - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = addr; -} - -static void save_stack_address(void *data, unsigned long addr, int reliable) -{ - return __save_stack_address(data, addr, reliable, false); -} - -static void -save_stack_address_nosched(void *data, unsigned long addr, int reliable) -{ - return __save_stack_address(data, addr, reliable, true); -} - -static const struct stacktrace_ops save_stack_ops = { - .stack = save_stack_stack, - .address = save_stack_address, - .walk_stack = print_context_stack, -}; - -static const struct stacktrace_ops save_stack_ops_nosched = { - .stack = save_stack_stack, - .address = save_stack_address_nosched, - .walk_stack = print_context_stack, -}; - -/* - * Save stack-backtrace addresses into a stack_trace buffer. 
- */ -void save_stack_trace(struct stack_trace *trace) -{ - dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace); - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = ULONG_MAX; -} -EXPORT_SYMBOL_GPL(save_stack_trace); - -void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) -{ - dump_trace(current, regs, NULL, 0, &save_stack_ops, trace); - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = ULONG_MAX; -} - -void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) -{ - dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace); - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = ULONG_MAX; -} -EXPORT_SYMBOL_GPL(save_stack_trace_tsk); - -/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */ - -struct stack_frame_user { - const void __user *next_fp; - unsigned long ret_addr; -}; - -static int -copy_stack_frame(const void __user *fp, struct stack_frame_user *frame) -{ - int ret; - - if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) - return 0; - - ret = 1; - pagefault_disable(); - if (__copy_from_user_inatomic(frame, fp, sizeof(*frame))) - ret = 0; - pagefault_enable(); - - return ret; -} - -static inline void __save_stack_trace_user(struct stack_trace *trace) -{ - const struct pt_regs *regs = task_pt_regs(current); - const void __user *fp = (const void __user *)regs->bp; - - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = regs->ip; - - while (trace->nr_entries < trace->max_entries) { - struct stack_frame_user frame; - - frame.next_fp = NULL; - frame.ret_addr = 0; - if (!copy_stack_frame(fp, &frame)) - break; - if ((unsigned long)fp < regs->sp) - break; - if (frame.ret_addr) { - trace->entries[trace->nr_entries++] = - frame.ret_addr; - } - if (fp == frame.next_fp) - break; - fp = frame.next_fp; - } -} - -void save_stack_trace_user(struct stack_trace *trace) -{ - /* - * Trace user stack if we are not a kernel thread - */ - if (current->mm) { - __save_stack_trace_user(trace); - } - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = ULONG_MAX; -} - diff --git a/ANDROID_3.4.5/arch/x86/kernel/step.c b/ANDROID_3.4.5/arch/x86/kernel/step.c deleted file mode 100644 index c346d116..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/step.c +++ /dev/null @@ -1,216 +0,0 @@ -/* - * x86 single-step support code, common to 32-bit and 64-bit. - */ -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/ptrace.h> -#include <asm/desc.h> - -unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs) -{ - unsigned long addr, seg; - - addr = regs->ip; - seg = regs->cs & 0xffff; - if (v8086_mode(regs)) { - addr = (addr & 0xffff) + (seg << 4); - return addr; - } - - /* - * We'll assume that the code segments in the GDT - * are all zero-based. That is largely true: the - * TLS segments are used for data, and the PNPBIOS - * and APM bios ones we just ignore here. - */ - if ((seg & SEGMENT_TI_MASK) == SEGMENT_LDT) { - struct desc_struct *desc; - unsigned long base; - - seg &= ~7UL; - - mutex_lock(&child->mm->context.lock); - if (unlikely((seg >> 3) >= child->mm->context.size)) - addr = -1L; /* bogus selector, access would fault */ - else { - desc = child->mm->context.ldt + seg; - base = get_desc_base(desc); - - /* 16-bit code segment? 
*/ - if (!desc->d) - addr &= 0xffff; - addr += base; - } - mutex_unlock(&child->mm->context.lock); - } - - return addr; -} - -static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs) -{ - int i, copied; - unsigned char opcode[15]; - unsigned long addr = convert_ip_to_linear(child, regs); - - copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0); - for (i = 0; i < copied; i++) { - switch (opcode[i]) { - /* popf and iret */ - case 0x9d: case 0xcf: - return 1; - - /* CHECKME: 64 65 */ - - /* opcode and address size prefixes */ - case 0x66: case 0x67: - continue; - /* irrelevant prefixes (segment overrides and repeats) */ - case 0x26: case 0x2e: - case 0x36: case 0x3e: - case 0x64: case 0x65: - case 0xf0: case 0xf2: case 0xf3: - continue; - -#ifdef CONFIG_X86_64 - case 0x40 ... 0x4f: - if (!user_64bit_mode(regs)) - /* 32-bit mode: register increment */ - return 0; - /* 64-bit mode: REX prefix */ - continue; -#endif - - /* CHECKME: f2, f3 */ - - /* - * pushf: NOTE! We should probably not let - * the user see the TF bit being set. But - * it's more pain than it's worth to avoid - * it, and a debugger could emulate this - * all in user space if it _really_ cares. - */ - case 0x9c: - default: - return 0; - } - } - return 0; -} - -/* - * Enable single-stepping. Return nonzero if user mode is not using TF itself. - */ -static int enable_single_step(struct task_struct *child) -{ - struct pt_regs *regs = task_pt_regs(child); - unsigned long oflags; - - /* - * If we stepped into a sysenter/syscall insn, it trapped in - * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. - * If user-mode had set TF itself, then it's still clear from - * do_debug() and we need to set it again to restore the user - * state so we don't wrongly set TIF_FORCED_TF below. - * If enable_single_step() was used last and that is what - * set TIF_SINGLESTEP, then both TF and TIF_FORCED_TF are - * already set and our bookkeeping is fine. - */ - if (unlikely(test_tsk_thread_flag(child, TIF_SINGLESTEP))) - regs->flags |= X86_EFLAGS_TF; - - /* - * Always set TIF_SINGLESTEP - this guarantees that - * we single-step system calls etc.. This will also - * cause us to set TF when returning to user mode. - */ - set_tsk_thread_flag(child, TIF_SINGLESTEP); - - oflags = regs->flags; - - /* Set TF on the kernel stack.. */ - regs->flags |= X86_EFLAGS_TF; - - /* - * ..but if TF is changed by the instruction we will trace, - * don't mark it as being "us" that set it, so that we - * won't clear it by hand later. - * - * Note that if we don't actually execute the popf because - * of a signal arriving right now or suchlike, we will lose - * track of the fact that it really was "us" that set it. - */ - if (is_setting_trap_flag(child, regs)) { - clear_tsk_thread_flag(child, TIF_FORCED_TF); - return 0; - } - - /* - * If TF was already set, check whether it was us who set it. - * If not, we should never attempt a block step. - */ - if (oflags & X86_EFLAGS_TF) - return test_tsk_thread_flag(child, TIF_FORCED_TF); - - set_tsk_thread_flag(child, TIF_FORCED_TF); - - return 1; -} - -/* - * Enable single or block step. - */ -static void enable_step(struct task_struct *child, bool block) -{ - /* - * Make sure block stepping (BTF) is not enabled unless it should be. - * Note that we don't try to worry about any is_setting_trap_flag() - * instructions after the first when using block stepping. - * So no one should try to use debugger block stepping in a program - * that uses user-mode single stepping itself. 
- */ - if (enable_single_step(child) && block) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl |= DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); - set_tsk_thread_flag(child, TIF_BLOCKSTEP); - } else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl &= ~DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); - clear_tsk_thread_flag(child, TIF_BLOCKSTEP); - } -} - -void user_enable_single_step(struct task_struct *child) -{ - enable_step(child, 0); -} - -void user_enable_block_step(struct task_struct *child) -{ - enable_step(child, 1); -} - -void user_disable_single_step(struct task_struct *child) -{ - /* - * Make sure block stepping (BTF) is disabled. - */ - if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl &= ~DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); - clear_tsk_thread_flag(child, TIF_BLOCKSTEP); - } - - /* Always clear TIF_SINGLESTEP... */ - clear_tsk_thread_flag(child, TIF_SINGLESTEP); - - /* But touch TF only if it was set by us.. */ - if (test_and_clear_tsk_thread_flag(child, TIF_FORCED_TF)) - task_pt_regs(child)->flags &= ~X86_EFLAGS_TF; -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/sys_i386_32.c b/ANDROID_3.4.5/arch/x86/kernel/sys_i386_32.c deleted file mode 100644 index 0b0cb5fe..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/sys_i386_32.c +++ /dev/null @@ -1,40 +0,0 @@ -/* - * This file contains various random system calls that - * have a non-standard calling sequence on the Linux/i386 - * platform. - */ - -#include <linux/errno.h> -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/fs.h> -#include <linux/smp.h> -#include <linux/sem.h> -#include <linux/msg.h> -#include <linux/shm.h> -#include <linux/stat.h> -#include <linux/syscalls.h> -#include <linux/mman.h> -#include <linux/file.h> -#include <linux/utsname.h> -#include <linux/ipc.h> - -#include <linux/uaccess.h> -#include <linux/unistd.h> - -#include <asm/syscalls.h> - -/* - * Do a system call from kernel instead of calling sys_execve so we - * end up with proper pt_regs. - */ -int kernel_execve(const char *filename, - const char *const argv[], - const char *const envp[]) -{ - long __res; - asm volatile ("int $0x80" - : "=a" (__res) - : "0" (__NR_execve), "b" (filename), "c" (argv), "d" (envp) : "memory"); - return __res; -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/sys_x86_64.c b/ANDROID_3.4.5/arch/x86/kernel/sys_x86_64.c deleted file mode 100644 index b4d3c392..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/sys_x86_64.c +++ /dev/null @@ -1,283 +0,0 @@ -#include <linux/errno.h> -#include <linux/sched.h> -#include <linux/syscalls.h> -#include <linux/mm.h> -#include <linux/fs.h> -#include <linux/smp.h> -#include <linux/sem.h> -#include <linux/msg.h> -#include <linux/shm.h> -#include <linux/stat.h> -#include <linux/mman.h> -#include <linux/file.h> -#include <linux/utsname.h> -#include <linux/personality.h> -#include <linux/random.h> -#include <linux/uaccess.h> -#include <linux/elf.h> - -#include <asm/ia32.h> -#include <asm/syscalls.h> - -/* - * Align a virtual address to avoid aliasing in the I$ on AMD F15h. - * - * @flags denotes the allocation direction - bottomup or topdown - - * or vDSO; see call sites below. 
- */ -unsigned long align_addr(unsigned long addr, struct file *filp, - enum align_flags flags) -{ - unsigned long tmp_addr; - - /* handle 32- and 64-bit case with a single conditional */ - if (va_align.flags < 0 || !(va_align.flags & (2 - mmap_is_ia32()))) - return addr; - - if (!(current->flags & PF_RANDOMIZE)) - return addr; - - if (!((flags & ALIGN_VDSO) || filp)) - return addr; - - tmp_addr = addr; - - /* - * We need an address which is <= than the original - * one only when in topdown direction. - */ - if (!(flags & ALIGN_TOPDOWN)) - tmp_addr += va_align.mask; - - tmp_addr &= ~va_align.mask; - - return tmp_addr; -} - -static int __init control_va_addr_alignment(char *str) -{ - /* guard against enabling this on other CPU families */ - if (va_align.flags < 0) - return 1; - - if (*str == 0) - return 1; - - if (*str == '=') - str++; - - if (!strcmp(str, "32")) - va_align.flags = ALIGN_VA_32; - else if (!strcmp(str, "64")) - va_align.flags = ALIGN_VA_64; - else if (!strcmp(str, "off")) - va_align.flags = 0; - else if (!strcmp(str, "on")) - va_align.flags = ALIGN_VA_32 | ALIGN_VA_64; - else - return 0; - - return 1; -} -__setup("align_va_addr", control_va_addr_alignment); - -SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, - unsigned long, prot, unsigned long, flags, - unsigned long, fd, unsigned long, off) -{ - long error; - error = -EINVAL; - if (off & ~PAGE_MASK) - goto out; - - error = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); -out: - return error; -} - -static void find_start_end(unsigned long flags, unsigned long *begin, - unsigned long *end) -{ - if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT)) { - unsigned long new_begin; - /* This is usually used needed to map code in small - model, so it needs to be in the first 31bit. Limit - it to that. This means we need to move the - unmapped base down for this case. This can give - conflicts with the heap, but we assume that glibc - malloc knows how to fall back to mmap. Give it 1GB - of playground for now. -AK */ - *begin = 0x40000000; - *end = 0x80000000; - if (current->flags & PF_RANDOMIZE) { - new_begin = randomize_range(*begin, *begin + 0x02000000, 0); - if (new_begin) - *begin = new_begin; - } - } else { - *begin = TASK_UNMAPPED_BASE; - *end = TASK_SIZE; - } -} - -unsigned long -arch_get_unmapped_area(struct file *filp, unsigned long addr, - unsigned long len, unsigned long pgoff, unsigned long flags) -{ - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - unsigned long start_addr; - unsigned long begin, end; - - if (flags & MAP_FIXED) - return addr; - - find_start_end(flags, &begin, &end); - - if (len > end) - return -ENOMEM; - - if (addr) { - addr = PAGE_ALIGN(addr); - vma = find_vma(mm, addr); - if (end - len >= addr && - (!vma || addr + len <= vma->vm_start)) - return addr; - } - if (((flags & MAP_32BIT) || test_thread_flag(TIF_ADDR32)) - && len <= mm->cached_hole_size) { - mm->cached_hole_size = 0; - mm->free_area_cache = begin; - } - addr = mm->free_area_cache; - if (addr < begin) - addr = begin; - start_addr = addr; - -full_search: - - addr = align_addr(addr, filp, 0); - - for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { - /* At this point: (!vma || addr < vma->vm_end). */ - if (end - len < addr) { - /* - * Start a new search - just in case we missed - * some holes. 
- */ - if (start_addr != begin) { - start_addr = addr = begin; - mm->cached_hole_size = 0; - goto full_search; - } - return -ENOMEM; - } - if (!vma || addr + len <= vma->vm_start) { - /* - * Remember the place where we stopped the search: - */ - mm->free_area_cache = addr + len; - return addr; - } - if (addr + mm->cached_hole_size < vma->vm_start) - mm->cached_hole_size = vma->vm_start - addr; - - addr = vma->vm_end; - addr = align_addr(addr, filp, 0); - } -} - - -unsigned long -arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, - const unsigned long len, const unsigned long pgoff, - const unsigned long flags) -{ - struct vm_area_struct *vma; - struct mm_struct *mm = current->mm; - unsigned long addr = addr0, start_addr; - - /* requested length too big for entire address space */ - if (len > TASK_SIZE) - return -ENOMEM; - - if (flags & MAP_FIXED) - return addr; - - /* for MAP_32BIT mappings we force the legact mmap base */ - if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT)) - goto bottomup; - - /* requesting a specific address */ - if (addr) { - addr = PAGE_ALIGN(addr); - vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) - return addr; - } - - /* check if free_area_cache is useful for us */ - if (len <= mm->cached_hole_size) { - mm->cached_hole_size = 0; - mm->free_area_cache = mm->mmap_base; - } - -try_again: - /* either no address requested or can't fit in requested address hole */ - start_addr = addr = mm->free_area_cache; - - if (addr < len) - goto fail; - - addr -= len; - do { - addr = align_addr(addr, filp, ALIGN_TOPDOWN); - - /* - * Lookup failure means no vma is above this address, - * else if new region fits below vma->vm_start, - * return with success: - */ - vma = find_vma(mm, addr); - if (!vma || addr+len <= vma->vm_start) - /* remember the address as a hint for next time */ - return mm->free_area_cache = addr; - - /* remember the largest hole we saw so far */ - if (addr + mm->cached_hole_size < vma->vm_start) - mm->cached_hole_size = vma->vm_start - addr; - - /* try just below the current vma->vm_start */ - addr = vma->vm_start-len; - } while (len < vma->vm_start); - -fail: - /* - * if hint left us with no space for the requested - * mapping then try again: - */ - if (start_addr != mm->mmap_base) { - mm->free_area_cache = mm->mmap_base; - mm->cached_hole_size = 0; - goto try_again; - } - -bottomup: - /* - * A failed mmap() very likely causes application failure, - * so fall back to the bottom-up function here. This scenario - * can happen with large stack limits and large mmap() - * allocations. - */ - mm->cached_hole_size = ~0UL; - mm->free_area_cache = TASK_UNMAPPED_BASE; - addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); - /* - * Restore the topdown base: - */ - mm->free_area_cache = mm->mmap_base; - mm->cached_hole_size = ~0UL; - - return addr; -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/syscall_32.c b/ANDROID_3.4.5/arch/x86/kernel/syscall_32.c deleted file mode 100644 index 147fcd49..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/syscall_32.c +++ /dev/null @@ -1,25 +0,0 @@ -/* System call table for i386. 
*/ - -#include <linux/linkage.h> -#include <linux/sys.h> -#include <linux/cache.h> -#include <asm/asm-offsets.h> - -#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ; -#include <asm/syscalls_32.h> -#undef __SYSCALL_I386 - -#define __SYSCALL_I386(nr, sym, compat) [nr] = sym, - -typedef asmlinkage void (*sys_call_ptr_t)(void); - -extern asmlinkage void sys_ni_syscall(void); - -const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { - /* - * Smells like a compiler bug -- it doesn't work - * when the & below is removed. - */ - [0 ... __NR_syscall_max] = &sys_ni_syscall, -#include <asm/syscalls_32.h> -}; diff --git a/ANDROID_3.4.5/arch/x86/kernel/syscall_64.c b/ANDROID_3.4.5/arch/x86/kernel/syscall_64.c deleted file mode 100644 index 5c7f8c20..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/syscall_64.c +++ /dev/null @@ -1,33 +0,0 @@ -/* System call table for x86-64. */ - -#include <linux/linkage.h> -#include <linux/sys.h> -#include <linux/cache.h> -#include <asm/asm-offsets.h> - -#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat) - -#ifdef CONFIG_X86_X32_ABI -# define __SYSCALL_X32(nr, sym, compat) __SYSCALL_64(nr, sym, compat) -#else -# define __SYSCALL_X32(nr, sym, compat) /* nothing */ -#endif - -#define __SYSCALL_64(nr, sym, compat) extern asmlinkage void sym(void) ; -#include <asm/syscalls_64.h> -#undef __SYSCALL_64 - -#define __SYSCALL_64(nr, sym, compat) [nr] = sym, - -typedef void (*sys_call_ptr_t)(void); - -extern void sys_ni_syscall(void); - -const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { - /* - * Smells like a compiler bug -- it doesn't work - * when the & below is removed. - */ - [0 ... __NR_syscall_max] = &sys_ni_syscall, -#include <asm/syscalls_64.h> -}; diff --git a/ANDROID_3.4.5/arch/x86/kernel/tboot.c b/ANDROID_3.4.5/arch/x86/kernel/tboot.c deleted file mode 100644 index 6410744a..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/tboot.c +++ /dev/null @@ -1,454 +0,0 @@ -/* - * tboot.c: main implementation of helper functions used by kernel for - * runtime support of Intel(R) Trusted Execution Technology - * - * Copyright (c) 2006-2009, Intel Corporation - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * - */ - -#include <linux/dma_remapping.h> -#include <linux/init_task.h> -#include <linux/spinlock.h> -#include <linux/export.h> -#include <linux/delay.h> -#include <linux/sched.h> -#include <linux/init.h> -#include <linux/dmar.h> -#include <linux/cpu.h> -#include <linux/pfn.h> -#include <linux/mm.h> -#include <linux/tboot.h> - -#include <asm/trampoline.h> -#include <asm/processor.h> -#include <asm/bootparam.h> -#include <asm/pgtable.h> -#include <asm/pgalloc.h> -#include <asm/swiotlb.h> -#include <asm/fixmap.h> -#include <asm/proto.h> -#include <asm/setup.h> -#include <asm/e820.h> -#include <asm/io.h> - -#include "acpi/realmode/wakeup.h" - -/* Global pointer to shared data; NULL means no measured launch. 
*/ -struct tboot *tboot __read_mostly; -EXPORT_SYMBOL(tboot); - -/* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */ -#define AP_WAIT_TIMEOUT 1 - -#undef pr_fmt -#define pr_fmt(fmt) "tboot: " fmt - -static u8 tboot_uuid[16] __initdata = TBOOT_UUID; - -void __init tboot_probe(void) -{ - /* Look for valid page-aligned address for shared page. */ - if (!boot_params.tboot_addr) - return; - /* - * also verify that it is mapped as we expect it before calling - * set_fixmap(), to reduce chance of garbage value causing crash - */ - if (!e820_any_mapped(boot_params.tboot_addr, - boot_params.tboot_addr, E820_RESERVED)) { - pr_warning("non-0 tboot_addr but it is not of type E820_RESERVED\n"); - return; - } - - /* only a natively booted kernel should be using TXT */ - if (paravirt_enabled()) { - pr_warning("non-0 tboot_addr but pv_ops is enabled\n"); - return; - } - - /* Map and check for tboot UUID. */ - set_fixmap(FIX_TBOOT_BASE, boot_params.tboot_addr); - tboot = (struct tboot *)fix_to_virt(FIX_TBOOT_BASE); - if (memcmp(&tboot_uuid, &tboot->uuid, sizeof(tboot->uuid))) { - pr_warning("tboot at 0x%llx is invalid\n", - boot_params.tboot_addr); - tboot = NULL; - return; - } - if (tboot->version < 5) { - pr_warning("tboot version is invalid: %u\n", tboot->version); - tboot = NULL; - return; - } - - pr_info("found shared page at phys addr 0x%llx:\n", - boot_params.tboot_addr); - pr_debug("version: %d\n", tboot->version); - pr_debug("log_addr: 0x%08x\n", tboot->log_addr); - pr_debug("shutdown_entry: 0x%x\n", tboot->shutdown_entry); - pr_debug("tboot_base: 0x%08x\n", tboot->tboot_base); - pr_debug("tboot_size: 0x%x\n", tboot->tboot_size); -} - -static pgd_t *tboot_pg_dir; -static struct mm_struct tboot_mm = { - .mm_rb = RB_ROOT, - .pgd = swapper_pg_dir, - .mm_users = ATOMIC_INIT(2), - .mm_count = ATOMIC_INIT(1), - .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem), - .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), - .mmlist = LIST_HEAD_INIT(init_mm.mmlist), -}; - -static inline void switch_to_tboot_pt(void) -{ - write_cr3(virt_to_phys(tboot_pg_dir)); -} - -static int map_tboot_page(unsigned long vaddr, unsigned long pfn, - pgprot_t prot) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - pgd = pgd_offset(&tboot_mm, vaddr); - pud = pud_alloc(&tboot_mm, pgd, vaddr); - if (!pud) - return -1; - pmd = pmd_alloc(&tboot_mm, pud, vaddr); - if (!pmd) - return -1; - pte = pte_alloc_map(&tboot_mm, NULL, pmd, vaddr); - if (!pte) - return -1; - set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot)); - pte_unmap(pte); - return 0; -} - -static int map_tboot_pages(unsigned long vaddr, unsigned long start_pfn, - unsigned long nr) -{ - /* Reuse the original kernel mapping */ - tboot_pg_dir = pgd_alloc(&tboot_mm); - if (!tboot_pg_dir) - return -1; - - for (; nr > 0; nr--, vaddr += PAGE_SIZE, start_pfn++) { - if (map_tboot_page(vaddr, start_pfn, PAGE_KERNEL_EXEC)) - return -1; - } - - return 0; -} - -static void tboot_create_trampoline(void) -{ - u32 map_base, map_size; - - /* Create identity map for tboot shutdown code. 
*/ - map_base = PFN_DOWN(tboot->tboot_base); - map_size = PFN_UP(tboot->tboot_size); - if (map_tboot_pages(map_base << PAGE_SHIFT, map_base, map_size)) - panic("tboot: Error mapping tboot pages (mfns) @ 0x%x, 0x%x\n", - map_base, map_size); -} - -#ifdef CONFIG_ACPI_SLEEP - -static void add_mac_region(phys_addr_t start, unsigned long size) -{ - struct tboot_mac_region *mr; - phys_addr_t end = start + size; - - if (tboot->num_mac_regions >= MAX_TB_MAC_REGIONS) - panic("tboot: Too many MAC regions\n"); - - if (start && size) { - mr = &tboot->mac_regions[tboot->num_mac_regions++]; - mr->start = round_down(start, PAGE_SIZE); - mr->size = round_up(end, PAGE_SIZE) - mr->start; - } -} - -static int tboot_setup_sleep(void) -{ - int i; - - tboot->num_mac_regions = 0; - - for (i = 0; i < e820.nr_map; i++) { - if ((e820.map[i].type != E820_RAM) - && (e820.map[i].type != E820_RESERVED_KERN)) - continue; - - add_mac_region(e820.map[i].addr, e820.map[i].size); - } - - tboot->acpi_sinfo.kernel_s3_resume_vector = acpi_wakeup_address; - - return 0; -} - -#else /* no CONFIG_ACPI_SLEEP */ - -static int tboot_setup_sleep(void) -{ - /* S3 shutdown requested, but S3 not supported by the kernel... */ - BUG(); - return -1; -} - -#endif - -void tboot_shutdown(u32 shutdown_type) -{ - void (*shutdown)(void); - - if (!tboot_enabled()) - return; - - /* - * if we're being called before the 1:1 mapping is set up then just - * return and let the normal shutdown happen; this should only be - * due to very early panic() - */ - if (!tboot_pg_dir) - return; - - /* if this is S3 then set regions to MAC */ - if (shutdown_type == TB_SHUTDOWN_S3) - if (tboot_setup_sleep()) - return; - - tboot->shutdown_type = shutdown_type; - - switch_to_tboot_pt(); - - shutdown = (void(*)(void))(unsigned long)tboot->shutdown_entry; - shutdown(); - - /* should not reach here */ - while (1) - halt(); -} - -static void tboot_copy_fadt(const struct acpi_table_fadt *fadt) -{ -#define TB_COPY_GAS(tbg, g) \ - tbg.space_id = g.space_id; \ - tbg.bit_width = g.bit_width; \ - tbg.bit_offset = g.bit_offset; \ - tbg.access_width = g.access_width; \ - tbg.address = g.address; - - TB_COPY_GAS(tboot->acpi_sinfo.pm1a_cnt_blk, fadt->xpm1a_control_block); - TB_COPY_GAS(tboot->acpi_sinfo.pm1b_cnt_blk, fadt->xpm1b_control_block); - TB_COPY_GAS(tboot->acpi_sinfo.pm1a_evt_blk, fadt->xpm1a_event_block); - TB_COPY_GAS(tboot->acpi_sinfo.pm1b_evt_blk, fadt->xpm1b_event_block); - - /* - * We need phys addr of waking vector, but can't use virt_to_phys() on - * &acpi_gbl_FACS because it is ioremap'ed, so calc from FACS phys - * addr. 
- */ - tboot->acpi_sinfo.wakeup_vector = fadt->facs + - offsetof(struct acpi_table_facs, firmware_waking_vector); -} - -static int tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control) -{ - static u32 acpi_shutdown_map[ACPI_S_STATE_COUNT] = { - /* S0,1,2: */ -1, -1, -1, - /* S3: */ TB_SHUTDOWN_S3, - /* S4: */ TB_SHUTDOWN_S4, - /* S5: */ TB_SHUTDOWN_S5 }; - - if (!tboot_enabled()) - return 0; - - tboot_copy_fadt(&acpi_gbl_FADT); - tboot->acpi_sinfo.pm1a_cnt_val = pm1a_control; - tboot->acpi_sinfo.pm1b_cnt_val = pm1b_control; - /* we always use the 32b wakeup vector */ - tboot->acpi_sinfo.vector_width = 32; - - if (sleep_state >= ACPI_S_STATE_COUNT || - acpi_shutdown_map[sleep_state] == -1) { - pr_warning("unsupported sleep state 0x%x\n", sleep_state); - return -1; - } - - tboot_shutdown(acpi_shutdown_map[sleep_state]); - return 0; -} - -static atomic_t ap_wfs_count; - -static int tboot_wait_for_aps(int num_aps) -{ - unsigned long timeout; - - timeout = AP_WAIT_TIMEOUT*HZ; - while (atomic_read((atomic_t *)&tboot->num_in_wfs) != num_aps && - timeout) { - mdelay(1); - timeout--; - } - - if (timeout) - pr_warning("tboot wait for APs timeout\n"); - - return !(atomic_read((atomic_t *)&tboot->num_in_wfs) == num_aps); -} - -static int __cpuinit tboot_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) -{ - switch (action) { - case CPU_DYING: - atomic_inc(&ap_wfs_count); - if (num_online_cpus() == 1) - if (tboot_wait_for_aps(atomic_read(&ap_wfs_count))) - return NOTIFY_BAD; - break; - } - return NOTIFY_OK; -} - -static struct notifier_block tboot_cpu_notifier __cpuinitdata = -{ - .notifier_call = tboot_cpu_callback, -}; - -static __init int tboot_late_init(void) -{ - if (!tboot_enabled()) - return 0; - - tboot_create_trampoline(); - - atomic_set(&ap_wfs_count, 0); - register_hotcpu_notifier(&tboot_cpu_notifier); - - acpi_os_set_prepare_sleep(&tboot_sleep); - return 0; -} - -late_initcall(tboot_late_init); - -/* - * TXT configuration registers (offsets from TXT_{PUB, PRIV}_CONFIG_REGS_BASE) - */ - -#define TXT_PUB_CONFIG_REGS_BASE 0xfed30000 -#define TXT_PRIV_CONFIG_REGS_BASE 0xfed20000 - -/* # pages for each config regs space - used by fixmap */ -#define NR_TXT_CONFIG_PAGES ((TXT_PUB_CONFIG_REGS_BASE - \ - TXT_PRIV_CONFIG_REGS_BASE) >> PAGE_SHIFT) - -/* offsets from pub/priv config space */ -#define TXTCR_HEAP_BASE 0x0300 -#define TXTCR_HEAP_SIZE 0x0308 - -#define SHA1_SIZE 20 - -struct sha1_hash { - u8 hash[SHA1_SIZE]; -}; - -struct sinit_mle_data { - u32 version; /* currently 6 */ - struct sha1_hash bios_acm_id; - u32 edx_senter_flags; - u64 mseg_valid; - struct sha1_hash sinit_hash; - struct sha1_hash mle_hash; - struct sha1_hash stm_hash; - struct sha1_hash lcp_policy_hash; - u32 lcp_policy_control; - u32 rlp_wakeup_addr; - u32 reserved; - u32 num_mdrs; - u32 mdrs_off; - u32 num_vtd_dmars; - u32 vtd_dmars_off; -} __packed; - -struct acpi_table_header *tboot_get_dmar_table(struct acpi_table_header *dmar_tbl) -{ - void *heap_base, *heap_ptr, *config; - - if (!tboot_enabled()) - return dmar_tbl; - - /* - * ACPI tables may not be DMA protected by tboot, so use DMAR copy - * SINIT saved in SinitMleData in TXT heap (which is DMA protected) - */ - - /* map config space in order to get heap addr */ - config = ioremap(TXT_PUB_CONFIG_REGS_BASE, NR_TXT_CONFIG_PAGES * - PAGE_SIZE); - if (!config) - return NULL; - - /* now map TXT heap */ - heap_base = ioremap(*(u64 *)(config + TXTCR_HEAP_BASE), - *(u64 *)(config + TXTCR_HEAP_SIZE)); - iounmap(config); - if (!heap_base) - 
return NULL; - - /* walk heap to SinitMleData */ - /* skip BiosData */ - heap_ptr = heap_base + *(u64 *)heap_base; - /* skip OsMleData */ - heap_ptr += *(u64 *)heap_ptr; - /* skip OsSinitData */ - heap_ptr += *(u64 *)heap_ptr; - /* now points to SinitMleDataSize; set to SinitMleData */ - heap_ptr += sizeof(u64); - /* get addr of DMAR table */ - dmar_tbl = (struct acpi_table_header *)(heap_ptr + - ((struct sinit_mle_data *)heap_ptr)->vtd_dmars_off - - sizeof(u64)); - - /* don't unmap heap because dmar.c needs access to this */ - - return dmar_tbl; -} - -int tboot_force_iommu(void) -{ - if (!tboot_enabled()) - return 0; - - if (no_iommu || swiotlb || dmar_disabled) - pr_warning("Forcing Intel-IOMMU to enabled\n"); - - dmar_disabled = 0; -#ifdef CONFIG_SWIOTLB - swiotlb = 0; -#endif - no_iommu = 0; - - return 1; -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/tce_64.c b/ANDROID_3.4.5/arch/x86/kernel/tce_64.c deleted file mode 100644 index ab40954e..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/tce_64.c +++ /dev/null @@ -1,190 +0,0 @@ -/* - * This file manages the translation entries for the IBM Calgary IOMMU. - * - * Derived from arch/powerpc/platforms/pseries/iommu.c - * - * Copyright (C) IBM Corporation, 2006 - * - * Author: Jon Mason <jdmason@us.ibm.com> - * Author: Muli Ben-Yehuda <muli@il.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/types.h> -#include <linux/slab.h> -#include <linux/mm.h> -#include <linux/spinlock.h> -#include <linux/string.h> -#include <linux/pci.h> -#include <linux/dma-mapping.h> -#include <linux/bootmem.h> -#include <asm/tce.h> -#include <asm/calgary.h> -#include <asm/proto.h> -#include <asm/cacheflush.h> - -/* flush a tce at 'tceaddr' to main memory */ -static inline void flush_tce(void* tceaddr) -{ - /* a single tce can't cross a cache line */ - if (cpu_has_clflush) - clflush(tceaddr); - else - wbinvd(); -} - -void tce_build(struct iommu_table *tbl, unsigned long index, - unsigned int npages, unsigned long uaddr, int direction) -{ - u64* tp; - u64 t; - u64 rpn; - - t = (1 << TCE_READ_SHIFT); - if (direction != DMA_TO_DEVICE) - t |= (1 << TCE_WRITE_SHIFT); - - tp = ((u64*)tbl->it_base) + index; - - while (npages--) { - rpn = (virt_to_bus((void*)uaddr)) >> PAGE_SHIFT; - t &= ~TCE_RPN_MASK; - t |= (rpn << TCE_RPN_SHIFT); - - *tp = cpu_to_be64(t); - flush_tce(tp); - - uaddr += PAGE_SIZE; - tp++; - } -} - -void tce_free(struct iommu_table *tbl, long index, unsigned int npages) -{ - u64* tp; - - tp = ((u64*)tbl->it_base) + index; - - while (npages--) { - *tp = cpu_to_be64(0); - flush_tce(tp); - tp++; - } -} - -static inline unsigned int table_size_to_number_of_entries(unsigned char size) -{ - /* - * size is the order of the table, 0-7 - * smallest table is 8K entries, so shift result by 13 to - * multiply by 8K - */ - return (1 << size) << 13; -} - -static int tce_table_setparms(struct pci_dev *dev, struct iommu_table *tbl) -{ - unsigned int bitmapsz; - unsigned long bmppages; - int ret; - - tbl->it_busno = dev->bus->number; - - /* set the tce table size - measured in entries */ - tbl->it_size = table_size_to_number_of_entries(specified_table_size); - - /* - * number of bytes needed for the bitmap size in number of - * entries; we need one bit per entry - */ - bitmapsz = tbl->it_size / BITS_PER_BYTE; - bmppages = __get_free_pages(GFP_KERNEL, get_order(bitmapsz)); - if (!bmppages) { - printk(KERN_ERR "Calgary: cannot allocate bitmap\n"); - ret = -ENOMEM; - goto done; - } - - tbl->it_map = (unsigned long*)bmppages; - - memset(tbl->it_map, 0, bitmapsz); - - tbl->it_hint = 0; - - spin_lock_init(&tbl->it_lock); - - return 0; - -done: - return ret; -} - -int __init build_tce_table(struct pci_dev *dev, void __iomem *bbar) -{ - struct iommu_table *tbl; - int ret; - - if (pci_iommu(dev->bus)) { - printk(KERN_ERR "Calgary: dev %p has sysdata->iommu %p\n", - dev, pci_iommu(dev->bus)); - BUG(); - } - - tbl = kzalloc(sizeof(struct iommu_table), GFP_KERNEL); - if (!tbl) { - printk(KERN_ERR "Calgary: error allocating iommu_table\n"); - ret = -ENOMEM; - goto done; - } - - ret = tce_table_setparms(dev, tbl); - if (ret) - goto free_tbl; - - tbl->bbar = bbar; - - set_pci_iommu(dev->bus, tbl); - - return 0; - -free_tbl: - kfree(tbl); -done: - return ret; -} - -void * __init alloc_tce_table(void) -{ - unsigned int size; - - size = table_size_to_number_of_entries(specified_table_size); - size *= TCE_ENTRY_SIZE; - - return __alloc_bootmem_low(size, size, 0); -} - -void __init free_tce_table(void *tbl) -{ - unsigned int size; - - if (!tbl) - return; - - size = table_size_to_number_of_entries(specified_table_size); - size *= TCE_ENTRY_SIZE; - - free_bootmem(__pa(tbl), size); -} diff --git 
a/ANDROID_3.4.5/arch/x86/kernel/test_nx.c b/ANDROID_3.4.5/arch/x86/kernel/test_nx.c deleted file mode 100644 index 3f92ce07..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/test_nx.c +++ /dev/null @@ -1,175 +0,0 @@ -/* - * test_nx.c: functional test for NX functionality - * - * (C) Copyright 2008 Intel Corporation - * Author: Arjan van de Ven <arjan@linux.intel.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ -#include <linux/module.h> -#include <linux/sort.h> -#include <linux/slab.h> - -#include <asm/uaccess.h> -#include <asm/asm.h> - -extern int rodata_test_data; - -/* - * This file checks 4 things: - * 1) Check if the stack is not executable - * 2) Check if kmalloc memory is not executable - * 3) Check if the .rodata section is not executable - * 4) Check if the .data section of a module is not executable - * - * To do this, the test code tries to execute memory in stack/kmalloc/etc, - * and then checks if the expected trap happens. - * - * Sadly, this implies having a dynamic exception handling table entry. - * ... which can be done (and will make Rusty cry)... but it can only - * be done in a stand-alone module with only 1 entry total. - * (otherwise we'd have to sort and that's just too messy) - */ - - - -/* - * We want to set up an exception handling point on our stack, - * which means a variable value. This function is rather dirty - * and walks the exception table of the module, looking for a magic - * marker and replaces it with a specific function. - */ -static void fudze_exception_table(void *marker, void *new) -{ - struct module *mod = THIS_MODULE; - struct exception_table_entry *extable; - - /* - * Note: This module has only 1 exception table entry, - * so searching and sorting is not needed. If that changes, - * this would be the place to search and re-sort the exception - * table. - */ - if (mod->num_exentries > 1) { - printk(KERN_ERR "test_nx: too many exception table entries!\n"); - printk(KERN_ERR "test_nx: test results are not reliable.\n"); - return; - } - extable = (struct exception_table_entry *)mod->extable; - extable[0].insn = (unsigned long)new; -} - - -/* - * exception tables get their symbols translated so we need - * to use a fake function to put in there, which we can then - * replace at runtime. - */ -void foo_label(void); - -/* - * returns 0 for not-executable, negative for executable - * - * Note: we cannot allow this function to be inlined, because - * that would give us more than 1 exception table entry. - * This in turn would break the assumptions above. 
- */ -static noinline int test_address(void *address) -{ - unsigned long result; - - /* Set up an exception table entry for our address */ - fudze_exception_table(&foo_label, address); - result = 1; - asm volatile( - "foo_label:\n" - "0: call *%[fake_code]\n" - "1:\n" - ".section .fixup,\"ax\"\n" - "2: mov %[zero], %[rslt]\n" - " ret\n" - ".previous\n" - _ASM_EXTABLE(0b,2b) - : [rslt] "=r" (result) - : [fake_code] "r" (address), [zero] "r" (0UL), "0" (result) - ); - /* change the exception table back for the next round */ - fudze_exception_table(address, &foo_label); - - if (result) - return -ENODEV; - return 0; -} - -static unsigned char test_data = 0xC3; /* 0xC3 is the opcode for "ret" */ - -static int test_NX(void) -{ - int ret = 0; - /* 0xC3 is the opcode for "ret" */ - char stackcode[] = {0xC3, 0x90, 0 }; - char *heap; - - test_data = 0xC3; - - printk(KERN_INFO "Testing NX protection\n"); - - /* Test 1: check if the stack is not executable */ - if (test_address(&stackcode)) { - printk(KERN_ERR "test_nx: stack was executable\n"); - ret = -ENODEV; - } - - - /* Test 2: Check if the heap is executable */ - heap = kmalloc(64, GFP_KERNEL); - if (!heap) - return -ENOMEM; - heap[0] = 0xC3; /* opcode for "ret" */ - - if (test_address(heap)) { - printk(KERN_ERR "test_nx: heap was executable\n"); - ret = -ENODEV; - } - kfree(heap); - - /* - * The following 2 tests currently fail, this needs to get fixed - * Until then, don't run them to avoid too many people getting scared - * by the error message - */ - -#ifdef CONFIG_DEBUG_RODATA - /* Test 3: Check if the .rodata section is executable */ - if (rodata_test_data != 0xC3) { - printk(KERN_ERR "test_nx: .rodata marker has invalid value\n"); - ret = -ENODEV; - } else if (test_address(&rodata_test_data)) { - printk(KERN_ERR "test_nx: .rodata section is executable\n"); - ret = -ENODEV; - } -#endif - -#if 0 - /* Test 4: Check if the .data section of a module is executable */ - if (test_address(&test_data)) { - printk(KERN_ERR "test_nx: .data section is executable\n"); - ret = -ENODEV; - } - -#endif - return ret; -} - -static void test_exit(void) -{ -} - -module_init(test_NX); -module_exit(test_exit); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Testcase for the NX infrastructure"); -MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>"); diff --git a/ANDROID_3.4.5/arch/x86/kernel/test_rodata.c b/ANDROID_3.4.5/arch/x86/kernel/test_rodata.c deleted file mode 100644 index c29e2357..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/test_rodata.c +++ /dev/null @@ -1,86 +0,0 @@ -/* - * test_rodata.c: functional test for mark_rodata_ro function - * - * (C) Copyright 2008 Intel Corporation - * Author: Arjan van de Ven <arjan@linux.intel.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. 
- */ -#include <linux/module.h> -#include <asm/cacheflush.h> -#include <asm/sections.h> - -int rodata_test(void) -{ - unsigned long result; - unsigned long start, end; - - /* test 1: read the value */ - /* If this test fails, some previous testrun has clobbered the state */ - if (!rodata_test_data) { - printk(KERN_ERR "rodata_test: test 1 fails (start data)\n"); - return -ENODEV; - } - - /* test 2: write to the variable; this should fault */ - /* - * If this test fails, we managed to overwrite the data - * - * This is written in assembly to be able to catch the - * exception that is supposed to happen in the correct - * case - */ - - result = 1; - asm volatile( - "0: mov %[zero],(%[rodata_test])\n" - " mov %[zero], %[rslt]\n" - "1:\n" - ".section .fixup,\"ax\"\n" - "2: jmp 1b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 16\n" -#ifdef CONFIG_X86_32 - " .long 0b,2b\n" -#else - " .quad 0b,2b\n" -#endif - ".previous" - : [rslt] "=r" (result) - : [rodata_test] "r" (&rodata_test_data), [zero] "r" (0UL) - ); - - - if (!result) { - printk(KERN_ERR "rodata_test: test data was not read only\n"); - return -ENODEV; - } - - /* test 3: check the value hasn't changed */ - /* If this test fails, we managed to overwrite the data */ - if (!rodata_test_data) { - printk(KERN_ERR "rodata_test: Test 3 failes (end data)\n"); - return -ENODEV; - } - /* test 4: check if the rodata section is 4Kb aligned */ - start = (unsigned long)__start_rodata; - end = (unsigned long)__end_rodata; - if (start & (PAGE_SIZE - 1)) { - printk(KERN_ERR "rodata_test: .rodata is not 4k aligned\n"); - return -ENODEV; - } - if (end & (PAGE_SIZE - 1)) { - printk(KERN_ERR "rodata_test: .rodata end is not 4k aligned\n"); - return -ENODEV; - } - - return 0; -} - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Testcase for the DEBUG_RODATA infrastructure"); -MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>"); diff --git a/ANDROID_3.4.5/arch/x86/kernel/time.c b/ANDROID_3.4.5/arch/x86/kernel/time.c deleted file mode 100644 index c6eba2b4..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/time.c +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 1991,1992,1995 Linus Torvalds - * Copyright (c) 1994 Alan Modra - * Copyright (c) 1995 Markus Kuhn - * Copyright (c) 1996 Ingo Molnar - * Copyright (c) 1998 Andrea Arcangeli - * Copyright (c) 2002,2006 Vojtech Pavlik - * Copyright (c) 2003 Andi Kleen - * - */ - -#include <linux/clockchips.h> -#include <linux/interrupt.h> -#include <linux/i8253.h> -#include <linux/time.h> -#include <linux/export.h> -#include <linux/mca.h> - -#include <asm/vsyscall.h> -#include <asm/x86_init.h> -#include <asm/i8259.h> -#include <asm/timer.h> -#include <asm/hpet.h> -#include <asm/time.h> - -#ifdef CONFIG_X86_64 -DEFINE_VVAR(volatile unsigned long, jiffies) = INITIAL_JIFFIES; -#endif - -unsigned long profile_pc(struct pt_regs *regs) -{ - unsigned long pc = instruction_pointer(regs); - - if (!user_mode_vm(regs) && in_lock_functions(pc)) { -#ifdef CONFIG_FRAME_POINTER - return *(unsigned long *)(regs->bp + sizeof(long)); -#else - unsigned long *sp = - (unsigned long *)kernel_stack_pointer(regs); - /* - * Return address is either directly at stack pointer - * or above a saved flags. Eflags has bits 22-31 zero, - * kernel addresses don't. 
- */ - if (sp[0] >> 22) - return sp[0]; - if (sp[1] >> 22) - return sp[1]; -#endif - } - return pc; -} -EXPORT_SYMBOL(profile_pc); - -/* - * Default timer interrupt handler for PIT/HPET - */ -static irqreturn_t timer_interrupt(int irq, void *dev_id) -{ - global_clock_event->event_handler(global_clock_event); - - /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */ - if (MCA_bus) - outb_p(inb_p(0x61)| 0x80, 0x61); - - return IRQ_HANDLED; -} - -static struct irqaction irq0 = { - .handler = timer_interrupt, - .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER, - .name = "timer" -}; - -void __init setup_default_timer_irq(void) -{ - setup_irq(0, &irq0); -} - -/* Default timer init function */ -void __init hpet_time_init(void) -{ - if (!hpet_enable()) - setup_pit_timer(); - setup_default_timer_irq(); -} - -static __init void x86_late_time_init(void) -{ - x86_init.timers.timer_init(); - tsc_init(); -} - -/* - * Initialize TSC and delay the periodic timer init to - * late x86_late_time_init() so ioremap works. - */ -void __init time_init(void) -{ - late_time_init = x86_late_time_init; -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/tls.c b/ANDROID_3.4.5/arch/x86/kernel/tls.c deleted file mode 100644 index 9d9d2f9e..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/tls.c +++ /dev/null @@ -1,217 +0,0 @@ -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/sched.h> -#include <linux/user.h> -#include <linux/regset.h> - -#include <asm/uaccess.h> -#include <asm/desc.h> -#include <asm/ldt.h> -#include <asm/processor.h> -#include <asm/proto.h> -#include <asm/syscalls.h> - -#include "tls.h" - -/* - * sys_alloc_thread_area: get a yet unused TLS descriptor index. - */ -static int get_free_idx(void) -{ - struct thread_struct *t = ¤t->thread; - int idx; - - for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++) - if (desc_empty(&t->tls_array[idx])) - return idx + GDT_ENTRY_TLS_MIN; - return -ESRCH; -} - -static void set_tls_desc(struct task_struct *p, int idx, - const struct user_desc *info, int n) -{ - struct thread_struct *t = &p->thread; - struct desc_struct *desc = &t->tls_array[idx - GDT_ENTRY_TLS_MIN]; - int cpu; - - /* - * We must not get preempted while modifying the TLS. 
- */ - cpu = get_cpu(); - - while (n-- > 0) { - if (LDT_empty(info)) - desc->a = desc->b = 0; - else - fill_ldt(desc, info); - ++info; - ++desc; - } - - if (t == ¤t->thread) - load_TLS(t, cpu); - - put_cpu(); -} - -/* - * Set a given TLS descriptor: - */ -int do_set_thread_area(struct task_struct *p, int idx, - struct user_desc __user *u_info, - int can_allocate) -{ - struct user_desc info; - - if (copy_from_user(&info, u_info, sizeof(info))) - return -EFAULT; - - if (idx == -1) - idx = info.entry_number; - - /* - * index -1 means the kernel should try to find and - * allocate an empty descriptor: - */ - if (idx == -1 && can_allocate) { - idx = get_free_idx(); - if (idx < 0) - return idx; - if (put_user(idx, &u_info->entry_number)) - return -EFAULT; - } - - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) - return -EINVAL; - - set_tls_desc(p, idx, &info, 1); - - return 0; -} - -asmlinkage int sys_set_thread_area(struct user_desc __user *u_info) -{ - int ret = do_set_thread_area(current, -1, u_info, 1); - asmlinkage_protect(1, ret, u_info); - return ret; -} - - -/* - * Get the current Thread-Local Storage area: - */ - -static void fill_user_desc(struct user_desc *info, int idx, - const struct desc_struct *desc) - -{ - memset(info, 0, sizeof(*info)); - info->entry_number = idx; - info->base_addr = get_desc_base(desc); - info->limit = get_desc_limit(desc); - info->seg_32bit = desc->d; - info->contents = desc->type >> 2; - info->read_exec_only = !(desc->type & 2); - info->limit_in_pages = desc->g; - info->seg_not_present = !desc->p; - info->useable = desc->avl; -#ifdef CONFIG_X86_64 - info->lm = desc->l; -#endif -} - -int do_get_thread_area(struct task_struct *p, int idx, - struct user_desc __user *u_info) -{ - struct user_desc info; - - if (idx == -1 && get_user(idx, &u_info->entry_number)) - return -EFAULT; - - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) - return -EINVAL; - - fill_user_desc(&info, idx, - &p->thread.tls_array[idx - GDT_ENTRY_TLS_MIN]); - - if (copy_to_user(u_info, &info, sizeof(info))) - return -EFAULT; - return 0; -} - -asmlinkage int sys_get_thread_area(struct user_desc __user *u_info) -{ - int ret = do_get_thread_area(current, -1, u_info); - asmlinkage_protect(1, ret, u_info); - return ret; -} - -int regset_tls_active(struct task_struct *target, - const struct user_regset *regset) -{ - struct thread_struct *t = &target->thread; - int n = GDT_ENTRY_TLS_ENTRIES; - while (n > 0 && desc_empty(&t->tls_array[n - 1])) - --n; - return n; -} - -int regset_tls_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - const struct desc_struct *tls; - - if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) || - (pos % sizeof(struct user_desc)) != 0 || - (count % sizeof(struct user_desc)) != 0) - return -EINVAL; - - pos /= sizeof(struct user_desc); - count /= sizeof(struct user_desc); - - tls = &target->thread.tls_array[pos]; - - if (kbuf) { - struct user_desc *info = kbuf; - while (count-- > 0) - fill_user_desc(info++, GDT_ENTRY_TLS_MIN + pos++, - tls++); - } else { - struct user_desc __user *u_info = ubuf; - while (count-- > 0) { - struct user_desc info; - fill_user_desc(&info, GDT_ENTRY_TLS_MIN + pos++, tls++); - if (__copy_to_user(u_info++, &info, sizeof(info))) - return -EFAULT; - } - } - - return 0; -} - -int regset_tls_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ 
- struct user_desc infobuf[GDT_ENTRY_TLS_ENTRIES]; - const struct user_desc *info; - - if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) || - (pos % sizeof(struct user_desc)) != 0 || - (count % sizeof(struct user_desc)) != 0) - return -EINVAL; - - if (kbuf) - info = kbuf; - else if (__copy_from_user(infobuf, ubuf, count)) - return -EFAULT; - else - info = infobuf; - - set_tls_desc(target, - GDT_ENTRY_TLS_MIN + (pos / sizeof(struct user_desc)), - info, count / sizeof(struct user_desc)); - - return 0; -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/tls.h b/ANDROID_3.4.5/arch/x86/kernel/tls.h deleted file mode 100644 index 2f083a2f..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/tls.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Internal declarations for x86 TLS implementation functions. - * - * Copyright (C) 2007 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. - * - * Red Hat Author: Roland McGrath. - */ - -#ifndef _ARCH_X86_KERNEL_TLS_H - -#include <linux/regset.h> - -extern user_regset_active_fn regset_tls_active; -extern user_regset_get_fn regset_tls_get; -extern user_regset_set_fn regset_tls_set; - -#endif /* _ARCH_X86_KERNEL_TLS_H */ diff --git a/ANDROID_3.4.5/arch/x86/kernel/topology.c b/ANDROID_3.4.5/arch/x86/kernel/topology.c deleted file mode 100644 index 76ee9770..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/topology.c +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Populate sysfs with topology information - * - * Written by: Matthew Dobson, IBM Corporation - * Original Code: Paul Dorwin, IBM Corporation, Patrick Mochel, OSDL - * - * Copyright (C) 2002, IBM Corp. - * - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Send feedback to <colpatch@us.ibm.com> - */ -#include <linux/nodemask.h> -#include <linux/export.h> -#include <linux/mmzone.h> -#include <linux/init.h> -#include <linux/smp.h> -#include <asm/cpu.h> - -static DEFINE_PER_CPU(struct x86_cpu, cpu_devices); - -#ifdef CONFIG_HOTPLUG_CPU -int __ref arch_register_cpu(int num) -{ - /* - * CPU0 cannot be offlined due to several - * restrictions and assumptions in kernel. This basically - * doesn't add a control file, one cannot attempt to offline - * BSP. - * - * Also certain PCI quirks require not to enable hotplug control - * for all CPU's. 
- */ - if (num) - per_cpu(cpu_devices, num).cpu.hotpluggable = 1; - - return register_cpu(&per_cpu(cpu_devices, num).cpu, num); -} -EXPORT_SYMBOL(arch_register_cpu); - -void arch_unregister_cpu(int num) -{ - unregister_cpu(&per_cpu(cpu_devices, num).cpu); -} -EXPORT_SYMBOL(arch_unregister_cpu); -#else /* CONFIG_HOTPLUG_CPU */ - -static int __init arch_register_cpu(int num) -{ - return register_cpu(&per_cpu(cpu_devices, num).cpu, num); -} -#endif /* CONFIG_HOTPLUG_CPU */ - -static int __init topology_init(void) -{ - int i; - -#ifdef CONFIG_NUMA - for_each_online_node(i) - register_one_node(i); -#endif - - for_each_present_cpu(i) - arch_register_cpu(i); - - return 0; -} -subsys_initcall(topology_init); diff --git a/ANDROID_3.4.5/arch/x86/kernel/trampoline.c b/ANDROID_3.4.5/arch/x86/kernel/trampoline.c deleted file mode 100644 index a73b6105..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/trampoline.c +++ /dev/null @@ -1,42 +0,0 @@ -#include <linux/io.h> -#include <linux/memblock.h> - -#include <asm/trampoline.h> -#include <asm/cacheflush.h> -#include <asm/pgtable.h> - -unsigned char *x86_trampoline_base; - -void __init setup_trampolines(void) -{ - phys_addr_t mem; - size_t size = PAGE_ALIGN(x86_trampoline_end - x86_trampoline_start); - - /* Has to be in very low memory so we can execute real-mode AP code. */ - mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); - if (!mem) - panic("Cannot allocate trampoline\n"); - - x86_trampoline_base = __va(mem); - memblock_reserve(mem, size); - - printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n", - x86_trampoline_base, (unsigned long long)mem, size); - - memcpy(x86_trampoline_base, x86_trampoline_start, size); -} - -/* - * setup_trampolines() gets called very early, to guarantee the - * availability of low memory. This is before the proper kernel page - * tables are set up, so we cannot set page permissions in that - * function. Thus, we use an arch_initcall instead. - */ -static int __init configure_trampolines(void) -{ - size_t size = PAGE_ALIGN(x86_trampoline_end - x86_trampoline_start); - - set_memory_x((unsigned long)x86_trampoline_base, size >> PAGE_SHIFT); - return 0; -} -arch_initcall(configure_trampolines); diff --git a/ANDROID_3.4.5/arch/x86/kernel/trampoline_32.S b/ANDROID_3.4.5/arch/x86/kernel/trampoline_32.S deleted file mode 100644 index 451c0a7e..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/trampoline_32.S +++ /dev/null @@ -1,83 +0,0 @@ -/* - * - * Trampoline.S Derived from Setup.S by Linus Torvalds - * - * 4 Jan 1997 Michael Chastain: changed to gnu as. - * - * This is only used for booting secondary CPUs in SMP machine - * - * Entry: CS:IP point to the start of our code, we are - * in real mode with no stack, but the rest of the - * trampoline page to make our stack and everything else - * is a mystery. - * - * We jump into arch/x86/kernel/head_32.S. - * - * On entry to trampoline_data, the processor is in real mode - * with 16-bit addressing and 16-bit data. CS has some value - * and IP is zero. Thus, data addresses need to be absolute - * (no relocation) and are taken with regard to r_base. 
- * - * If you work on this file, check the object module with - * objdump --reloc to make sure there are no relocation - * entries except for: - * - * TYPE VALUE - * R_386_32 startup_32_smp - * R_386_32 boot_gdt - */ - -#include <linux/linkage.h> -#include <linux/init.h> -#include <asm/segment.h> -#include <asm/page_types.h> - -#ifdef CONFIG_SMP - - .section ".x86_trampoline","a" - .balign PAGE_SIZE - .code16 - -ENTRY(trampoline_data) -r_base = . - wbinvd # Needed for NUMA-Q should be harmless for others - mov %cs, %ax # Code and data in the same place - mov %ax, %ds - - cli # We should be safe anyway - - movl $0xA5A5A5A5, trampoline_status - r_base - # write marker for master knows we're running - - /* GDT tables in non default location kernel can be beyond 16MB and - * lgdt will not be able to load the address as in real mode default - * operand size is 16bit. Use lgdtl instead to force operand size - * to 32 bit. - */ - - lidtl boot_idt_descr - r_base # load idt with 0, 0 - lgdtl boot_gdt_descr - r_base # load gdt with whatever is appropriate - - xor %ax, %ax - inc %ax # protected mode (PE) bit - lmsw %ax # into protected mode - # flush prefetch and jump to startup_32_smp in arch/i386/kernel/head.S - ljmpl $__BOOT_CS, $(startup_32_smp-__PAGE_OFFSET) - - # These need to be in the same 64K segment as the above; - # hence we don't use the boot_gdt_descr defined in head.S -boot_gdt_descr: - .word __BOOT_DS + 7 # gdt limit - .long boot_gdt - __PAGE_OFFSET # gdt base - -boot_idt_descr: - .word 0 # idt limit = 0 - .long 0 # idt base = 0L - -ENTRY(trampoline_status) - .long 0 - -.globl trampoline_end -trampoline_end: - -#endif /* CONFIG_SMP */ diff --git a/ANDROID_3.4.5/arch/x86/kernel/trampoline_64.S b/ANDROID_3.4.5/arch/x86/kernel/trampoline_64.S deleted file mode 100644 index 09ff5179..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/trampoline_64.S +++ /dev/null @@ -1,171 +0,0 @@ -/* - * - * Trampoline.S Derived from Setup.S by Linus Torvalds - * - * 4 Jan 1997 Michael Chastain: changed to gnu as. - * 15 Sept 2005 Eric Biederman: 64bit PIC support - * - * Entry: CS:IP point to the start of our code, we are - * in real mode with no stack, but the rest of the - * trampoline page to make our stack and everything else - * is a mystery. - * - * On entry to trampoline_data, the processor is in real mode - * with 16-bit addressing and 16-bit data. CS has some value - * and IP is zero. Thus, data addresses need to be absolute - * (no relocation) and are taken with regard to r_base. - * - * With the addition of trampoline_level4_pgt this code can - * now enter a 64bit kernel that lives at arbitrary 64bit - * physical addresses. - * - * If you work on this file, check the object module with objdump - * --full-contents --reloc to make sure there are no relocation - * entries. - */ - -#include <linux/linkage.h> -#include <linux/init.h> -#include <asm/pgtable_types.h> -#include <asm/page_types.h> -#include <asm/msr.h> -#include <asm/segment.h> -#include <asm/processor-flags.h> - - .section ".x86_trampoline","a" - .balign PAGE_SIZE - .code16 - -ENTRY(trampoline_data) -r_base = . 
- cli # We should be safe anyway - wbinvd - mov %cs, %ax # Code and data in the same place - mov %ax, %ds - mov %ax, %es - mov %ax, %ss - - - movl $0xA5A5A5A5, trampoline_status - r_base - # write marker for master knows we're running - - # Setup stack - movw $(trampoline_stack_end - r_base), %sp - - call verify_cpu # Verify the cpu supports long mode - testl %eax, %eax # Check for return code - jnz no_longmode - - mov %cs, %ax - movzx %ax, %esi # Find the 32bit trampoline location - shll $4, %esi - - # Fixup the absolute vectors - leal (startup_32 - r_base)(%esi), %eax - movl %eax, startup_32_vector - r_base - leal (startup_64 - r_base)(%esi), %eax - movl %eax, startup_64_vector - r_base - leal (tgdt - r_base)(%esi), %eax - movl %eax, (tgdt + 2 - r_base) - - /* - * GDT tables in non default location kernel can be beyond 16MB and - * lgdt will not be able to load the address as in real mode default - * operand size is 16bit. Use lgdtl instead to force operand size - * to 32 bit. - */ - - lidtl tidt - r_base # load idt with 0, 0 - lgdtl tgdt - r_base # load gdt with whatever is appropriate - - mov $X86_CR0_PE, %ax # protected mode (PE) bit - lmsw %ax # into protected mode - - # flush prefetch and jump to startup_32 - ljmpl *(startup_32_vector - r_base) - - .code32 - .balign 4 -startup_32: - movl $__KERNEL_DS, %eax # Initialize the %ds segment register - movl %eax, %ds - - movl $X86_CR4_PAE, %eax - movl %eax, %cr4 # Enable PAE mode - - # Setup trampoline 4 level pagetables - leal (trampoline_level4_pgt - r_base)(%esi), %eax - movl %eax, %cr3 - - movl $MSR_EFER, %ecx - movl $(1 << _EFER_LME), %eax # Enable Long Mode - xorl %edx, %edx - wrmsr - - # Enable paging and in turn activate Long Mode - # Enable protected mode - movl $(X86_CR0_PG | X86_CR0_PE), %eax - movl %eax, %cr0 - - /* - * At this point we're in long mode but in 32bit compatibility mode - * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn - * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we use - * the new gdt/idt that has __KERNEL_CS with CS.L = 1. 
- */ - ljmp *(startup_64_vector - r_base)(%esi) - - .code64 - .balign 4 -startup_64: - # Now jump into the kernel using virtual addresses - movq $secondary_startup_64, %rax - jmp *%rax - - .code16 -no_longmode: - hlt - jmp no_longmode -#include "verify_cpu.S" - - .balign 4 - # Careful these need to be in the same 64K segment as the above; -tidt: - .word 0 # idt limit = 0 - .word 0, 0 # idt base = 0L - - # Duplicate the global descriptor table - # so the kernel can live anywhere - .balign 4 -tgdt: - .short tgdt_end - tgdt # gdt limit - .long tgdt - r_base - .short 0 - .quad 0x00cf9b000000ffff # __KERNEL32_CS - .quad 0x00af9b000000ffff # __KERNEL_CS - .quad 0x00cf93000000ffff # __KERNEL_DS -tgdt_end: - - .balign 4 -startup_32_vector: - .long startup_32 - r_base - .word __KERNEL32_CS, 0 - - .balign 4 -startup_64_vector: - .long startup_64 - r_base - .word __KERNEL_CS, 0 - - .balign 4 -ENTRY(trampoline_status) - .long 0 - -trampoline_stack: - .org 0x1000 -trampoline_stack_end: -ENTRY(trampoline_level4_pgt) - .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE - .fill 510,8,0 - .quad level3_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE - -ENTRY(trampoline_end) diff --git a/ANDROID_3.4.5/arch/x86/kernel/traps.c b/ANDROID_3.4.5/arch/x86/kernel/traps.c deleted file mode 100644 index ff9281f1..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/traps.c +++ /dev/null @@ -1,733 +0,0 @@ -/* - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs - * - * Pentium III FXSR, SSE support - * Gareth Hughes <gareth@valinux.com>, May 2000 - */ - -/* - * Handle hardware traps and faults. - */ -#include <linux/interrupt.h> -#include <linux/kallsyms.h> -#include <linux/spinlock.h> -#include <linux/kprobes.h> -#include <linux/uaccess.h> -#include <linux/kdebug.h> -#include <linux/kgdb.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/ptrace.h> -#include <linux/string.h> -#include <linux/delay.h> -#include <linux/errno.h> -#include <linux/kexec.h> -#include <linux/sched.h> -#include <linux/timer.h> -#include <linux/init.h> -#include <linux/bug.h> -#include <linux/nmi.h> -#include <linux/mm.h> -#include <linux/smp.h> -#include <linux/io.h> - -#ifdef CONFIG_EISA -#include <linux/ioport.h> -#include <linux/eisa.h> -#endif - -#ifdef CONFIG_MCA -#include <linux/mca.h> -#endif - -#if defined(CONFIG_EDAC) -#include <linux/edac.h> -#endif - -#include <asm/kmemcheck.h> -#include <asm/stacktrace.h> -#include <asm/processor.h> -#include <asm/debugreg.h> -#include <linux/atomic.h> -#include <asm/traps.h> -#include <asm/desc.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> -#include <asm/mce.h> - -#include <asm/mach_traps.h> - -#ifdef CONFIG_X86_64 -#include <asm/x86_init.h> -#include <asm/pgalloc.h> -#include <asm/proto.h> -#else -#include <asm/processor-flags.h> -#include <asm/setup.h> - -asmlinkage int system_call(void); - -/* Do we ignore FPU interrupts ? */ -char ignore_fpu_irq; - -/* - * The IDT has to be page-aligned to simplify the Pentium - * F0 0F bug workaround. 
- */ -gate_desc idt_table[NR_VECTORS] __page_aligned_data = { { { { 0, 0 } } }, }; -#endif - -DECLARE_BITMAP(used_vectors, NR_VECTORS); -EXPORT_SYMBOL_GPL(used_vectors); - -static inline void conditional_sti(struct pt_regs *regs) -{ - if (regs->flags & X86_EFLAGS_IF) - local_irq_enable(); -} - -static inline void preempt_conditional_sti(struct pt_regs *regs) -{ - inc_preempt_count(); - if (regs->flags & X86_EFLAGS_IF) - local_irq_enable(); -} - -static inline void conditional_cli(struct pt_regs *regs) -{ - if (regs->flags & X86_EFLAGS_IF) - local_irq_disable(); -} - -static inline void preempt_conditional_cli(struct pt_regs *regs) -{ - if (regs->flags & X86_EFLAGS_IF) - local_irq_disable(); - dec_preempt_count(); -} - -static void __kprobes -do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, - long error_code, siginfo_t *info) -{ - struct task_struct *tsk = current; - -#ifdef CONFIG_X86_32 - if (regs->flags & X86_VM_MASK) { - /* - * traps 0, 1, 3, 4, and 5 should be forwarded to vm86. - * On nmi (interrupt 2), do_trap should not be called. - */ - if (trapnr < X86_TRAP_UD) - goto vm86_trap; - goto trap_signal; - } -#endif - - if (!user_mode(regs)) - goto kernel_trap; - -#ifdef CONFIG_X86_32 -trap_signal: -#endif - /* - * We want error_code and trap_nr set for userspace faults and - * kernelspace faults which result in die(), but not - * kernelspace faults which are fixed up. die() gives the - * process no chance to handle the signal and notice the - * kernel fault information, so that won't result in polluting - * the information about previously queued, but not yet - * delivered, faults. See also do_general_protection below. - */ - tsk->thread.error_code = error_code; - tsk->thread.trap_nr = trapnr; - -#ifdef CONFIG_X86_64 - if (show_unhandled_signals && unhandled_signal(tsk, signr) && - printk_ratelimit()) { - printk(KERN_INFO - "%s[%d] trap %s ip:%lx sp:%lx error:%lx", - tsk->comm, tsk->pid, str, - regs->ip, regs->sp, error_code); - print_vma_addr(" in ", regs->ip); - printk("\n"); - } -#endif - - if (info) - force_sig_info(signr, info, tsk); - else - force_sig(signr, tsk); - return; - -kernel_trap: - if (!fixup_exception(regs)) { - tsk->thread.error_code = error_code; - tsk->thread.trap_nr = trapnr; - die(str, regs, error_code); - } - return; - -#ifdef CONFIG_X86_32 -vm86_trap: - if (handle_vm86_trap((struct kernel_vm86_regs *) regs, - error_code, trapnr)) - goto trap_signal; - return; -#endif -} - -#define DO_ERROR(trapnr, signr, str, name) \ -dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ -{ \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ - return; \ - conditional_sti(regs); \ - do_trap(trapnr, signr, str, regs, error_code, NULL); \ -} - -#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ -dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ -{ \ - siginfo_t info; \ - info.si_signo = signr; \ - info.si_errno = 0; \ - info.si_code = sicode; \ - info.si_addr = (void __user *)siaddr; \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ - return; \ - conditional_sti(regs); \ - do_trap(trapnr, signr, str, regs, error_code, &info); \ -} - -DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV, - regs->ip) -DO_ERROR(X86_TRAP_OF, SIGSEGV, "overflow", overflow) -DO_ERROR(X86_TRAP_BR, SIGSEGV, "bounds", bounds) -DO_ERROR_INFO(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, - regs->ip) -DO_ERROR(X86_TRAP_OLD_MF, 
SIGFPE, "coprocessor segment overrun", - coprocessor_segment_overrun) -DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS) -DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present) -#ifdef CONFIG_X86_32 -DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment) -#endif -DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check, - BUS_ADRALN, 0) - -#ifdef CONFIG_X86_64 -/* Runs on IST stack */ -dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) -{ - if (notify_die(DIE_TRAP, "stack segment", regs, error_code, - X86_TRAP_SS, SIGBUS) == NOTIFY_STOP) - return; - preempt_conditional_sti(regs); - do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL); - preempt_conditional_cli(regs); -} - -dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) -{ - static const char str[] = "double fault"; - struct task_struct *tsk = current; - - /* Return not checked because double check cannot be ignored */ - notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); - - tsk->thread.error_code = error_code; - tsk->thread.trap_nr = X86_TRAP_DF; - - /* - * This is always a kernel trap and never fixable (and thus must - * never return). - */ - for (;;) - die(str, regs, error_code); -} -#endif - -dotraplinkage void __kprobes -do_general_protection(struct pt_regs *regs, long error_code) -{ - struct task_struct *tsk; - - conditional_sti(regs); - -#ifdef CONFIG_X86_32 - if (regs->flags & X86_VM_MASK) - goto gp_in_vm86; -#endif - - tsk = current; - if (!user_mode(regs)) - goto gp_in_kernel; - - tsk->thread.error_code = error_code; - tsk->thread.trap_nr = X86_TRAP_GP; - - if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && - printk_ratelimit()) { - printk(KERN_INFO - "%s[%d] general protection ip:%lx sp:%lx error:%lx", - tsk->comm, task_pid_nr(tsk), - regs->ip, regs->sp, error_code); - print_vma_addr(" in ", regs->ip); - printk("\n"); - } - - force_sig(SIGSEGV, tsk); - return; - -#ifdef CONFIG_X86_32 -gp_in_vm86: - local_irq_enable(); - handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); - return; -#endif - -gp_in_kernel: - if (fixup_exception(regs)) - return; - - tsk->thread.error_code = error_code; - tsk->thread.trap_nr = X86_TRAP_GP; - if (notify_die(DIE_GPF, "general protection fault", regs, error_code, - X86_TRAP_GP, SIGSEGV) == NOTIFY_STOP) - return; - die("general protection fault", regs, error_code); -} - -/* May run on IST stack. */ -dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) -{ -#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP - if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, - SIGTRAP) == NOTIFY_STOP) - return; -#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ - - if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, - SIGTRAP) == NOTIFY_STOP) - return; - - /* - * Let others (NMI) know that the debug stack is in use - * as we may switch to the interrupt stack. - */ - debug_stack_usage_inc(); - preempt_conditional_sti(regs); - do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL); - preempt_conditional_cli(regs); - debug_stack_usage_dec(); -} - -#ifdef CONFIG_X86_64 -/* - * Help handler running on IST stack to switch back to user stack - * for scheduling or signal handling. 
The actual stack switch is done in - * entry.S - */ -asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) -{ - struct pt_regs *regs = eregs; - /* Did already sync */ - if (eregs == (struct pt_regs *)eregs->sp) - ; - /* Exception from user space */ - else if (user_mode(eregs)) - regs = task_pt_regs(current); - /* - * Exception from kernel and interrupts are enabled. Move to - * kernel process stack. - */ - else if (eregs->flags & X86_EFLAGS_IF) - regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); - if (eregs != regs) - *regs = *eregs; - return regs; -} -#endif - -/* - * Our handling of the processor debug registers is non-trivial. - * We do not clear them on entry and exit from the kernel. Therefore - * it is possible to get a watchpoint trap here from inside the kernel. - * However, the code in ./ptrace.c has ensured that the user can - * only set watchpoints on userspace addresses. Therefore the in-kernel - * watchpoint trap can only occur in code which is reading/writing - * from user space. Such code must not hold kernel locks (since it - * can equally take a page fault), therefore it is safe to call - * force_sig_info even though that claims and releases locks. - * - * Code in ./signal.c ensures that the debug control register - * is restored before we deliver any signal, and therefore that - * user code runs with the correct debug control register even though - * we clear it here. - * - * Being careful here means that we don't have to be as careful in a - * lot of more complicated places (task switching can be a bit lazy - * about restoring all the debug state, and ptrace doesn't have to - * find every occurrence of the TF bit that could be saved away even - * by user code) - * - * May run on IST stack. - */ -dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) -{ - struct task_struct *tsk = current; - int user_icebp = 0; - unsigned long dr6; - int si_code; - - get_debugreg(dr6, 6); - - /* Filter out all the reserved bits which are preset to 1 */ - dr6 &= ~DR6_RESERVED; - - /* - * If dr6 has no reason to give us about the origin of this trap, - * then it's very likely the result of an icebp/int01 trap. - * User wants a sigtrap for that. - */ - if (!dr6 && user_mode(regs)) - user_icebp = 1; - - /* Catch kmemcheck conditions first of all! */ - if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) - return; - - /* DR6 may or may not be cleared by the CPU */ - set_debugreg(0, 6); - - /* - * The processor cleared BTF, so don't mark that we need it set. - */ - clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP); - - /* Store the virtualized DR6 value */ - tsk->thread.debugreg6 = dr6; - - if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code, - SIGTRAP) == NOTIFY_STOP) - return; - - /* - * Let others (NMI) know that the debug stack is in use - * as we may switch to the interrupt stack. - */ - debug_stack_usage_inc(); - - /* It's safe to allow irq's after DR6 has been saved */ - preempt_conditional_sti(regs); - - if (regs->flags & X86_VM_MASK) { - handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, - X86_TRAP_DB); - preempt_conditional_cli(regs); - debug_stack_usage_dec(); - return; - } - - /* - * Single-stepping through system calls: ignore any exceptions in - * kernel space, but re-enable TF when returning to user mode. - * - * We already checked v86 mode above, so we can check for kernel mode - * by just checking the CPL of CS. 
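 * The code below clears DR_STEP in the saved debugreg6, sets TIF_SINGLESTEP
 * and drops X86_EFLAGS_TF, so the step trap is re-armed on the way back to
 * user mode instead of firing again inside the kernel.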
- */ - if ((dr6 & DR_STEP) && !user_mode(regs)) { - tsk->thread.debugreg6 &= ~DR_STEP; - set_tsk_thread_flag(tsk, TIF_SINGLESTEP); - regs->flags &= ~X86_EFLAGS_TF; - } - si_code = get_si_code(tsk->thread.debugreg6); - if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) - send_sigtrap(tsk, regs, error_code, si_code); - preempt_conditional_cli(regs); - debug_stack_usage_dec(); - - return; -} - -/* - * Note that we play around with the 'TS' bit in an attempt to get - * the correct behaviour even in the presence of the asynchronous - * IRQ13 behaviour - */ -void math_error(struct pt_regs *regs, int error_code, int trapnr) -{ - struct task_struct *task = current; - siginfo_t info; - unsigned short err; - char *str = (trapnr == X86_TRAP_MF) ? "fpu exception" : - "simd exception"; - - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP) - return; - conditional_sti(regs); - - if (!user_mode_vm(regs)) - { - if (!fixup_exception(regs)) { - task->thread.error_code = error_code; - task->thread.trap_nr = trapnr; - die(str, regs, error_code); - } - return; - } - - /* - * Save the info for the exception handler and clear the error. - */ - save_init_fpu(task); - task->thread.trap_nr = trapnr; - task->thread.error_code = error_code; - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_addr = (void __user *)regs->ip; - if (trapnr == X86_TRAP_MF) { - unsigned short cwd, swd; - /* - * (~cwd & swd) will mask out exceptions that are not set to unmasked - * status. 0x3f is the exception bits in these regs, 0x200 is the - * C1 reg you need in case of a stack fault, 0x040 is the stack - * fault bit. We should only be taking one exception at a time, - * so if this combination doesn't produce any single exception, - * then we have a bad program that isn't synchronizing its FPU usage - * and it will suffer the consequences since we won't be able to - * fully reproduce the context of the exception - */ - cwd = get_fpu_cwd(task); - swd = get_fpu_swd(task); - - err = swd & ~cwd; - } else { - /* - * The SIMD FPU exceptions are handled a little differently, as there - * is only a single status/control register. Thus, to determine which - * unmasked exception was caught we must mask the exception mask bits - * at 0x1f80, and then use these to mask the exception bits at 0x3f. - */ - unsigned short mxcsr = get_fpu_mxcsr(task); - err = ~(mxcsr >> 7) & mxcsr; - } - - if (err & 0x001) { /* Invalid op */ - /* - * swd & 0x240 == 0x040: Stack Underflow - * swd & 0x240 == 0x240: Stack Overflow - * User must clear the SF bit (0x40) if set - */ - info.si_code = FPE_FLTINV; - } else if (err & 0x004) { /* Divide by Zero */ - info.si_code = FPE_FLTDIV; - } else if (err & 0x008) { /* Overflow */ - info.si_code = FPE_FLTOVF; - } else if (err & 0x012) { /* Denormal, Underflow */ - info.si_code = FPE_FLTUND; - } else if (err & 0x020) { /* Precision */ - info.si_code = FPE_FLTRES; - } else { - /* - * If we're using IRQ 13, or supposedly even some trap - * X86_TRAP_MF implementations, it's possible - * we get a spurious trap, which is not an error. 
- */ - return; - } - force_sig_info(SIGFPE, &info, task); -} - -dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) -{ -#ifdef CONFIG_X86_32 - ignore_fpu_irq = 1; -#endif - - math_error(regs, error_code, X86_TRAP_MF); -} - -dotraplinkage void -do_simd_coprocessor_error(struct pt_regs *regs, long error_code) -{ - math_error(regs, error_code, X86_TRAP_XF); -} - -dotraplinkage void -do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) -{ - conditional_sti(regs); -#if 0 - /* No need to warn about this any longer. */ - printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); -#endif -} - -asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) -{ -} - -asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void) -{ -} - -/* - * 'math_state_restore()' saves the current math information in the - * old math state array, and gets the new ones from the current task - * - * Careful.. There are problems with IBM-designed IRQ13 behaviour. - * Don't touch unless you *really* know how it works. - * - * Must be called with kernel preemption disabled (eg with local - * local interrupts as in the case of do_device_not_available). - */ -void math_state_restore(void) -{ - struct task_struct *tsk = current; - - if (!tsk_used_math(tsk)) { - local_irq_enable(); - /* - * does a slab alloc which can sleep - */ - if (init_fpu(tsk)) { - /* - * ran out of memory! - */ - do_group_exit(SIGKILL); - return; - } - local_irq_disable(); - } - - __thread_fpu_begin(tsk); - /* - * Paranoid restore. send a SIGSEGV if we fail to restore the state. - */ - if (unlikely(restore_fpu_checking(tsk))) { - __thread_fpu_end(tsk); - force_sig(SIGSEGV, tsk); - return; - } - - tsk->fpu_counter++; -} -EXPORT_SYMBOL_GPL(math_state_restore); - -dotraplinkage void __kprobes -do_device_not_available(struct pt_regs *regs, long error_code) -{ -#ifdef CONFIG_MATH_EMULATION - if (read_cr0() & X86_CR0_EM) { - struct math_emu_info info = { }; - - conditional_sti(regs); - - info.regs = regs; - math_emulate(&info); - return; - } -#endif - math_state_restore(); /* interrupts still off */ -#ifdef CONFIG_X86_32 - conditional_sti(regs); -#endif -} - -#ifdef CONFIG_X86_32 -dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) -{ - siginfo_t info; - local_irq_enable(); - - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = ILL_BADSTK; - info.si_addr = NULL; - if (notify_die(DIE_TRAP, "iret exception", regs, error_code, - X86_TRAP_IRET, SIGILL) == NOTIFY_STOP) - return; - do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, - &info); -} -#endif - -/* Set of traps needed for early debugging. 
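 * Only the #DB, #BP and #PF gates are installed here and the IDT is loaded;
 * trap_init() below fills in the remaining vectors.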
*/ -void __init early_trap_init(void) -{ - set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK); - /* int3 can be called from all */ - set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK); - set_intr_gate(X86_TRAP_PF, &page_fault); - load_idt(&idt_descr); -} - -void __init trap_init(void) -{ - int i; - -#ifdef CONFIG_EISA - void __iomem *p = early_ioremap(0x0FFFD9, 4); - - if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24)) - EISA_bus = 1; - early_iounmap(p, 4); -#endif - - set_intr_gate(X86_TRAP_DE, &divide_error); - set_intr_gate_ist(X86_TRAP_NMI, &nmi, NMI_STACK); - /* int4 can be called from all */ - set_system_intr_gate(X86_TRAP_OF, &overflow); - set_intr_gate(X86_TRAP_BR, &bounds); - set_intr_gate(X86_TRAP_UD, &invalid_op); - set_intr_gate(X86_TRAP_NM, &device_not_available); -#ifdef CONFIG_X86_32 - set_task_gate(X86_TRAP_DF, GDT_ENTRY_DOUBLEFAULT_TSS); -#else - set_intr_gate_ist(X86_TRAP_DF, &double_fault, DOUBLEFAULT_STACK); -#endif - set_intr_gate(X86_TRAP_OLD_MF, &coprocessor_segment_overrun); - set_intr_gate(X86_TRAP_TS, &invalid_TSS); - set_intr_gate(X86_TRAP_NP, &segment_not_present); - set_intr_gate_ist(X86_TRAP_SS, &stack_segment, STACKFAULT_STACK); - set_intr_gate(X86_TRAP_GP, &general_protection); - set_intr_gate(X86_TRAP_SPURIOUS, &spurious_interrupt_bug); - set_intr_gate(X86_TRAP_MF, &coprocessor_error); - set_intr_gate(X86_TRAP_AC, &alignment_check); -#ifdef CONFIG_X86_MCE - set_intr_gate_ist(X86_TRAP_MC, &machine_check, MCE_STACK); -#endif - set_intr_gate(X86_TRAP_XF, &simd_coprocessor_error); - - /* Reserve all the builtin and the syscall vector: */ - for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) - set_bit(i, used_vectors); - -#ifdef CONFIG_IA32_EMULATION - set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall); - set_bit(IA32_SYSCALL_VECTOR, used_vectors); -#endif - -#ifdef CONFIG_X86_32 - set_system_trap_gate(SYSCALL_VECTOR, &system_call); - set_bit(SYSCALL_VECTOR, used_vectors); -#endif - - /* - * Should be a barrier for any external CPU state: - */ - cpu_init(); - - x86_init.irqs.trap_init(); - -#ifdef CONFIG_X86_64 - memcpy(&nmi_idt_table, &idt_table, IDT_ENTRIES * 16); - set_nmi_gate(X86_TRAP_DB, &debug); - set_nmi_gate(X86_TRAP_BP, &int3); -#endif -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/tsc.c b/ANDROID_3.4.5/arch/x86/kernel/tsc.c deleted file mode 100644 index fc0a147e..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/tsc.c +++ /dev/null @@ -1,1026 +0,0 @@ -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/timer.h> -#include <linux/acpi_pmtmr.h> -#include <linux/cpufreq.h> -#include <linux/delay.h> -#include <linux/clocksource.h> -#include <linux/percpu.h> -#include <linux/timex.h> - -#include <asm/hpet.h> -#include <asm/timer.h> -#include <asm/vgtod.h> -#include <asm/time.h> -#include <asm/delay.h> -#include <asm/hypervisor.h> -#include <asm/nmi.h> -#include <asm/x86_init.h> - -unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ -EXPORT_SYMBOL(cpu_khz); - -unsigned int __read_mostly tsc_khz; -EXPORT_SYMBOL(tsc_khz); - -/* - * TSC can be unstable due to cpufreq or due to unsynced TSCs - */ -static int __read_mostly tsc_unstable; - -/* native_sched_clock() is called before tsc_init(), so - we must start with the TSC soft disabled to prevent - erroneous rdtsc usage on !cpu_has_tsc processors */ -static int __read_mostly tsc_disabled = -1; - -int tsc_clocksource_reliable; -/* - * Scheduler clock - returns current time in nanosec units.
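 * native_sched_clock() scales the TSC through the per-cpu cyc2ns factors and
 * falls back to a coarse jiffies-based value while the TSC is still disabled.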
- */ -u64 native_sched_clock(void) -{ - u64 this_offset; - - /* - * Fall back to jiffies if there's no TSC available: - * ( But note that we still use it if the TSC is marked - * unstable. We do this because unlike Time Of Day, - * the scheduler clock tolerates small errors and it's - * very important for it to be as fast as the platform - * can achieve it. ) - */ - if (unlikely(tsc_disabled)) { - /* No locking but a rare wrong value is not a big deal: */ - return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); - } - - /* read the Time Stamp Counter: */ - rdtscll(this_offset); - - /* return the value in ns */ - return __cycles_2_ns(this_offset); -} - -/* We need to define a real function for sched_clock, to override the - weak default version */ -#ifdef CONFIG_PARAVIRT -unsigned long long sched_clock(void) -{ - return paravirt_sched_clock(); -} -#else -unsigned long long -sched_clock(void) __attribute__((alias("native_sched_clock"))); -#endif - -int check_tsc_unstable(void) -{ - return tsc_unstable; -} -EXPORT_SYMBOL_GPL(check_tsc_unstable); - -#ifdef CONFIG_X86_TSC -int __init notsc_setup(char *str) -{ - printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " - "cannot disable TSC completely.\n"); - tsc_disabled = 1; - return 1; -} -#else -/* - * disable flag for tsc. Takes effect by clearing the TSC cpu flag - * in cpu/common.c - */ -int __init notsc_setup(char *str) -{ - setup_clear_cpu_cap(X86_FEATURE_TSC); - return 1; -} -#endif - -__setup("notsc", notsc_setup); - -static int no_sched_irq_time; - -static int __init tsc_setup(char *str) -{ - if (!strcmp(str, "reliable")) - tsc_clocksource_reliable = 1; - if (!strncmp(str, "noirqtime", 9)) - no_sched_irq_time = 1; - return 1; -} - -__setup("tsc=", tsc_setup); - -#define MAX_RETRIES 5 -#define SMI_TRESHOLD 50000 - -/* - * Read TSC and the reference counters. Take care of SMI disturbance - */ -static u64 tsc_read_refs(u64 *p, int hpet) -{ - u64 t1, t2; - int i; - - for (i = 0; i < MAX_RETRIES; i++) { - t1 = get_cycles(); - if (hpet) - *p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; - else - *p = acpi_pm_read_early(); - t2 = get_cycles(); - if ((t2 - t1) < SMI_TRESHOLD) - return t2; - } - return ULLONG_MAX; -} - -/* - * Calculate the TSC frequency from HPET reference - */ -static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2) -{ - u64 tmp; - - if (hpet2 < hpet1) - hpet2 += 0x100000000ULL; - hpet2 -= hpet1; - tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); - do_div(tmp, 1000000); - do_div(deltatsc, tmp); - - return (unsigned long) deltatsc; -} - -/* - * Calculate the TSC frequency from PMTimer reference - */ -static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2) -{ - u64 tmp; - - if (!pm1 && !pm2) - return ULONG_MAX; - - if (pm2 < pm1) - pm2 += (u64)ACPI_PM_OVRRUN; - pm2 -= pm1; - tmp = pm2 * 1000000000LL; - do_div(tmp, PMTMR_TICKS_PER_SEC); - do_div(deltatsc, tmp); - - return (unsigned long) deltatsc; -} - -#define CAL_MS 10 -#define CAL_LATCH (PIT_TICK_RATE / (1000 / CAL_MS)) -#define CAL_PIT_LOOPS 1000 - -#define CAL2_MS 50 -#define CAL2_LATCH (PIT_TICK_RATE / (1000 / CAL2_MS)) -#define CAL2_PIT_LOOPS 5000 - - -/* - * Try to calibrate the TSC against the Programmable - * Interrupt Timer and return the frequency of the TSC - * in kHz. - * - * Return ULONG_MAX on failure to calibrate. 
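 * Failure means the PIT polling loop ran fewer than loopmin times, or the
 * max/min TSC delta ratio exceeded 10; both are treated as SMI/SMM
 * disturbance and the sample is discarded.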
- */ -static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin) -{ - u64 tsc, t1, t2, delta; - unsigned long tscmin, tscmax; - int pitcnt; - - /* Set the Gate high, disable speaker */ - outb((inb(0x61) & ~0x02) | 0x01, 0x61); - - /* - * Setup CTC channel 2* for mode 0, (interrupt on terminal - * count mode), binary count. Set the latch register to 50ms - * (LSB then MSB) to begin countdown. - */ - outb(0xb0, 0x43); - outb(latch & 0xff, 0x42); - outb(latch >> 8, 0x42); - - tsc = t1 = t2 = get_cycles(); - - pitcnt = 0; - tscmax = 0; - tscmin = ULONG_MAX; - while ((inb(0x61) & 0x20) == 0) { - t2 = get_cycles(); - delta = t2 - tsc; - tsc = t2; - if ((unsigned long) delta < tscmin) - tscmin = (unsigned int) delta; - if ((unsigned long) delta > tscmax) - tscmax = (unsigned int) delta; - pitcnt++; - } - - /* - * Sanity checks: - * - * If we were not able to read the PIT more than loopmin - * times, then we have been hit by a massive SMI - * - * If the maximum is 10 times larger than the minimum, - * then we got hit by an SMI as well. - */ - if (pitcnt < loopmin || tscmax > 10 * tscmin) - return ULONG_MAX; - - /* Calculate the PIT value */ - delta = t2 - t1; - do_div(delta, ms); - return delta; -} - -/* - * This reads the current MSB of the PIT counter, and - * checks if we are running on sufficiently fast and - * non-virtualized hardware. - * - * Our expectations are: - * - * - the PIT is running at roughly 1.19MHz - * - * - each IO is going to take about 1us on real hardware, - * but we allow it to be much faster (by a factor of 10) or - * _slightly_ slower (ie we allow up to a 2us read+counter - * update - anything else implies a unacceptably slow CPU - * or PIT for the fast calibration to work. - * - * - with 256 PIT ticks to read the value, we have 214us to - * see the same MSB (and overhead like doing a single TSC - * read per MSB value etc). - * - * - We're doing 2 reads per loop (LSB, MSB), and we expect - * them each to take about a microsecond on real hardware. - * So we expect a count value of around 100. But we'll be - * generous, and accept anything over 50. - * - * - if the PIT is stuck, and we see *many* more reads, we - * return early (and the next caller of pit_expect_msb() - * then consider it a failure when they don't see the - * next expected value). - * - * These expectations mean that we know that we have seen the - * transition from one expected value to another with a fairly - * high accuracy, and we didn't miss any events. We can thus - * use the TSC value at the transitions to calculate a pretty - * good value for the TSC frequencty. - */ -static inline int pit_verify_msb(unsigned char val) -{ - /* Ignore LSB */ - inb(0x42); - return inb(0x42) == val; -} - -static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap) -{ - int count; - u64 tsc = 0, prev_tsc = 0; - - for (count = 0; count < 50000; count++) { - if (!pit_verify_msb(val)) - break; - prev_tsc = tsc; - tsc = get_cycles(); - } - *deltap = get_cycles() - prev_tsc; - *tscp = tsc; - - /* - * We require _some_ success, but the quality control - * will be based on the error terms on the TSC values. - */ - return count > 5; -} - -/* - * How many MSB values do we want to see? We aim for - * a maximum error rate of 500ppm (in practice the - * real error is much smaller), but refuse to spend - * more than 50ms on it. 
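 * With PIT_TICK_RATE at roughly 1.193 MHz and 256 PIT ticks per MSB step,
 * 50ms corresponds to MAX_QUICK_PIT_ITERATIONS =
 * 50 * PIT_TICK_RATE / 1000 / 256, i.e. about 233 MSB transitions.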
- */ -#define MAX_QUICK_PIT_MS 50 -#define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256) - -static unsigned long quick_pit_calibrate(void) -{ - int i; - u64 tsc, delta; - unsigned long d1, d2; - - /* Set the Gate high, disable speaker */ - outb((inb(0x61) & ~0x02) | 0x01, 0x61); - - /* - * Counter 2, mode 0 (one-shot), binary count - * - * NOTE! Mode 2 decrements by two (and then the - * output is flipped each time, giving the same - * final output frequency as a decrement-by-one), - * so mode 0 is much better when looking at the - * individual counts. - */ - outb(0xb0, 0x43); - - /* Start at 0xffff */ - outb(0xff, 0x42); - outb(0xff, 0x42); - - /* - * The PIT starts counting at the next edge, so we - * need to delay for a microsecond. The easiest way - * to do that is to just read back the 16-bit counter - * once from the PIT. - */ - pit_verify_msb(0); - - if (pit_expect_msb(0xff, &tsc, &d1)) { - for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) { - if (!pit_expect_msb(0xff-i, &delta, &d2)) - break; - - /* - * Iterate until the error is less than 500 ppm - */ - delta -= tsc; - if (d1+d2 >= delta >> 11) - continue; - - /* - * Check the PIT one more time to verify that - * all TSC reads were stable wrt the PIT. - * - * This also guarantees serialization of the - * last cycle read ('d2') in pit_expect_msb. - */ - if (!pit_verify_msb(0xfe - i)) - break; - goto success; - } - } - printk("Fast TSC calibration failed\n"); - return 0; - -success: - /* - * Ok, if we get here, then we've seen the - * MSB of the PIT decrement 'i' times, and the - * error has shrunk to less than 500 ppm. - * - * As a result, we can depend on there not being - * any odd delays anywhere, and the TSC reads are - * reliable (within the error). - * - * kHz = ticks / time-in-seconds / 1000; - * kHz = (t2 - t1) / (I * 256 / PIT_TICK_RATE) / 1000 - * kHz = ((t2 - t1) * PIT_TICK_RATE) / (I * 256 * 1000) - */ - delta *= PIT_TICK_RATE; - do_div(delta, i*256*1000); - printk("Fast TSC calibration using PIT\n"); - return delta; -} - -/** - * native_calibrate_tsc - calibrate the tsc on boot - */ -unsigned long native_calibrate_tsc(void) -{ - u64 tsc1, tsc2, delta, ref1, ref2; - unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; - unsigned long flags, latch, ms, fast_calibrate; - int hpet = is_hpet_enabled(), i, loopmin; - - local_irq_save(flags); - fast_calibrate = quick_pit_calibrate(); - local_irq_restore(flags); - if (fast_calibrate) - return fast_calibrate; - - /* - * Run 5 calibration loops to get the lowest frequency value - * (the best estimate). We use two different calibration modes - * here: - * - * 1) PIT loop. We set the PIT Channel 2 to oneshot mode and - * load a timeout of 50ms. We read the time right after we - * started the timer and wait until the PIT count down reaches - * zero. In each wait loop iteration we read the TSC and check - * the delta to the previous read. We keep track of the min - * and max values of that delta. The delta is mostly defined - * by the IO time of the PIT access, so we can detect when a - * SMI/SMM disturbance happened between the two reads. If the - * maximum time is significantly larger than the minimum time, - * then we discard the result and have another try. - * - * 2) Reference counter. If available we use the HPET or the - * PMTIMER as a reference to check the sanity of that value. - * We use separate TSC readouts and check inside of the - * reference read for a SMI/SMM disturbance. We dicard - * disturbed values here as well. 
We do that around the PIT - * calibration delay loop as we have to wait for a certain - * amount of time anyway. - */ - - /* Preset PIT loop values */ - latch = CAL_LATCH; - ms = CAL_MS; - loopmin = CAL_PIT_LOOPS; - - for (i = 0; i < 3; i++) { - unsigned long tsc_pit_khz; - - /* - * Read the start value and the reference count of - * hpet/pmtimer when available. Then do the PIT - * calibration, which will take at least 50ms, and - * read the end value. - */ - local_irq_save(flags); - tsc1 = tsc_read_refs(&ref1, hpet); - tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin); - tsc2 = tsc_read_refs(&ref2, hpet); - local_irq_restore(flags); - - /* Pick the lowest PIT TSC calibration so far */ - tsc_pit_min = min(tsc_pit_min, tsc_pit_khz); - - /* hpet or pmtimer available ? */ - if (ref1 == ref2) - continue; - - /* Check, whether the sampling was disturbed by an SMI */ - if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) - continue; - - tsc2 = (tsc2 - tsc1) * 1000000LL; - if (hpet) - tsc2 = calc_hpet_ref(tsc2, ref1, ref2); - else - tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2); - - tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2); - - /* Check the reference deviation */ - delta = ((u64) tsc_pit_min) * 100; - do_div(delta, tsc_ref_min); - - /* - * If both calibration results are inside a 10% window - * then we can be sure, that the calibration - * succeeded. We break out of the loop right away. We - * use the reference value, as it is more precise. - */ - if (delta >= 90 && delta <= 110) { - printk(KERN_INFO - "TSC: PIT calibration matches %s. %d loops\n", - hpet ? "HPET" : "PMTIMER", i + 1); - return tsc_ref_min; - } - - /* - * Check whether PIT failed more than once. This - * happens in virtualized environments. We need to - * give the virtual PC a slightly longer timeframe for - * the HPET/PMTIMER to make the result precise. - */ - if (i == 1 && tsc_pit_min == ULONG_MAX) { - latch = CAL2_LATCH; - ms = CAL2_MS; - loopmin = CAL2_PIT_LOOPS; - } - } - - /* - * Now check the results. - */ - if (tsc_pit_min == ULONG_MAX) { - /* PIT gave no useful value */ - printk(KERN_WARNING "TSC: Unable to calibrate against PIT\n"); - - /* We don't have an alternative source, disable TSC */ - if (!hpet && !ref1 && !ref2) { - printk("TSC: No reference (HPET/PMTIMER) available\n"); - return 0; - } - - /* The alternative source failed as well, disable TSC */ - if (tsc_ref_min == ULONG_MAX) { - printk(KERN_WARNING "TSC: HPET/PMTIMER calibration " - "failed.\n"); - return 0; - } - - /* Use the alternative source */ - printk(KERN_INFO "TSC: using %s reference calibration\n", - hpet ? "HPET" : "PMTIMER"); - - return tsc_ref_min; - } - - /* We don't have an alternative source, use the PIT calibration value */ - if (!hpet && !ref1 && !ref2) { - printk(KERN_INFO "TSC: Using PIT calibration value\n"); - return tsc_pit_min; - } - - /* The alternative source failed, use the PIT calibration value */ - if (tsc_ref_min == ULONG_MAX) { - printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed. " - "Using PIT calibration\n"); - return tsc_pit_min; - } - - /* - * The calibration values differ too much. In doubt, we use - * the PIT value as we know that there are PMTIMERs around - * running at double speed. At least we let the user know: - */ - printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n", - hpet ? 
"HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); - printk(KERN_INFO "TSC: Using PIT calibration value\n"); - return tsc_pit_min; -} - -int recalibrate_cpu_khz(void) -{ -#ifndef CONFIG_SMP - unsigned long cpu_khz_old = cpu_khz; - - if (cpu_has_tsc) { - tsc_khz = x86_platform.calibrate_tsc(); - cpu_khz = tsc_khz; - cpu_data(0).loops_per_jiffy = - cpufreq_scale(cpu_data(0).loops_per_jiffy, - cpu_khz_old, cpu_khz); - return 0; - } else - return -ENODEV; -#else - return -ENODEV; -#endif -} - -EXPORT_SYMBOL(recalibrate_cpu_khz); - - -/* Accelerators for sched_clock() - * convert from cycles(64bits) => nanoseconds (64bits) - * basic equation: - * ns = cycles / (freq / ns_per_sec) - * ns = cycles * (ns_per_sec / freq) - * ns = cycles * (10^9 / (cpu_khz * 10^3)) - * ns = cycles * (10^6 / cpu_khz) - * - * Then we use scaling math (suggested by george@mvista.com) to get: - * ns = cycles * (10^6 * SC / cpu_khz) / SC - * ns = cycles * cyc2ns_scale / SC - * - * And since SC is a constant power of two, we can convert the div - * into a shift. - * - * We can use khz divisor instead of mhz to keep a better precision, since - * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. - * (mathieu.desnoyers@polymtl.ca) - * - * -johnstul@us.ibm.com "math is hard, lets go shopping!" - */ - -DEFINE_PER_CPU(unsigned long, cyc2ns); -DEFINE_PER_CPU(unsigned long long, cyc2ns_offset); - -static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) -{ - unsigned long long tsc_now, ns_now, *offset; - unsigned long flags, *scale; - - local_irq_save(flags); - sched_clock_idle_sleep_event(); - - scale = &per_cpu(cyc2ns, cpu); - offset = &per_cpu(cyc2ns_offset, cpu); - - rdtscll(tsc_now); - ns_now = __cycles_2_ns(tsc_now); - - if (cpu_khz) { - *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; - *offset = ns_now - mult_frac(tsc_now, *scale, - (1UL << CYC2NS_SCALE_FACTOR)); - } - - sched_clock_idle_wakeup_event(0); - local_irq_restore(flags); -} - -static unsigned long long cyc2ns_suspend; - -void tsc_save_sched_clock_state(void) -{ - if (!sched_clock_stable) - return; - - cyc2ns_suspend = sched_clock(); -} - -/* - * Even on processors with invariant TSC, TSC gets reset in some the - * ACPI system sleep states. And in some systems BIOS seem to reinit TSC to - * arbitrary value (still sync'd across cpu's) during resume from such sleep - * states. To cope up with this, recompute the cyc2ns_offset for each cpu so - * that sched_clock() continues from the point where it was left off during - * suspend. - */ -void tsc_restore_sched_clock_state(void) -{ - unsigned long long offset; - unsigned long flags; - int cpu; - - if (!sched_clock_stable) - return; - - local_irq_save(flags); - - __this_cpu_write(cyc2ns_offset, 0); - offset = cyc2ns_suspend - sched_clock(); - - for_each_possible_cpu(cpu) - per_cpu(cyc2ns_offset, cpu) = offset; - - local_irq_restore(flags); -} - -#ifdef CONFIG_CPU_FREQ - -/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency - * changes. - * - * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's - * not that important because current Opteron setups do not support - * scaling on SMP anyroads. - * - * Should fix up last_tsc too. Currently gettimeofday in the - * first tick after the change will be slightly wrong. 
- */ - -static unsigned int ref_freq; -static unsigned long loops_per_jiffy_ref; -static unsigned long tsc_khz_ref; - -static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, - void *data) -{ - struct cpufreq_freqs *freq = data; - unsigned long *lpj; - - if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC)) - return 0; - - lpj = &boot_cpu_data.loops_per_jiffy; -#ifdef CONFIG_SMP - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) - lpj = &cpu_data(freq->cpu).loops_per_jiffy; -#endif - - if (!ref_freq) { - ref_freq = freq->old; - loops_per_jiffy_ref = *lpj; - tsc_khz_ref = tsc_khz; - } - if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || - (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || - (val == CPUFREQ_RESUMECHANGE)) { - *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); - - tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) - mark_tsc_unstable("cpufreq changes"); - } - - set_cyc2ns_scale(tsc_khz, freq->cpu); - - return 0; -} - -static struct notifier_block time_cpufreq_notifier_block = { - .notifier_call = time_cpufreq_notifier -}; - -static int __init cpufreq_tsc(void) -{ - if (!cpu_has_tsc) - return 0; - if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) - return 0; - cpufreq_register_notifier(&time_cpufreq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - return 0; -} - -core_initcall(cpufreq_tsc); - -#endif /* CONFIG_CPU_FREQ */ - -/* clocksource code */ - -static struct clocksource clocksource_tsc; - -/* - * We compare the TSC to the cycle_last value in the clocksource - * structure to avoid a nasty time-warp. This can be observed in a - * very small window right after one CPU updated cycle_last under - * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which - * is smaller than the cycle_last reference value due to a TSC which - * is slighty behind. This delta is nowhere else observable, but in - * that case it results in a forward time jump in the range of hours - * due to the unsigned delta calculation of the time keeping core - * code, which is necessary to support wrapping clocksources like pm - * timer. - */ -static cycle_t read_tsc(struct clocksource *cs) -{ - cycle_t ret = (cycle_t)get_cycles(); - - return ret >= clocksource_tsc.cycle_last ? 
- ret : clocksource_tsc.cycle_last; -} - -static void resume_tsc(struct clocksource *cs) -{ - clocksource_tsc.cycle_last = 0; -} - -static struct clocksource clocksource_tsc = { - .name = "tsc", - .rating = 300, - .read = read_tsc, - .resume = resume_tsc, - .mask = CLOCKSOURCE_MASK(64), - .flags = CLOCK_SOURCE_IS_CONTINUOUS | - CLOCK_SOURCE_MUST_VERIFY, -#ifdef CONFIG_X86_64 - .archdata = { .vclock_mode = VCLOCK_TSC }, -#endif -}; - -void mark_tsc_unstable(char *reason) -{ - if (!tsc_unstable) { - tsc_unstable = 1; - sched_clock_stable = 0; - disable_sched_clock_irqtime(); - printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); - /* Change only the rating, when not registered */ - if (clocksource_tsc.mult) - clocksource_mark_unstable(&clocksource_tsc); - else { - clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE; - clocksource_tsc.rating = 0; - } - } -} - -EXPORT_SYMBOL_GPL(mark_tsc_unstable); - -static void __init check_system_tsc_reliable(void) -{ -#ifdef CONFIG_MGEODE_LX - /* RTSC counts during suspend */ -#define RTSC_SUSP 0x100 - unsigned long res_low, res_high; - - rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); - /* Geode_LX - the OLPC CPU has a very reliable TSC */ - if (res_low & RTSC_SUSP) - tsc_clocksource_reliable = 1; -#endif - if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) - tsc_clocksource_reliable = 1; -} - -/* - * Make an educated guess if the TSC is trustworthy and synchronized - * over all CPUs. - */ -__cpuinit int unsynchronized_tsc(void) -{ - if (!cpu_has_tsc || tsc_unstable) - return 1; - -#ifdef CONFIG_SMP - if (apic_is_clustered_box()) - return 1; -#endif - - if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) - return 0; - - if (tsc_clocksource_reliable) - return 0; - /* - * Intel systems are normally all synchronized. - * Exceptions must mark TSC as unstable: - */ - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { - /* assume multi socket systems are not synchronized: */ - if (num_possible_cpus() > 1) - return 1; - } - - return 0; -} - - -static void tsc_refine_calibration_work(struct work_struct *work); -static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work); -/** - * tsc_refine_calibration_work - Further refine tsc freq calibration - * @work - ignored. - * - * This functions uses delayed work over a period of a - * second to further refine the TSC freq value. Since this is - * timer based, instead of loop based, we don't block the boot - * process while this longer calibration is done. - * - * If there are any calibration anomalies (too many SMIs, etc), - * or the refined calibration is off by 1% of the fast early - * calibration, we throw out the new calibration and use the - * early calibration. - */ -static void tsc_refine_calibration_work(struct work_struct *work) -{ - static u64 tsc_start = -1, ref_start; - static int hpet; - u64 tsc_stop, ref_stop, delta; - unsigned long freq; - - /* Don't bother refining TSC on unstable systems */ - if (check_tsc_unstable()) - goto out; - - /* - * Since the work is started early in boot, we may be - * delayed the first time we expire. So set the workqueue - * again once we know timers are working. - */ - if (tsc_start == -1) { - /* - * Only set hpet once, to avoid mixing hardware - * if the hpet becomes enabled later. - */ - hpet = is_hpet_enabled(); - schedule_delayed_work(&tsc_irqwork, HZ); - tsc_start = tsc_read_refs(&ref_start, hpet); - return; - } - - tsc_stop = tsc_read_refs(&ref_stop, hpet); - - /* hpet or pmtimer available ? 
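 * (if the reference counter did not advance between the two readouts there
 * is nothing to refine against, so the early calibration result is kept)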
*/ - if (ref_start == ref_stop) - goto out; - - /* Check, whether the sampling was disturbed by an SMI */ - if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX) - goto out; - - delta = tsc_stop - tsc_start; - delta *= 1000000LL; - if (hpet) - freq = calc_hpet_ref(delta, ref_start, ref_stop); - else - freq = calc_pmtimer_ref(delta, ref_start, ref_stop); - - /* Make sure we're within 1% */ - if (abs(tsc_khz - freq) > tsc_khz/100) - goto out; - - tsc_khz = freq; - printk(KERN_INFO "Refined TSC clocksource calibration: " - "%lu.%03lu MHz.\n", (unsigned long)tsc_khz / 1000, - (unsigned long)tsc_khz % 1000); - -out: - clocksource_register_khz(&clocksource_tsc, tsc_khz); -} - - -static int __init init_tsc_clocksource(void) -{ - if (!cpu_has_tsc || tsc_disabled > 0 || !tsc_khz) - return 0; - - if (tsc_clocksource_reliable) - clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; - /* lower the rating if we already know its unstable: */ - if (check_tsc_unstable()) { - clocksource_tsc.rating = 0; - clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; - } - - /* - * Trust the results of the earlier calibration on systems - * exporting a reliable TSC. - */ - if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { - clocksource_register_khz(&clocksource_tsc, tsc_khz); - return 0; - } - - schedule_delayed_work(&tsc_irqwork, 0); - return 0; -} -/* - * We use device_initcall here, to ensure we run after the hpet - * is fully initialized, which may occur at fs_initcall time. - */ -device_initcall(init_tsc_clocksource); - -void __init tsc_init(void) -{ - u64 lpj; - int cpu; - - x86_init.timers.tsc_pre_init(); - - if (!cpu_has_tsc) - return; - - tsc_khz = x86_platform.calibrate_tsc(); - cpu_khz = tsc_khz; - - if (!tsc_khz) { - mark_tsc_unstable("could not calculate TSC khz"); - return; - } - - printk("Detected %lu.%03lu MHz processor.\n", - (unsigned long)cpu_khz / 1000, - (unsigned long)cpu_khz % 1000); - - /* - * Secondary CPUs do not run through tsc_init(), so set up - * all the scale factors for all CPUs, assuming the same - * speed as the bootup CPU. (cpufreq notifiers will fix this - * up if their speed diverges) - */ - for_each_possible_cpu(cpu) - set_cyc2ns_scale(cpu_khz, cpu); - - if (tsc_disabled > 0) - return; - - /* now allow native_sched_clock() to use rdtsc */ - tsc_disabled = 0; - - if (!no_sched_irq_time) - enable_sched_clock_irqtime(); - - lpj = ((u64)tsc_khz * 1000); - do_div(lpj, HZ); - lpj_fine = lpj; - - use_tsc_delay(); - - if (unsynchronized_tsc()) - mark_tsc_unstable("TSCs unsynchronized"); - - check_system_tsc_reliable(); -} - -#ifdef CONFIG_SMP -/* - * If we have a constant TSC and are using the TSC for the delay loop, - * we can skip clock calibration if another cpu in the same socket has already - * been calibrated. This assumes that CONSTANT_TSC applies to all - * cpus in the socket - this should be a safe assumption. - */ -unsigned long __cpuinit calibrate_delay_is_known(void) -{ - int i, cpu = smp_processor_id(); - - if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC)) - return 0; - - for_each_online_cpu(i) - if (cpu_data(i).phys_proc_id == cpu_data(cpu).phys_proc_id) - return cpu_data(i).loops_per_jiffy; - return 0; -} -#endif diff --git a/ANDROID_3.4.5/arch/x86/kernel/tsc_sync.c b/ANDROID_3.4.5/arch/x86/kernel/tsc_sync.c deleted file mode 100644 index fc25e60a..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/tsc_sync.c +++ /dev/null @@ -1,217 +0,0 @@ -/* - * check TSC synchronization. 
- * - * Copyright (C) 2006, Red Hat, Inc., Ingo Molnar - * - * We check whether all boot CPUs have their TSC's synchronized, - * print a warning if not and turn off the TSC clock-source. - * - * The warp-check is point-to-point between two CPUs, the CPU - * initiating the bootup is the 'source CPU', the freshly booting - * CPU is the 'target CPU'. - * - * Only two CPUs may participate - they can enter in any order. - * ( The serial nature of the boot logic and the CPU hotplug lock - * protects against more than 2 CPUs entering this code. ) - */ -#include <linux/spinlock.h> -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/smp.h> -#include <linux/nmi.h> -#include <asm/tsc.h> - -/* - * Entry/exit counters that make sure that both CPUs - * run the measurement code at once: - */ -static __cpuinitdata atomic_t start_count; -static __cpuinitdata atomic_t stop_count; - -/* - * We use a raw spinlock in this exceptional case, because - * we want to have the fastest, inlined, non-debug version - * of a critical section, to be able to prove TSC time-warps: - */ -static __cpuinitdata arch_spinlock_t sync_lock = __ARCH_SPIN_LOCK_UNLOCKED; - -static __cpuinitdata cycles_t last_tsc; -static __cpuinitdata cycles_t max_warp; -static __cpuinitdata int nr_warps; - -/* - * TSC-warp measurement loop running on both CPUs: - */ -static __cpuinit void check_tsc_warp(unsigned int timeout) -{ - cycles_t start, now, prev, end; - int i; - - rdtsc_barrier(); - start = get_cycles(); - rdtsc_barrier(); - /* - * The measurement runs for 'timeout' msecs: - */ - end = start + (cycles_t) tsc_khz * timeout; - now = start; - - for (i = 0; ; i++) { - /* - * We take the global lock, measure TSC, save the - * previous TSC that was measured (possibly on - * another CPU) and update the previous TSC timestamp. - */ - arch_spin_lock(&sync_lock); - prev = last_tsc; - rdtsc_barrier(); - now = get_cycles(); - rdtsc_barrier(); - last_tsc = now; - arch_spin_unlock(&sync_lock); - - /* - * Be nice every now and then (and also check whether - * measurement is done [we also insert a 10 million - * loops safety exit, so we dont lock up in case the - * TSC readout is totally broken]): - */ - if (unlikely(!(i & 7))) { - if (now > end || i > 10000000) - break; - cpu_relax(); - touch_nmi_watchdog(); - } - /* - * Outside the critical section we can now see whether - * we saw a time-warp of the TSC going backwards: - */ - if (unlikely(prev > now)) { - arch_spin_lock(&sync_lock); - max_warp = max(max_warp, prev - now); - nr_warps++; - arch_spin_unlock(&sync_lock); - } - } - WARN(!(now-start), - "Warning: zero tsc calibration delta: %Ld [max: %Ld]\n", - now-start, end-start); -} - -/* - * If the target CPU coming online doesn't have any of its core-siblings - * online, a timeout of 20msec will be used for the TSC-warp measurement - * loop. Otherwise a smaller timeout of 2msec will be used, as we have some - * information about this socket already (and this information grows as we - * have more and more logical-siblings in that socket). - * - * Ideally we should be able to skip the TSC sync check on the other - * core-siblings, if the first logical CPU in a socket passed the sync test. - * But as the TSC is per-logical CPU and can potentially be modified wrongly - * by the bios, TSC sync test for smaller duration should be able - * to catch such errors. Also this will catch the condition where all the - * cores in the socket doesn't get reset at the same time. 
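/*
 * A user-space sketch (not part of the original file) of the warp check
 * implemented by check_tsc_warp() above: two threads take turns reading
 * the TSC under a lock, and if any reader observes a value smaller than
 * the last one recorded, the counters are not synchronized. This assumes
 * GCC/Clang on x86 (__rdtsc from <x86intrin.h>) and uses a mutex instead
 * of the kernel's raw spinlock; build with -pthread.
 */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <x86intrin.h>

static pthread_mutex_t sync_lock = PTHREAD_MUTEX_INITIALIZER;
static uint64_t last_tsc;
static uint64_t max_warp;

static void *warp_check(void *arg)
{
        (void)arg;
        for (int i = 0; i < 1000000; i++) {
                pthread_mutex_lock(&sync_lock);
                uint64_t prev = last_tsc;
                uint64_t now = __rdtsc();
                last_tsc = now;
                if (prev > now && prev - now > max_warp)
                        max_warp = prev - now;  /* TSC appeared to go backwards */
                pthread_mutex_unlock(&sync_lock);
        }
        return NULL;
}

int main(void)
{
        pthread_t a, b;

        pthread_create(&a, NULL, warp_check, NULL);
        pthread_create(&b, NULL, warp_check, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        printf("max observed warp: %llu cycles\n", (unsigned long long)max_warp);
        return 0;
}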
- */ -static inline unsigned int loop_timeout(int cpu) -{ - return (cpumask_weight(cpu_core_mask(cpu)) > 1) ? 2 : 20; -} - -/* - * Source CPU calls into this - it waits for the freshly booted - * target CPU to arrive and then starts the measurement: - */ -void __cpuinit check_tsc_sync_source(int cpu) -{ - int cpus = 2; - - /* - * No need to check if we already know that the TSC is not - * synchronized: - */ - if (unsynchronized_tsc()) - return; - - if (tsc_clocksource_reliable) { - if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING) - pr_info( - "Skipped synchronization checks as TSC is reliable.\n"); - return; - } - - /* - * Reset it - in case this is a second bootup: - */ - atomic_set(&stop_count, 0); - - /* - * Wait for the target to arrive: - */ - while (atomic_read(&start_count) != cpus-1) - cpu_relax(); - /* - * Trigger the target to continue into the measurement too: - */ - atomic_inc(&start_count); - - check_tsc_warp(loop_timeout(cpu)); - - while (atomic_read(&stop_count) != cpus-1) - cpu_relax(); - - if (nr_warps) { - pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n", - smp_processor_id(), cpu); - pr_warning("Measured %Ld cycles TSC warp between CPUs, " - "turning off TSC clock.\n", max_warp); - mark_tsc_unstable("check_tsc_sync_source failed"); - } else { - pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n", - smp_processor_id(), cpu); - } - - /* - * Reset it - just in case we boot another CPU later: - */ - atomic_set(&start_count, 0); - nr_warps = 0; - max_warp = 0; - last_tsc = 0; - - /* - * Let the target continue with the bootup: - */ - atomic_inc(&stop_count); -} - -/* - * Freshly booted CPUs call into this: - */ -void __cpuinit check_tsc_sync_target(void) -{ - int cpus = 2; - - if (unsynchronized_tsc() || tsc_clocksource_reliable) - return; - - /* - * Register this CPU's participation and wait for the - * source CPU to start the measurement: - */ - atomic_inc(&start_count); - while (atomic_read(&start_count) != cpus) - cpu_relax(); - - check_tsc_warp(loop_timeout(smp_processor_id())); - - /* - * Ok, we are done: - */ - atomic_inc(&stop_count); - - /* - * Wait for the source CPU to print stuff: - */ - while (atomic_read(&stop_count) != cpus) - cpu_relax(); -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/verify_cpu.S b/ANDROID_3.4.5/arch/x86/kernel/verify_cpu.S deleted file mode 100644 index b9242bac..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/verify_cpu.S +++ /dev/null @@ -1,139 +0,0 @@ -/* - * - * verify_cpu.S - Code for cpu long mode and SSE verification. This - * code has been borrowed from boot/setup.S and was introduced by - * Andi Kleen. - * - * Copyright (c) 2007 Andi Kleen (ak@suse.de) - * Copyright (c) 2007 Eric Biederman (ebiederm@xmission.com) - * Copyright (c) 2007 Vivek Goyal (vgoyal@in.ibm.com) - * Copyright (c) 2010 Kees Cook (kees.cook@canonical.com) - * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. - * - * This is a common code for verification whether CPU supports - * long mode and SSE or not. It is not called directly instead this - * file is included at various places and compiled in that context. - * This file is expected to run in 32bit code. Currently: - * - * arch/x86/boot/compressed/head_64.S: Boot cpu verification - * arch/x86/kernel/trampoline_64.S: secondary processor verification - * arch/x86/kernel/head_32.S: processor startup - * - * verify_cpu, returns the status of longmode and SSE in register %eax. 
- * 0: Success 1: Failure - * - * On Intel, the XD_DISABLE flag will be cleared as a side-effect. - * - * The caller needs to check for the error code and take the action - * appropriately. Either display a message or halt. - */ - -#include <asm/cpufeature.h> -#include <asm/msr-index.h> - -verify_cpu: - pushfl # Save caller passed flags - pushl $0 # Kill any dangerous flags - popfl - - pushfl # standard way to check for cpuid - popl %eax - movl %eax,%ebx - xorl $0x200000,%eax - pushl %eax - popfl - pushfl - popl %eax - cmpl %eax,%ebx - jz verify_cpu_no_longmode # cpu has no cpuid - - movl $0x0,%eax # See if cpuid 1 is implemented - cpuid - cmpl $0x1,%eax - jb verify_cpu_no_longmode # no cpuid 1 - - xor %di,%di - cmpl $0x68747541,%ebx # AuthenticAMD - jnz verify_cpu_noamd - cmpl $0x69746e65,%edx - jnz verify_cpu_noamd - cmpl $0x444d4163,%ecx - jnz verify_cpu_noamd - mov $1,%di # cpu is from AMD - jmp verify_cpu_check - -verify_cpu_noamd: - cmpl $0x756e6547,%ebx # GenuineIntel? - jnz verify_cpu_check - cmpl $0x49656e69,%edx - jnz verify_cpu_check - cmpl $0x6c65746e,%ecx - jnz verify_cpu_check - - # only call IA32_MISC_ENABLE when: - # family > 6 || (family == 6 && model >= 0xd) - movl $0x1, %eax # check CPU family and model - cpuid - movl %eax, %ecx - - andl $0x0ff00f00, %eax # mask family and extended family - shrl $8, %eax - cmpl $6, %eax - ja verify_cpu_clear_xd # family > 6, ok - jb verify_cpu_check # family < 6, skip - - andl $0x000f00f0, %ecx # mask model and extended model - shrl $4, %ecx - cmpl $0xd, %ecx - jb verify_cpu_check # family == 6, model < 0xd, skip - -verify_cpu_clear_xd: - movl $MSR_IA32_MISC_ENABLE, %ecx - rdmsr - btrl $2, %edx # clear MSR_IA32_MISC_ENABLE_XD_DISABLE - jnc verify_cpu_check # only write MSR if bit was changed - wrmsr - -verify_cpu_check: - movl $0x1,%eax # Does the cpu have what it takes - cpuid - andl $REQUIRED_MASK0,%edx - xorl $REQUIRED_MASK0,%edx - jnz verify_cpu_no_longmode - - movl $0x80000000,%eax # See if extended cpuid is implemented - cpuid - cmpl $0x80000001,%eax - jb verify_cpu_no_longmode # no extended cpuid - - movl $0x80000001,%eax # Does the cpu have what it takes - cpuid - andl $REQUIRED_MASK1,%edx - xorl $REQUIRED_MASK1,%edx - jnz verify_cpu_no_longmode - -verify_cpu_sse_test: - movl $1,%eax - cpuid - andl $SSE_MASK,%edx - cmpl $SSE_MASK,%edx - je verify_cpu_sse_ok - test %di,%di - jz verify_cpu_no_longmode # only try to force SSE on AMD - movl $MSR_K7_HWCR,%ecx - rdmsr - btr $15,%eax # enable SSE - wrmsr - xor %di,%di # don't loop - jmp verify_cpu_sse_test # try again - -verify_cpu_no_longmode: - popfl # Restore caller passed flags - movl $1,%eax - ret -verify_cpu_sse_ok: - popfl # Restore caller passed flags - xorl %eax, %eax - ret diff --git a/ANDROID_3.4.5/arch/x86/kernel/vm86_32.c b/ANDROID_3.4.5/arch/x86/kernel/vm86_32.c deleted file mode 100644 index 255f58ae..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/vm86_32.c +++ /dev/null @@ -1,849 +0,0 @@ -/* - * Copyright (C) 1994 Linus Torvalds - * - * 29 dec 2001 - Fixed oopses caused by unchecked access to the vm86 - * stack - Manfred Spraul <manfred@colorfullife.com> - * - * 22 mar 2002 - Manfred detected the stackfaults, but didn't handle - * them correctly. Now the emulation will be in a - * consistent state after stackfaults - Kasper Dupont - * <kasperd@daimi.au.dk> - * - * 22 mar 2002 - Added missing clear_IF in set_vflags_* Kasper Dupont - * <kasperd@daimi.au.dk> - * - * ?? ??? 
2002 - Fixed premature returns from handle_vm86_fault - * caused by Kasper Dupont's changes - Stas Sergeev - * - * 4 apr 2002 - Fixed CHECK_IF_IN_TRAP broken by Stas' changes. - * Kasper Dupont <kasperd@daimi.au.dk> - * - * 9 apr 2002 - Changed syntax of macros in handle_vm86_fault. - * Kasper Dupont <kasperd@daimi.au.dk> - * - * 9 apr 2002 - Changed stack access macros to jump to a label - * instead of returning to userspace. This simplifies - * do_int, and is needed by handle_vm6_fault. Kasper - * Dupont <kasperd@daimi.au.dk> - * - */ - -#include <linux/capability.h> -#include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/signal.h> -#include <linux/string.h> -#include <linux/mm.h> -#include <linux/smp.h> -#include <linux/highmem.h> -#include <linux/ptrace.h> -#include <linux/audit.h> -#include <linux/stddef.h> - -#include <asm/uaccess.h> -#include <asm/io.h> -#include <asm/tlbflush.h> -#include <asm/irq.h> -#include <asm/syscalls.h> - -/* - * Known problems: - * - * Interrupt handling is not guaranteed: - * - a real x86 will disable all interrupts for one instruction - * after a "mov ss,xx" to make stack handling atomic even without - * the 'lss' instruction. We can't guarantee this in v86 mode, - * as the next instruction might result in a page fault or similar. - * - a real x86 will have interrupts disabled for one instruction - * past the 'sti' that enables them. We don't bother with all the - * details yet. - * - * Let's hope these problems do not actually matter for anything. - */ - - -#define KVM86 ((struct kernel_vm86_struct *)regs) -#define VMPI KVM86->vm86plus - - -/* - * 8- and 16-bit register defines.. - */ -#define AL(regs) (((unsigned char *)&((regs)->pt.ax))[0]) -#define AH(regs) (((unsigned char *)&((regs)->pt.ax))[1]) -#define IP(regs) (*(unsigned short *)&((regs)->pt.ip)) -#define SP(regs) (*(unsigned short *)&((regs)->pt.sp)) - -/* - * virtual flags (16 and 32-bit versions) - */ -#define VFLAGS (*(unsigned short *)&(current->thread.v86flags)) -#define VEFLAGS (current->thread.v86flags) - -#define set_flags(X, new, mask) \ -((X) = ((X) & ~(mask)) | ((new) & (mask))) - -#define SAFE_MASK (0xDD5) -#define RETURN_MASK (0xDFF) - -/* convert kernel_vm86_regs to vm86_regs */ -static int copy_vm86_regs_to_user(struct vm86_regs __user *user, - const struct kernel_vm86_regs *regs) -{ - int ret = 0; - - /* - * kernel_vm86_regs is missing gs, so copy everything up to - * (but not including) orig_eax, and then rest including orig_eax. - */ - ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.orig_ax)); - ret += copy_to_user(&user->orig_eax, ®s->pt.orig_ax, - sizeof(struct kernel_vm86_regs) - - offsetof(struct kernel_vm86_regs, pt.orig_ax)); - - return ret; -} - -/* convert vm86_regs to kernel_vm86_regs */ -static int copy_vm86_regs_from_user(struct kernel_vm86_regs *regs, - const struct vm86_regs __user *user, - unsigned extra) -{ - int ret = 0; - - /* copy ax-fs inclusive */ - ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.orig_ax)); - /* copy orig_ax-__gsh+extra */ - ret += copy_from_user(®s->pt.orig_ax, &user->orig_eax, - sizeof(struct kernel_vm86_regs) - - offsetof(struct kernel_vm86_regs, pt.orig_ax) + - extra); - return ret; -} - -struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) -{ - struct tss_struct *tss; - struct pt_regs *ret; - unsigned long tmp; - - /* - * This gets called from entry.S with interrupts disabled, but - * from process context. 
Enable interrupts here, before trying - * to access user space. - */ - local_irq_enable(); - - if (!current->thread.vm86_info) { - printk("no vm86_info: BAD\n"); - do_exit(SIGSEGV); - } - set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | current->thread.v86mask); - tmp = copy_vm86_regs_to_user(¤t->thread.vm86_info->regs, regs); - tmp += put_user(current->thread.screen_bitmap, ¤t->thread.vm86_info->screen_bitmap); - if (tmp) { - printk("vm86: could not access userspace vm86_info\n"); - do_exit(SIGSEGV); - } - - tss = &per_cpu(init_tss, get_cpu()); - current->thread.sp0 = current->thread.saved_sp0; - current->thread.sysenter_cs = __KERNEL_CS; - load_sp0(tss, ¤t->thread); - current->thread.saved_sp0 = 0; - put_cpu(); - - ret = KVM86->regs32; - - ret->fs = current->thread.saved_fs; - set_user_gs(ret, current->thread.saved_gs); - - return ret; -} - -static void mark_screen_rdonly(struct mm_struct *mm) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - spinlock_t *ptl; - int i; - - down_write(&mm->mmap_sem); - pgd = pgd_offset(mm, 0xA0000); - if (pgd_none_or_clear_bad(pgd)) - goto out; - pud = pud_offset(pgd, 0xA0000); - if (pud_none_or_clear_bad(pud)) - goto out; - pmd = pmd_offset(pud, 0xA0000); - split_huge_page_pmd(mm, pmd); - if (pmd_none_or_clear_bad(pmd)) - goto out; - pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl); - for (i = 0; i < 32; i++) { - if (pte_present(*pte)) - set_pte(pte, pte_wrprotect(*pte)); - pte++; - } - pte_unmap_unlock(pte, ptl); -out: - up_write(&mm->mmap_sem); - flush_tlb(); -} - - - -static int do_vm86_irq_handling(int subfunction, int irqnumber); -static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); - -int sys_vm86old(struct vm86_struct __user *v86, struct pt_regs *regs) -{ - struct kernel_vm86_struct info; /* declare this _on top_, - * this avoids wasting of stack space. - * This remains on the stack until we - * return to 32 bit user space. - */ - struct task_struct *tsk; - int tmp, ret = -EPERM; - - tsk = current; - if (tsk->thread.saved_sp0) - goto out; - tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, - offsetof(struct kernel_vm86_struct, vm86plus) - - sizeof(info.regs)); - ret = -EFAULT; - if (tmp) - goto out; - memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus); - info.regs32 = regs; - tsk->thread.vm86_info = v86; - do_sys_vm86(&info, tsk); - ret = 0; /* we never return here */ -out: - return ret; -} - - -int sys_vm86(unsigned long cmd, unsigned long arg, struct pt_regs *regs) -{ - struct kernel_vm86_struct info; /* declare this _on top_, - * this avoids wasting of stack space. - * This remains on the stack until we - * return to 32 bit user space. - */ - struct task_struct *tsk; - int tmp, ret; - struct vm86plus_struct __user *v86; - - tsk = current; - switch (cmd) { - case VM86_REQUEST_IRQ: - case VM86_FREE_IRQ: - case VM86_GET_IRQ_BITS: - case VM86_GET_AND_RESET_IRQ: - ret = do_vm86_irq_handling(cmd, (int)arg); - goto out; - case VM86_PLUS_INSTALL_CHECK: - /* - * NOTE: on old vm86 stuff this will return the error - * from access_ok(), because the subfunction is - * interpreted as (invalid) address to vm86_struct. - * So the installation check works. 
- */ - ret = 0; - goto out; - } - - /* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */ - ret = -EPERM; - if (tsk->thread.saved_sp0) - goto out; - v86 = (struct vm86plus_struct __user *)arg; - tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, - offsetof(struct kernel_vm86_struct, regs32) - - sizeof(info.regs)); - ret = -EFAULT; - if (tmp) - goto out; - info.regs32 = regs; - info.vm86plus.is_vm86pus = 1; - tsk->thread.vm86_info = (struct vm86_struct __user *)v86; - do_sys_vm86(&info, tsk); - ret = 0; /* we never return here */ -out: - return ret; -} - - -static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk) -{ - struct tss_struct *tss; -/* - * make sure the vm86() system call doesn't try to do anything silly - */ - info->regs.pt.ds = 0; - info->regs.pt.es = 0; - info->regs.pt.fs = 0; -#ifndef CONFIG_X86_32_LAZY_GS - info->regs.pt.gs = 0; -#endif - -/* - * The flags register is also special: we cannot trust that the user - * has set it up safely, so this makes sure interrupt etc flags are - * inherited from protected mode. - */ - VEFLAGS = info->regs.pt.flags; - info->regs.pt.flags &= SAFE_MASK; - info->regs.pt.flags |= info->regs32->flags & ~SAFE_MASK; - info->regs.pt.flags |= X86_VM_MASK; - - switch (info->cpu_type) { - case CPU_286: - tsk->thread.v86mask = 0; - break; - case CPU_386: - tsk->thread.v86mask = X86_EFLAGS_NT | X86_EFLAGS_IOPL; - break; - case CPU_486: - tsk->thread.v86mask = X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL; - break; - default: - tsk->thread.v86mask = X86_EFLAGS_ID | X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL; - break; - } - -/* - * Save old state, set default return value (%ax) to 0 (VM86_SIGNAL) - */ - info->regs32->ax = VM86_SIGNAL; - tsk->thread.saved_sp0 = tsk->thread.sp0; - tsk->thread.saved_fs = info->regs32->fs; - tsk->thread.saved_gs = get_user_gs(info->regs32); - - tss = &per_cpu(init_tss, get_cpu()); - tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; - if (cpu_has_sep) - tsk->thread.sysenter_cs = 0; - load_sp0(tss, &tsk->thread); - put_cpu(); - - tsk->thread.screen_bitmap = info->screen_bitmap; - if (info->flags & VM86_SCREEN_BITMAP) - mark_screen_rdonly(tsk->mm); - - /*call __audit_syscall_exit since we do not exit via the normal paths */ -#ifdef CONFIG_AUDITSYSCALL - if (unlikely(current->audit_context)) - __audit_syscall_exit(1, 0); -#endif - - __asm__ __volatile__( - "movl %0,%%esp\n\t" - "movl %1,%%ebp\n\t" -#ifdef CONFIG_X86_32_LAZY_GS - "mov %2, %%gs\n\t" -#endif - "jmp resume_userspace" - : /* no outputs */ - :"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0)); - /* we never return here */ -} - -static inline void return_to_32bit(struct kernel_vm86_regs *regs16, int retval) -{ - struct pt_regs *regs32; - - regs32 = save_v86_state(regs16); - regs32->ax = retval; - __asm__ __volatile__("movl %0,%%esp\n\t" - "movl %1,%%ebp\n\t" - "jmp resume_userspace" - : : "r" (regs32), "r" (current_thread_info())); -} - -static inline void set_IF(struct kernel_vm86_regs *regs) -{ - VEFLAGS |= X86_EFLAGS_VIF; - if (VEFLAGS & X86_EFLAGS_VIP) - return_to_32bit(regs, VM86_STI); -} - -static inline void clear_IF(struct kernel_vm86_regs *regs) -{ - VEFLAGS &= ~X86_EFLAGS_VIF; -} - -static inline void clear_TF(struct kernel_vm86_regs *regs) -{ - regs->pt.flags &= ~X86_EFLAGS_TF; -} - -static inline void clear_AC(struct kernel_vm86_regs *regs) -{ - regs->pt.flags &= ~X86_EFLAGS_AC; -} - -/* - * It is correct to call set_IF(regs) from the set_vflags_* - * functions. 
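/*
 * A tiny stand-alone example (not from the original file) of the masked
 * flag update used throughout this file: set_flags() rewrites only the
 * bits selected by the mask, so EFLAGS.IF (0x200), which lies outside
 * SAFE_MASK, survives whatever the vm86 task tries to store. The 0x08D5
 * value is an arbitrary example.
 */
#include <assert.h>

#define set_flags(X, new, mask) \
        ((X) = ((X) & ~(mask)) | ((new) & (mask)))

int main(void)
{
        unsigned long eflags = 0x0202;          /* IF set, reserved bit 1 set */

        set_flags(eflags, 0x08D5UL, 0x0DD5UL);  /* 0x0DD5 is SAFE_MASK */

        assert(eflags == 0x0AD7);               /* IF still set, masked bits updated */
        return 0;
}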
However someone forgot to call clear_IF(regs) - * in the opposite case. - * After the command sequence CLI PUSHF STI POPF you should - * end up with interrupts disabled, but you ended up with - * interrupts enabled. - * ( I was testing my own changes, but the only bug I - * could find was in a function I had not changed. ) - * [KD] - */ - -static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs *regs) -{ - set_flags(VEFLAGS, flags, current->thread.v86mask); - set_flags(regs->pt.flags, flags, SAFE_MASK); - if (flags & X86_EFLAGS_IF) - set_IF(regs); - else - clear_IF(regs); -} - -static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs *regs) -{ - set_flags(VFLAGS, flags, current->thread.v86mask); - set_flags(regs->pt.flags, flags, SAFE_MASK); - if (flags & X86_EFLAGS_IF) - set_IF(regs); - else - clear_IF(regs); -} - -static inline unsigned long get_vflags(struct kernel_vm86_regs *regs) -{ - unsigned long flags = regs->pt.flags & RETURN_MASK; - - if (VEFLAGS & X86_EFLAGS_VIF) - flags |= X86_EFLAGS_IF; - flags |= X86_EFLAGS_IOPL; - return flags | (VEFLAGS & current->thread.v86mask); -} - -static inline int is_revectored(int nr, struct revectored_struct *bitmap) -{ - __asm__ __volatile__("btl %2,%1\n\tsbbl %0,%0" - :"=r" (nr) - :"m" (*bitmap), "r" (nr)); - return nr; -} - -#define val_byte(val, n) (((__u8 *)&val)[n]) - -#define pushb(base, ptr, val, err_label) \ - do { \ - __u8 __val = val; \ - ptr--; \ - if (put_user(__val, base + ptr) < 0) \ - goto err_label; \ - } while (0) - -#define pushw(base, ptr, val, err_label) \ - do { \ - __u16 __val = val; \ - ptr--; \ - if (put_user(val_byte(__val, 1), base + ptr) < 0) \ - goto err_label; \ - ptr--; \ - if (put_user(val_byte(__val, 0), base + ptr) < 0) \ - goto err_label; \ - } while (0) - -#define pushl(base, ptr, val, err_label) \ - do { \ - __u32 __val = val; \ - ptr--; \ - if (put_user(val_byte(__val, 3), base + ptr) < 0) \ - goto err_label; \ - ptr--; \ - if (put_user(val_byte(__val, 2), base + ptr) < 0) \ - goto err_label; \ - ptr--; \ - if (put_user(val_byte(__val, 1), base + ptr) < 0) \ - goto err_label; \ - ptr--; \ - if (put_user(val_byte(__val, 0), base + ptr) < 0) \ - goto err_label; \ - } while (0) - -#define popb(base, ptr, err_label) \ - ({ \ - __u8 __res; \ - if (get_user(__res, base + ptr) < 0) \ - goto err_label; \ - ptr++; \ - __res; \ - }) - -#define popw(base, ptr, err_label) \ - ({ \ - __u16 __res; \ - if (get_user(val_byte(__res, 0), base + ptr) < 0) \ - goto err_label; \ - ptr++; \ - if (get_user(val_byte(__res, 1), base + ptr) < 0) \ - goto err_label; \ - ptr++; \ - __res; \ - }) - -#define popl(base, ptr, err_label) \ - ({ \ - __u32 __res; \ - if (get_user(val_byte(__res, 0), base + ptr) < 0) \ - goto err_label; \ - ptr++; \ - if (get_user(val_byte(__res, 1), base + ptr) < 0) \ - goto err_label; \ - ptr++; \ - if (get_user(val_byte(__res, 2), base + ptr) < 0) \ - goto err_label; \ - ptr++; \ - if (get_user(val_byte(__res, 3), base + ptr) < 0) \ - goto err_label; \ - ptr++; \ - __res; \ - }) - -/* There are so many possible reasons for this function to return - * VM86_INTx, so adding another doesn't bother me. We can expect - * userspace programs to be able to handle it. (Getting a problem - * in userspace is always better than an Oops anyway.) 
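/*
 * A user-space model (not part of the original file) of the byte-wise
 * real-mode stack push done by the pushw() macro above: the target
 * address is (segment << 4) + offset, and the 16-bit stack pointer is
 * decremented before each byte store. Backed by a plain array, with no
 * fault handling and no A20 wrap, purely for illustration.
 */
#include <stdint.h>
#include <stdio.h>

static uint8_t guest_mem[1 << 20];              /* 1 MiB of "real mode" memory */

static void pushw_demo(uint16_t seg, uint16_t *sp, uint16_t val)
{
        uint8_t *base = guest_mem + ((uint32_t)seg << 4);

        *sp -= 1; base[*sp] = val >> 8;         /* high byte first */
        *sp -= 1; base[*sp] = val & 0xff;       /* then the low byte */
}

int main(void)
{
        uint16_t sp = 0xFFFE;

        pushw_demo(0x9000, &sp, 0x1234);
        printf("sp=%#x bytes=%#x %#x\n", sp,
               guest_mem[0x90000 + sp], guest_mem[0x90000 + sp + 1]);
        return 0;
}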
[KD] - */ -static void do_int(struct kernel_vm86_regs *regs, int i, - unsigned char __user *ssp, unsigned short sp) -{ - unsigned long __user *intr_ptr; - unsigned long segoffs; - - if (regs->pt.cs == BIOSSEG) - goto cannot_handle; - if (is_revectored(i, &KVM86->int_revectored)) - goto cannot_handle; - if (i == 0x21 && is_revectored(AH(regs), &KVM86->int21_revectored)) - goto cannot_handle; - intr_ptr = (unsigned long __user *) (i << 2); - if (get_user(segoffs, intr_ptr)) - goto cannot_handle; - if ((segoffs >> 16) == BIOSSEG) - goto cannot_handle; - pushw(ssp, sp, get_vflags(regs), cannot_handle); - pushw(ssp, sp, regs->pt.cs, cannot_handle); - pushw(ssp, sp, IP(regs), cannot_handle); - regs->pt.cs = segoffs >> 16; - SP(regs) -= 6; - IP(regs) = segoffs & 0xffff; - clear_TF(regs); - clear_IF(regs); - clear_AC(regs); - return; - -cannot_handle: - return_to_32bit(regs, VM86_INTx + (i << 8)); -} - -int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno) -{ - if (VMPI.is_vm86pus) { - if ((trapno == 3) || (trapno == 1)) { - KVM86->regs32->ax = VM86_TRAP + (trapno << 8); - /* setting this flag forces the code in entry_32.S to - call save_v86_state() and change the stack pointer - to KVM86->regs32 */ - set_thread_flag(TIF_IRET); - return 0; - } - do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs)); - return 0; - } - if (trapno != 1) - return 1; /* we let this handle by the calling routine */ - current->thread.trap_nr = trapno; - current->thread.error_code = error_code; - force_sig(SIGTRAP, current); - return 0; -} - -void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) -{ - unsigned char opcode; - unsigned char __user *csp; - unsigned char __user *ssp; - unsigned short ip, sp, orig_flags; - int data32, pref_done; - -#define CHECK_IF_IN_TRAP \ - if (VMPI.vm86dbg_active && VMPI.vm86dbg_TFpendig) \ - newflags |= X86_EFLAGS_TF -#define VM86_FAULT_RETURN do { \ - if (VMPI.force_return_for_pic && (VEFLAGS & (X86_EFLAGS_IF | X86_EFLAGS_VIF))) \ - return_to_32bit(regs, VM86_PICRETURN); \ - if (orig_flags & X86_EFLAGS_TF) \ - handle_vm86_trap(regs, 0, 1); \ - return; } while (0) - - orig_flags = *(unsigned short *)®s->pt.flags; - - csp = (unsigned char __user *) (regs->pt.cs << 4); - ssp = (unsigned char __user *) (regs->pt.ss << 4); - sp = SP(regs); - ip = IP(regs); - - data32 = 0; - pref_done = 0; - do { - switch (opcode = popb(csp, ip, simulate_sigsegv)) { - case 0x66: /* 32-bit data */ data32 = 1; break; - case 0x67: /* 32-bit address */ break; - case 0x2e: /* CS */ break; - case 0x3e: /* DS */ break; - case 0x26: /* ES */ break; - case 0x36: /* SS */ break; - case 0x65: /* GS */ break; - case 0x64: /* FS */ break; - case 0xf2: /* repnz */ break; - case 0xf3: /* rep */ break; - default: pref_done = 1; - } - } while (!pref_done); - - switch (opcode) { - - /* pushf */ - case 0x9c: - if (data32) { - pushl(ssp, sp, get_vflags(regs), simulate_sigsegv); - SP(regs) -= 4; - } else { - pushw(ssp, sp, get_vflags(regs), simulate_sigsegv); - SP(regs) -= 2; - } - IP(regs) = ip; - VM86_FAULT_RETURN; - - /* popf */ - case 0x9d: - { - unsigned long newflags; - if (data32) { - newflags = popl(ssp, sp, simulate_sigsegv); - SP(regs) += 4; - } else { - newflags = popw(ssp, sp, simulate_sigsegv); - SP(regs) += 2; - } - IP(regs) = ip; - CHECK_IF_IN_TRAP; - if (data32) - set_vflags_long(newflags, regs); - else - set_vflags_short(newflags, regs); - - VM86_FAULT_RETURN; - } - - /* int xx */ - case 0xcd: { - int intno = popb(csp, ip, simulate_sigsegv); - 
IP(regs) = ip; - if (VMPI.vm86dbg_active) { - if ((1 << (intno & 7)) & VMPI.vm86dbg_intxxtab[intno >> 3]) - return_to_32bit(regs, VM86_INTx + (intno << 8)); - } - do_int(regs, intno, ssp, sp); - return; - } - - /* iret */ - case 0xcf: - { - unsigned long newip; - unsigned long newcs; - unsigned long newflags; - if (data32) { - newip = popl(ssp, sp, simulate_sigsegv); - newcs = popl(ssp, sp, simulate_sigsegv); - newflags = popl(ssp, sp, simulate_sigsegv); - SP(regs) += 12; - } else { - newip = popw(ssp, sp, simulate_sigsegv); - newcs = popw(ssp, sp, simulate_sigsegv); - newflags = popw(ssp, sp, simulate_sigsegv); - SP(regs) += 6; - } - IP(regs) = newip; - regs->pt.cs = newcs; - CHECK_IF_IN_TRAP; - if (data32) { - set_vflags_long(newflags, regs); - } else { - set_vflags_short(newflags, regs); - } - VM86_FAULT_RETURN; - } - - /* cli */ - case 0xfa: - IP(regs) = ip; - clear_IF(regs); - VM86_FAULT_RETURN; - - /* sti */ - /* - * Damn. This is incorrect: the 'sti' instruction should actually - * enable interrupts after the /next/ instruction. Not good. - * - * Probably needs some horsing around with the TF flag. Aiee.. - */ - case 0xfb: - IP(regs) = ip; - set_IF(regs); - VM86_FAULT_RETURN; - - default: - return_to_32bit(regs, VM86_UNKNOWN); - } - - return; - -simulate_sigsegv: - /* FIXME: After a long discussion with Stas we finally - * agreed, that this is wrong. Here we should - * really send a SIGSEGV to the user program. - * But how do we create the correct context? We - * are inside a general protection fault handler - * and has just returned from a page fault handler. - * The correct context for the signal handler - * should be a mixture of the two, but how do we - * get the information? [KD] - */ - return_to_32bit(regs, VM86_UNKNOWN); -} - -/* ---------------- vm86 special IRQ passing stuff ----------------- */ - -#define VM86_IRQNAME "vm86irq" - -static struct vm86_irqs { - struct task_struct *tsk; - int sig; -} vm86_irqs[16]; - -static DEFINE_SPINLOCK(irqbits_lock); -static int irqbits; - -#define ALLOWED_SIGS (1 /* 0 = don't send a signal */ \ - | (1 << SIGUSR1) | (1 << SIGUSR2) | (1 << SIGIO) | (1 << SIGURG) \ - | (1 << SIGUNUSED)) - -static irqreturn_t irq_handler(int intno, void *dev_id) -{ - int irq_bit; - unsigned long flags; - - spin_lock_irqsave(&irqbits_lock, flags); - irq_bit = 1 << intno; - if ((irqbits & irq_bit) || !vm86_irqs[intno].tsk) - goto out; - irqbits |= irq_bit; - if (vm86_irqs[intno].sig) - send_sig(vm86_irqs[intno].sig, vm86_irqs[intno].tsk, 1); - /* - * IRQ will be re-enabled when user asks for the irq (whether - * polling or as a result of the signal) - */ - disable_irq_nosync(intno); - spin_unlock_irqrestore(&irqbits_lock, flags); - return IRQ_HANDLED; - -out: - spin_unlock_irqrestore(&irqbits_lock, flags); - return IRQ_NONE; -} - -static inline void free_vm86_irq(int irqnumber) -{ - unsigned long flags; - - free_irq(irqnumber, NULL); - vm86_irqs[irqnumber].tsk = NULL; - - spin_lock_irqsave(&irqbits_lock, flags); - irqbits &= ~(1 << irqnumber); - spin_unlock_irqrestore(&irqbits_lock, flags); -} - -void release_vm86_irqs(struct task_struct *task) -{ - int i; - for (i = FIRST_VM86_IRQ ; i <= LAST_VM86_IRQ; i++) - if (vm86_irqs[i].tsk == task) - free_vm86_irq(i); -} - -static inline int get_and_reset_irq(int irqnumber) -{ - int bit; - unsigned long flags; - int ret = 0; - - if (invalid_vm86_irq(irqnumber)) return 0; - if (vm86_irqs[irqnumber].tsk != current) return 0; - spin_lock_irqsave(&irqbits_lock, flags); - bit = irqbits & (1 << irqnumber); - irqbits &= ~bit; 
- if (bit) { - enable_irq(irqnumber); - ret = 1; - } - - spin_unlock_irqrestore(&irqbits_lock, flags); - return ret; -} - - -static int do_vm86_irq_handling(int subfunction, int irqnumber) -{ - int ret; - switch (subfunction) { - case VM86_GET_AND_RESET_IRQ: { - return get_and_reset_irq(irqnumber); - } - case VM86_GET_IRQ_BITS: { - return irqbits; - } - case VM86_REQUEST_IRQ: { - int sig = irqnumber >> 8; - int irq = irqnumber & 255; - if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!((1 << sig) & ALLOWED_SIGS)) return -EPERM; - if (invalid_vm86_irq(irq)) return -EPERM; - if (vm86_irqs[irq].tsk) return -EPERM; - ret = request_irq(irq, &irq_handler, 0, VM86_IRQNAME, NULL); - if (ret) return ret; - vm86_irqs[irq].sig = sig; - vm86_irqs[irq].tsk = current; - return irq; - } - case VM86_FREE_IRQ: { - if (invalid_vm86_irq(irqnumber)) return -EPERM; - if (!vm86_irqs[irqnumber].tsk) return 0; - if (vm86_irqs[irqnumber].tsk != current) return -EPERM; - free_vm86_irq(irqnumber); - return 0; - } - } - return -EINVAL; -} - diff --git a/ANDROID_3.4.5/arch/x86/kernel/vmlinux.lds.S b/ANDROID_3.4.5/arch/x86/kernel/vmlinux.lds.S deleted file mode 100644 index 0f703f10..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/vmlinux.lds.S +++ /dev/null @@ -1,376 +0,0 @@ -/* - * ld script for the x86 kernel - * - * Historic 32-bit version written by Martin Mares <mj@atrey.karlin.mff.cuni.cz> - * - * Modernisation, unification and other changes and fixes: - * Copyright (C) 2007-2009 Sam Ravnborg <sam@ravnborg.org> - * - * - * Don't define absolute symbols until and unless you know that symbol - * value is should remain constant even if kernel image is relocated - * at run time. Absolute symbols are not relocated. If symbol value should - * change if kernel is relocated, make the symbol section relative and - * put it inside the section definition. - */ - -#ifdef CONFIG_X86_32 -#define LOAD_OFFSET __PAGE_OFFSET -#else -#define LOAD_OFFSET __START_KERNEL_map -#endif - -#include <asm-generic/vmlinux.lds.h> -#include <asm/asm-offsets.h> -#include <asm/thread_info.h> -#include <asm/page_types.h> -#include <asm/cache.h> -#include <asm/boot.h> - -#undef i386 /* in case the preprocessor is a 32bit one */ - -OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT) - -#ifdef CONFIG_X86_32 -OUTPUT_ARCH(i386) -ENTRY(phys_startup_32) -jiffies = jiffies_64; -#else -OUTPUT_ARCH(i386:x86-64) -ENTRY(phys_startup_64) -jiffies_64 = jiffies; -#endif - -#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) -/* - * On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA - * we retain large page mappings for boundaries spanning kernel text, rodata - * and data sections. - * - * However, kernel identity mappings will have different RWX permissions - * to the pages mapping to text and to the pages padding (which are freed) the - * text section. Hence kernel identity mappings will be broken to smaller - * pages. For 64-bit, kernel text and kernel identity mappings are different, - * so we can enable protection checks that come with CONFIG_DEBUG_RODATA, - * as well as retain 2MB large page mappings for kernel text. - */ -#define X64_ALIGN_DEBUG_RODATA_BEGIN . = ALIGN(HPAGE_SIZE); - -#define X64_ALIGN_DEBUG_RODATA_END \ - . 
= ALIGN(HPAGE_SIZE); \ - __end_rodata_hpage_align = .; - -#else - -#define X64_ALIGN_DEBUG_RODATA_BEGIN -#define X64_ALIGN_DEBUG_RODATA_END - -#endif - -PHDRS { - text PT_LOAD FLAGS(5); /* R_E */ - data PT_LOAD FLAGS(6); /* RW_ */ -#ifdef CONFIG_X86_64 -#ifdef CONFIG_SMP - percpu PT_LOAD FLAGS(6); /* RW_ */ -#endif - init PT_LOAD FLAGS(7); /* RWE */ -#endif - note PT_NOTE FLAGS(0); /* ___ */ -} - -SECTIONS -{ -#ifdef CONFIG_X86_32 - . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; - phys_startup_32 = startup_32 - LOAD_OFFSET; -#else - . = __START_KERNEL; - phys_startup_64 = startup_64 - LOAD_OFFSET; -#endif - - /* Text and read-only data */ - .text : AT(ADDR(.text) - LOAD_OFFSET) { - _text = .; - /* bootstrapping code */ - HEAD_TEXT -#ifdef CONFIG_X86_32 - . = ALIGN(PAGE_SIZE); - *(.text..page_aligned) -#endif - . = ALIGN(8); - _stext = .; - TEXT_TEXT - SCHED_TEXT - LOCK_TEXT - KPROBES_TEXT - ENTRY_TEXT - IRQENTRY_TEXT - *(.fixup) - *(.gnu.warning) - /* End of text section */ - _etext = .; - } :text = 0x9090 - - NOTES :text :note - - EXCEPTION_TABLE(16) :text = 0x9090 - -#if defined(CONFIG_DEBUG_RODATA) - /* .text should occupy whole number of pages */ - . = ALIGN(PAGE_SIZE); -#endif - X64_ALIGN_DEBUG_RODATA_BEGIN - RO_DATA(PAGE_SIZE) - X64_ALIGN_DEBUG_RODATA_END - - /* Data */ - .data : AT(ADDR(.data) - LOAD_OFFSET) { - /* Start of data section */ - _sdata = .; - - /* init_task */ - INIT_TASK_DATA(THREAD_SIZE) - -#ifdef CONFIG_X86_32 - /* 32 bit has nosave before _edata */ - NOSAVE_DATA -#endif - - PAGE_ALIGNED_DATA(PAGE_SIZE) - - CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES) - - DATA_DATA - CONSTRUCTORS - - /* rarely changed data like cpu maps */ - READ_MOSTLY_DATA(INTERNODE_CACHE_BYTES) - - /* End of data section */ - _edata = .; - } :data - -#ifdef CONFIG_X86_64 - - . = ALIGN(PAGE_SIZE); - __vvar_page = .; - - .vvar : AT(ADDR(.vvar) - LOAD_OFFSET) { - /* work around gold bug 13023 */ - __vvar_beginning_hack = .; - - /* Place all vvars at the offsets in asm/vvar.h. */ -#define EMIT_VVAR(name, offset) \ - . = __vvar_beginning_hack + offset; \ - *(.vvar_ ## name) -#define __VVAR_KERNEL_LDS -#include <asm/vvar.h> -#undef __VVAR_KERNEL_LDS -#undef EMIT_VVAR - - } :data - - . = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE); - -#endif /* CONFIG_X86_64 */ - - /* Init code and data - will be freed after init */ - . = ALIGN(PAGE_SIZE); - .init.begin : AT(ADDR(.init.begin) - LOAD_OFFSET) { - __init_begin = .; /* paired with __init_end */ - } - -#if defined(CONFIG_X86_64) && defined(CONFIG_SMP) - /* - * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the - * output PHDR, so the next output section - .init.text - should - * start another segment - init. - */ - PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu) -#endif - - INIT_TEXT_SECTION(PAGE_SIZE) -#ifdef CONFIG_X86_64 - :init -#endif - - INIT_DATA_SECTION(16) - - /* - * Code and data for a variety of lowlevel trampolines, to be - * copied into base memory (< 1 MiB) during initialization. - * Since it is copied early, the main copy can be discarded - * afterwards. - */ - .x86_trampoline : AT(ADDR(.x86_trampoline) - LOAD_OFFSET) { - x86_trampoline_start = .; - *(.x86_trampoline) - x86_trampoline_end = .; - } - - .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { - __x86_cpu_dev_start = .; - *(.x86_cpu_dev.init) - __x86_cpu_dev_end = .; - } - - /* - * start address and size of operations which during runtime - * can be patched with virtualization friendly instructions or - * baremetal native ones. Think page table operations. 
- * Details in paravirt_types.h - */ - . = ALIGN(8); - .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { - __parainstructions = .; - *(.parainstructions) - __parainstructions_end = .; - } - - /* - * struct alt_inst entries. From the header (alternative.h): - * "Alternative instructions for different CPU types or capabilities" - * Think locking instructions on spinlocks. - */ - . = ALIGN(8); - .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { - __alt_instructions = .; - *(.altinstructions) - __alt_instructions_end = .; - } - - /* - * And here are the replacement instructions. The linker sticks - * them as binary blobs. The .altinstructions has enough data to - * get the address and the length of them to patch the kernel safely. - */ - .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { - *(.altinstr_replacement) - } - - /* - * struct iommu_table_entry entries are injected in this section. - * It is an array of IOMMUs which during run time gets sorted depending - * on its dependency order. After rootfs_initcall is complete - * this section can be safely removed. - */ - .iommu_table : AT(ADDR(.iommu_table) - LOAD_OFFSET) { - __iommu_table = .; - *(.iommu_table) - __iommu_table_end = .; - } - - . = ALIGN(8); - .apicdrivers : AT(ADDR(.apicdrivers) - LOAD_OFFSET) { - __apicdrivers = .; - *(.apicdrivers); - __apicdrivers_end = .; - } - - . = ALIGN(8); - /* - * .exit.text is discard at runtime, not link time, to deal with - * references from .altinstructions and .eh_frame - */ - .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { - EXIT_TEXT - } - - .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { - EXIT_DATA - } - -#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) - PERCPU_SECTION(INTERNODE_CACHE_BYTES) -#endif - - . = ALIGN(PAGE_SIZE); - - /* freed after init ends here */ - .init.end : AT(ADDR(.init.end) - LOAD_OFFSET) { - __init_end = .; - } - - /* - * smp_locks might be freed after init - * start/end must be page aligned - */ - . = ALIGN(PAGE_SIZE); - .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { - __smp_locks = .; - *(.smp_locks) - . = ALIGN(PAGE_SIZE); - __smp_locks_end = .; - } - -#ifdef CONFIG_X86_64 - .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { - NOSAVE_DATA - } -#endif - - /* BSS */ - . = ALIGN(PAGE_SIZE); - .bss : AT(ADDR(.bss) - LOAD_OFFSET) { - __bss_start = .; - *(.bss..page_aligned) - *(.bss) - . = ALIGN(PAGE_SIZE); - __bss_stop = .; - } - - . = ALIGN(PAGE_SIZE); - .brk : AT(ADDR(.brk) - LOAD_OFFSET) { - __brk_base = .; - . += 64 * 1024; /* 64k alignment slop space */ - *(.brk_reservation) /* areas brk users have reserved */ - __brk_limit = .; - } - - _end = .; - - STABS_DEBUG - DWARF_DEBUG - - /* Sections to be discarded */ - DISCARDS - /DISCARD/ : { *(.eh_frame) } -} - - -#ifdef CONFIG_X86_32 -/* - * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility: - */ -. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), - "kernel image bigger than KERNEL_IMAGE_SIZE"); -#else -/* - * Per-cpu symbols which need to be offset from __per_cpu_load - * for the boot processor. - */ -#define INIT_PER_CPU(x) init_per_cpu__##x = x + __per_cpu_load -INIT_PER_CPU(gdt_page); -INIT_PER_CPU(irq_stack_union); - -/* - * Build-time check on the image size: - */ -. = ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), - "kernel image bigger than KERNEL_IMAGE_SIZE"); - -#ifdef CONFIG_SMP -. 
= ASSERT((irq_stack_union == 0), - "irq_stack_union is not at start of per-cpu area"); -#endif - -#endif /* CONFIG_X86_32 */ - -#ifdef CONFIG_KEXEC -#include <asm/kexec.h> - -. = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, - "kexec control code size is too big"); -#endif - diff --git a/ANDROID_3.4.5/arch/x86/kernel/vsmp_64.c b/ANDROID_3.4.5/arch/x86/kernel/vsmp_64.c deleted file mode 100644 index a1d804bc..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/vsmp_64.c +++ /dev/null @@ -1,160 +0,0 @@ -/* - * vSMPowered(tm) systems specific initialization - * Copyright (C) 2005 ScaleMP Inc. - * - * Use of this code is subject to the terms and conditions of the - * GNU general public license version 2. See "COPYING" or - * http://www.gnu.org/licenses/gpl.html - * - * Ravikiran Thirumalai <kiran@scalemp.com>, - * Shai Fultheim <shai@scalemp.com> - * Paravirt ops integration: Glauber de Oliveira Costa <gcosta@redhat.com>, - * Ravikiran Thirumalai <kiran@scalemp.com> - */ - -#include <linux/init.h> -#include <linux/pci_ids.h> -#include <linux/pci_regs.h> - -#include <asm/apic.h> -#include <asm/pci-direct.h> -#include <asm/io.h> -#include <asm/paravirt.h> -#include <asm/setup.h> - -#if defined CONFIG_PCI && defined CONFIG_PARAVIRT -/* - * Interrupt control on vSMPowered systems: - * ~AC is a shadow of IF. If IF is 'on' AC should be 'off' - * and vice versa. - */ - -static unsigned long vsmp_save_fl(void) -{ - unsigned long flags = native_save_fl(); - - if (!(flags & X86_EFLAGS_IF) || (flags & X86_EFLAGS_AC)) - flags &= ~X86_EFLAGS_IF; - return flags; -} -PV_CALLEE_SAVE_REGS_THUNK(vsmp_save_fl); - -static void vsmp_restore_fl(unsigned long flags) -{ - if (flags & X86_EFLAGS_IF) - flags &= ~X86_EFLAGS_AC; - else - flags |= X86_EFLAGS_AC; - native_restore_fl(flags); -} -PV_CALLEE_SAVE_REGS_THUNK(vsmp_restore_fl); - -static void vsmp_irq_disable(void) -{ - unsigned long flags = native_save_fl(); - - native_restore_fl((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC); -} -PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_disable); - -static void vsmp_irq_enable(void) -{ - unsigned long flags = native_save_fl(); - - native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); -} -PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_enable); - -static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf, - unsigned long addr, unsigned len) -{ - switch (type) { - case PARAVIRT_PATCH(pv_irq_ops.irq_enable): - case PARAVIRT_PATCH(pv_irq_ops.irq_disable): - case PARAVIRT_PATCH(pv_irq_ops.save_fl): - case PARAVIRT_PATCH(pv_irq_ops.restore_fl): - return paravirt_patch_default(type, clobbers, ibuf, addr, len); - default: - return native_patch(type, clobbers, ibuf, addr, len); - } - -} - -static void __init set_vsmp_pv_ops(void) -{ - void __iomem *address; - unsigned int cap, ctl, cfg; - - /* set vSMP magic bits to indicate vSMP capable kernel */ - cfg = read_pci_config(0, 0x1f, 0, PCI_BASE_ADDRESS_0); - address = early_ioremap(cfg, 8); - cap = readl(address); - ctl = readl(address + 4); - printk(KERN_INFO "vSMP CTL: capabilities:0x%08x control:0x%08x\n", - cap, ctl); - if (cap & ctl & (1 << 4)) { - /* Setup irq ops and turn on vSMP IRQ fastpath handling */ - pv_irq_ops.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable); - pv_irq_ops.irq_enable = PV_CALLEE_SAVE(vsmp_irq_enable); - pv_irq_ops.save_fl = PV_CALLEE_SAVE(vsmp_save_fl); - pv_irq_ops.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl); - pv_init_ops.patch = vsmp_patch; - - ctl &= ~(1 << 4); - writel(ctl, address + 4); - ctl = readl(address + 4); - printk(KERN_INFO "vSMP 
CTL: control set to:0x%08x\n", ctl); - } - - early_iounmap(address, 8); -} -#else -static void __init set_vsmp_pv_ops(void) -{ -} -#endif - -#ifdef CONFIG_PCI -static int is_vsmp = -1; - -static void __init detect_vsmp_box(void) -{ - is_vsmp = 0; - - if (!early_pci_allowed()) - return; - - /* Check if we are running on a ScaleMP vSMPowered box */ - if (read_pci_config(0, 0x1f, 0, PCI_VENDOR_ID) == - (PCI_VENDOR_ID_SCALEMP | (PCI_DEVICE_ID_SCALEMP_VSMP_CTL << 16))) - is_vsmp = 1; -} - -int is_vsmp_box(void) -{ - if (is_vsmp != -1) - return is_vsmp; - else { - WARN_ON_ONCE(1); - return 0; - } -} - -#else -static void __init detect_vsmp_box(void) -{ -} -int is_vsmp_box(void) -{ - return 0; -} -#endif -void __init vsmp_init(void) -{ - detect_vsmp_box(); - if (!is_vsmp_box()) - return; - - set_vsmp_pv_ops(); - return; -} diff --git a/ANDROID_3.4.5/arch/x86/kernel/vsyscall_64.c b/ANDROID_3.4.5/arch/x86/kernel/vsyscall_64.c deleted file mode 100644 index 7515cf0e..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/vsyscall_64.c +++ /dev/null @@ -1,357 +0,0 @@ -/* - * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE - * Copyright 2003 Andi Kleen, SuSE Labs. - * - * [ NOTE: this mechanism is now deprecated in favor of the vDSO. ] - * - * Thanks to hpa@transmeta.com for some useful hint. - * Special thanks to Ingo Molnar for his early experience with - * a different vsyscall implementation for Linux/IA32 and for the name. - * - * vsyscall 1 is located at -10Mbyte, vsyscall 2 is located - * at virtual address -10Mbyte+1024bytes etc... There are at max 4 - * vsyscalls. One vsyscall can reserve more than 1 slot to avoid - * jumping out of line if necessary. We cannot add more with this - * mechanism because older kernels won't return -ENOSYS. - * - * Note: the concept clashes with user mode linux. UML users should - * use the vDSO. 
- */ - -#include <linux/time.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/timer.h> -#include <linux/seqlock.h> -#include <linux/jiffies.h> -#include <linux/sysctl.h> -#include <linux/topology.h> -#include <linux/clocksource.h> -#include <linux/getcpu.h> -#include <linux/cpu.h> -#include <linux/smp.h> -#include <linux/notifier.h> -#include <linux/syscalls.h> -#include <linux/ratelimit.h> - -#include <asm/vsyscall.h> -#include <asm/pgtable.h> -#include <asm/compat.h> -#include <asm/page.h> -#include <asm/unistd.h> -#include <asm/fixmap.h> -#include <asm/errno.h> -#include <asm/io.h> -#include <asm/segment.h> -#include <asm/desc.h> -#include <asm/topology.h> -#include <asm/vgtod.h> -#include <asm/traps.h> - -#define CREATE_TRACE_POINTS -#include "vsyscall_trace.h" - -DEFINE_VVAR(int, vgetcpu_mode); -DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data); - -static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE; - -static int __init vsyscall_setup(char *str) -{ - if (str) { - if (!strcmp("emulate", str)) - vsyscall_mode = EMULATE; - else if (!strcmp("native", str)) - vsyscall_mode = NATIVE; - else if (!strcmp("none", str)) - vsyscall_mode = NONE; - else - return -EINVAL; - - return 0; - } - - return -EINVAL; -} -early_param("vsyscall", vsyscall_setup); - -void update_vsyscall_tz(void) -{ - vsyscall_gtod_data.sys_tz = sys_tz; -} - -void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, - struct clocksource *clock, u32 mult) -{ - struct timespec monotonic; - - write_seqcount_begin(&vsyscall_gtod_data.seq); - - /* copy vsyscall data */ - vsyscall_gtod_data.clock.vclock_mode = clock->archdata.vclock_mode; - vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; - vsyscall_gtod_data.clock.mask = clock->mask; - vsyscall_gtod_data.clock.mult = mult; - vsyscall_gtod_data.clock.shift = clock->shift; - - vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; - vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; - - monotonic = timespec_add(*wall_time, *wtm); - vsyscall_gtod_data.monotonic_time_sec = monotonic.tv_sec; - vsyscall_gtod_data.monotonic_time_nsec = monotonic.tv_nsec; - - vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); - vsyscall_gtod_data.monotonic_time_coarse = - timespec_add(vsyscall_gtod_data.wall_time_coarse, *wtm); - - write_seqcount_end(&vsyscall_gtod_data.seq); -} - -static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, - const char *message) -{ - static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); - struct task_struct *tsk; - - if (!show_unhandled_signals || !__ratelimit(&rs)) - return; - - tsk = current; - - printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", - level, tsk->comm, task_pid_nr(tsk), - message, regs->ip, regs->cs, - regs->sp, regs->ax, regs->si, regs->di); -} - -static int addr_to_vsyscall_nr(unsigned long addr) -{ - int nr; - - if ((addr & ~0xC00UL) != VSYSCALL_START) - return -EINVAL; - - nr = (addr & 0xC00UL) >> 10; - if (nr >= 3) - return -EINVAL; - - return nr; -} - -static bool write_ok_or_segv(unsigned long ptr, size_t size) -{ - /* - * XXX: if access_ok, get_user, and put_user handled - * sig_on_uaccess_error, this could go away. 
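/*
 * A stand-alone illustration (not from the original file) of the address
 * decode performed by addr_to_vsyscall_nr() above: the three legacy
 * vsyscalls sit 1024 bytes apart, so bits 10-11 of the faulting address
 * select the slot. The base constant mirrors the historical -10 MiB
 * VSYSCALL_START and is an assumed value here.
 */
#include <assert.h>
#include <stdint.h>

#define VSYSCALL_BASE 0xffffffffff600000ULL     /* assumed -10 MiB base */

static int vsyscall_nr(uint64_t addr)
{
        if ((addr & ~0xC00ULL) != VSYSCALL_BASE)
                return -1;                      /* not a vsyscall entry point */
        int nr = (int)((addr & 0xC00ULL) >> 10);
        return nr < 3 ? nr : -1;                /* 0=gettimeofday 1=time 2=getcpu */
}

int main(void)
{
        assert(vsyscall_nr(0xffffffffff600000ULL) == 0);
        assert(vsyscall_nr(0xffffffffff600400ULL) == 1);
        assert(vsyscall_nr(0xffffffffff600800ULL) == 2);
        assert(vsyscall_nr(0xffffffffff600c00ULL) == -1);
        return 0;
}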
- */ - - if (!access_ok(VERIFY_WRITE, (void __user *)ptr, size)) { - siginfo_t info; - struct thread_struct *thread = ¤t->thread; - - thread->error_code = 6; /* user fault, no page, write */ - thread->cr2 = ptr; - thread->trap_nr = X86_TRAP_PF; - - memset(&info, 0, sizeof(info)); - info.si_signo = SIGSEGV; - info.si_errno = 0; - info.si_code = SEGV_MAPERR; - info.si_addr = (void __user *)ptr; - - force_sig_info(SIGSEGV, &info, current); - return false; - } else { - return true; - } -} - -bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) -{ - struct task_struct *tsk; - unsigned long caller; - int vsyscall_nr; - int prev_sig_on_uaccess_error; - long ret; - - /* - * No point in checking CS -- the only way to get here is a user mode - * trap to a high address, which means that we're in 64-bit user code. - */ - - WARN_ON_ONCE(address != regs->ip); - - if (vsyscall_mode == NONE) { - warn_bad_vsyscall(KERN_INFO, regs, - "vsyscall attempted with vsyscall=none"); - return false; - } - - vsyscall_nr = addr_to_vsyscall_nr(address); - - trace_emulate_vsyscall(vsyscall_nr); - - if (vsyscall_nr < 0) { - warn_bad_vsyscall(KERN_WARNING, regs, - "misaligned vsyscall (exploit attempt or buggy program) -- look up the vsyscall kernel parameter if you need a workaround"); - goto sigsegv; - } - - if (get_user(caller, (unsigned long __user *)regs->sp) != 0) { - warn_bad_vsyscall(KERN_WARNING, regs, - "vsyscall with bad stack (exploit attempt?)"); - goto sigsegv; - } - - tsk = current; - if (seccomp_mode(&tsk->seccomp)) - do_exit(SIGKILL); - - /* - * With a real vsyscall, page faults cause SIGSEGV. We want to - * preserve that behavior to make writing exploits harder. - */ - prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error; - current_thread_info()->sig_on_uaccess_error = 1; - - /* - * NULL is a valid user pointer (in the access_ok sense) on 32-bit and - * 64-bit, so we don't need to special-case it here. For all the - * vsyscalls, NULL means "don't write anything" not "write it at - * address 0". - */ - ret = -EFAULT; - switch (vsyscall_nr) { - case 0: - if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) || - !write_ok_or_segv(regs->si, sizeof(struct timezone))) - break; - - ret = sys_gettimeofday( - (struct timeval __user *)regs->di, - (struct timezone __user *)regs->si); - break; - - case 1: - if (!write_ok_or_segv(regs->di, sizeof(time_t))) - break; - - ret = sys_time((time_t __user *)regs->di); - break; - - case 2: - if (!write_ok_or_segv(regs->di, sizeof(unsigned)) || - !write_ok_or_segv(regs->si, sizeof(unsigned))) - break; - - ret = sys_getcpu((unsigned __user *)regs->di, - (unsigned __user *)regs->si, - NULL); - break; - } - - current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error; - - if (ret == -EFAULT) { - /* Bad news -- userspace fed a bad pointer to a vsyscall. */ - warn_bad_vsyscall(KERN_INFO, regs, - "vsyscall fault (exploit attempt?)"); - - /* - * If we failed to generate a signal for any reason, - * generate one here. (This should be impossible.) - */ - if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) && - !sigismember(&tsk->pending.signal, SIGSEGV))) - goto sigsegv; - - return true; /* Don't emulate the ret. */ - } - - regs->ax = ret; - - /* Emulate a ret instruction. */ - regs->ip = caller; - regs->sp += 8; - - return true; - -sigsegv: - force_sig(SIGSEGV, current); - return true; -} - -/* - * Assume __initcall executes before all user space. Hopefully kmod - * doesn't violate that. We'll find out if it does. 
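/*
 * A bit-twiddling sketch (not part of the original file) of the cpu/node
 * packing that vsyscall_set_cpu() below writes into the TSC_AUX MSR for
 * vgetcpu: the CPU number occupies the low 12 bits and the NUMA node the
 * bits above them. Example values are arbitrary.
 */
#include <assert.h>
#include <stdint.h>

static inline uint32_t pack_tsc_aux(uint32_t node, uint32_t cpu)
{
        return (node << 12) | (cpu & 0xfff);
}

static inline void unpack_tsc_aux(uint32_t aux, uint32_t *node, uint32_t *cpu)
{
        *cpu = aux & 0xfff;
        *node = aux >> 12;
}

int main(void)
{
        uint32_t node, cpu;

        unpack_tsc_aux(pack_tsc_aux(3, 17), &node, &cpu);
        assert(node == 3 && cpu == 17);
        return 0;
}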
- */ -static void __cpuinit vsyscall_set_cpu(int cpu) -{ - unsigned long d; - unsigned long node = 0; -#ifdef CONFIG_NUMA - node = cpu_to_node(cpu); -#endif - if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP)) - write_rdtscp_aux((node << 12) | cpu); - - /* - * Store cpu number in limit so that it can be loaded quickly - * in user space in vgetcpu. (12 bits for the CPU and 8 bits for the node) - */ - d = 0x0f40000000000ULL; - d |= cpu; - d |= (node & 0xf) << 12; - d |= (node >> 4) << 48; - - write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S); -} - -static void __cpuinit cpu_vsyscall_init(void *arg) -{ - /* preemption should be already off */ - vsyscall_set_cpu(raw_smp_processor_id()); -} - -static int __cpuinit -cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) -{ - long cpu = (long)arg; - - if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) - smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1); - - return NOTIFY_DONE; -} - -void __init map_vsyscall(void) -{ - extern char __vsyscall_page; - unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); - extern char __vvar_page; - unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page); - - __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_vsyscall, - vsyscall_mode == NATIVE - ? PAGE_KERNEL_VSYSCALL - : PAGE_KERNEL_VVAR); - BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_FIRST_PAGE) != - (unsigned long)VSYSCALL_START); - - __set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR); - BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) != - (unsigned long)VVAR_ADDRESS); -} - -static int __init vsyscall_init(void) -{ - BUG_ON(VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)); - - on_each_cpu(cpu_vsyscall_init, NULL, 1); - /* notifier priority > KVM */ - hotcpu_notifier(cpu_vsyscall_notifier, 30); - - return 0; -} -__initcall(vsyscall_init); diff --git a/ANDROID_3.4.5/arch/x86/kernel/vsyscall_emu_64.S b/ANDROID_3.4.5/arch/x86/kernel/vsyscall_emu_64.S deleted file mode 100644 index c9596a9a..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/vsyscall_emu_64.S +++ /dev/null @@ -1,37 +0,0 @@ -/* - * vsyscall_emu_64.S: Vsyscall emulation page - * - * Copyright (c) 2011 Andy Lutomirski - * - * Subject to the GNU General Public License, version 2 - */ - -#include <linux/linkage.h> - -#include <asm/irq_vectors.h> -#include <asm/page_types.h> -#include <asm/unistd_64.h> - -__PAGE_ALIGNED_DATA - .globl __vsyscall_page - .balign PAGE_SIZE, 0xcc - .type __vsyscall_page, @object -__vsyscall_page: - - mov $__NR_gettimeofday, %rax - syscall - ret - - .balign 1024, 0xcc - mov $__NR_time, %rax - syscall - ret - - .balign 1024, 0xcc - mov $__NR_getcpu, %rax - syscall - ret - - .balign 4096, 0xcc - - .size __vsyscall_page, 4096 diff --git a/ANDROID_3.4.5/arch/x86/kernel/vsyscall_trace.h b/ANDROID_3.4.5/arch/x86/kernel/vsyscall_trace.h deleted file mode 100644 index a8b2edec..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/vsyscall_trace.h +++ /dev/null @@ -1,29 +0,0 @@ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM vsyscall - -#if !defined(__VSYSCALL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) -#define __VSYSCALL_TRACE_H - -#include <linux/tracepoint.h> - -TRACE_EVENT(emulate_vsyscall, - - TP_PROTO(int nr), - - TP_ARGS(nr), - - TP_STRUCT__entry(__field(int, nr)), - - TP_fast_assign( - __entry->nr = nr; - ), - - TP_printk("nr = %d", __entry->nr) -); - -#endif - -#undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH ../../arch/x86/kernel -#define TRACE_INCLUDE_FILE vsyscall_trace -#include 
<trace/define_trace.h> diff --git a/ANDROID_3.4.5/arch/x86/kernel/x8664_ksyms_64.c b/ANDROID_3.4.5/arch/x86/kernel/x8664_ksyms_64.c deleted file mode 100644 index 9796c2f3..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/x8664_ksyms_64.c +++ /dev/null @@ -1,60 +0,0 @@ -/* Exports for assembly files. - All C exports should go in the respective C files. */ - -#include <linux/module.h> -#include <linux/smp.h> - -#include <net/checksum.h> - -#include <asm/processor.h> -#include <asm/pgtable.h> -#include <asm/uaccess.h> -#include <asm/desc.h> -#include <asm/ftrace.h> - -#ifdef CONFIG_FUNCTION_TRACER -/* mcount is defined in assembly */ -EXPORT_SYMBOL(mcount); -#endif - -EXPORT_SYMBOL(__get_user_1); -EXPORT_SYMBOL(__get_user_2); -EXPORT_SYMBOL(__get_user_4); -EXPORT_SYMBOL(__get_user_8); -EXPORT_SYMBOL(__put_user_1); -EXPORT_SYMBOL(__put_user_2); -EXPORT_SYMBOL(__put_user_4); -EXPORT_SYMBOL(__put_user_8); - -EXPORT_SYMBOL(copy_user_generic_string); -EXPORT_SYMBOL(copy_user_generic_unrolled); -EXPORT_SYMBOL(__copy_user_nocache); -EXPORT_SYMBOL(_copy_from_user); -EXPORT_SYMBOL(_copy_to_user); - -EXPORT_SYMBOL(copy_page); -EXPORT_SYMBOL(clear_page); - -EXPORT_SYMBOL(csum_partial); - -/* - * Export string functions. We normally rely on gcc builtin for most of these, - * but gcc sometimes decides not to inline them. - */ -#undef memcpy -#undef memset -#undef memmove - -extern void *memset(void *, int, __kernel_size_t); -extern void *memcpy(void *, const void *, __kernel_size_t); -extern void *__memcpy(void *, const void *, __kernel_size_t); - -EXPORT_SYMBOL(memset); -EXPORT_SYMBOL(memcpy); -EXPORT_SYMBOL(__memcpy); -EXPORT_SYMBOL(memmove); - -EXPORT_SYMBOL(empty_zero_page); -#ifndef CONFIG_PARAVIRT -EXPORT_SYMBOL(native_load_gs_index); -#endif diff --git a/ANDROID_3.4.5/arch/x86/kernel/x86_init.c b/ANDROID_3.4.5/arch/x86/kernel/x86_init.c deleted file mode 100644 index 9cf71d0b..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/x86_init.c +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (C) 2009 Thomas Gleixner <tglx@linutronix.de> - * - * For licencing details see kernel-base/COPYING - */ -#include <linux/init.h> -#include <linux/ioport.h> -#include <linux/module.h> -#include <linux/pci.h> - -#include <asm/bios_ebda.h> -#include <asm/paravirt.h> -#include <asm/pci_x86.h> -#include <asm/pci.h> -#include <asm/mpspec.h> -#include <asm/setup.h> -#include <asm/apic.h> -#include <asm/e820.h> -#include <asm/time.h> -#include <asm/irq.h> -#include <asm/pat.h> -#include <asm/tsc.h> -#include <asm/iommu.h> -#include <asm/mach_traps.h> - -void __cpuinit x86_init_noop(void) { } -void __init x86_init_uint_noop(unsigned int unused) { } -void __init x86_init_pgd_noop(pgd_t *unused) { } -int __init iommu_init_noop(void) { return 0; } -void iommu_shutdown_noop(void) { } -void wallclock_init_noop(void) { } - -/* - * The platform setup functions are preset with the default functions - * for standard PC hardware. 
- */ -struct x86_init_ops x86_init __initdata = { - - .resources = { - .probe_roms = probe_roms, - .reserve_resources = reserve_standard_io_resources, - .memory_setup = default_machine_specific_memory_setup, - }, - - .mpparse = { - .mpc_record = x86_init_uint_noop, - .setup_ioapic_ids = x86_init_noop, - .mpc_apic_id = default_mpc_apic_id, - .smp_read_mpc_oem = default_smp_read_mpc_oem, - .mpc_oem_bus_info = default_mpc_oem_bus_info, - .find_smp_config = default_find_smp_config, - .get_smp_config = default_get_smp_config, - }, - - .irqs = { - .pre_vector_init = init_ISA_irqs, - .intr_init = native_init_IRQ, - .trap_init = x86_init_noop, - }, - - .oem = { - .arch_setup = x86_init_noop, - .banner = default_banner, - }, - - .mapping = { - .pagetable_reserve = native_pagetable_reserve, - }, - - .paging = { - .pagetable_setup_start = native_pagetable_setup_start, - .pagetable_setup_done = native_pagetable_setup_done, - }, - - .timers = { - .setup_percpu_clockev = setup_boot_APIC_clock, - .tsc_pre_init = x86_init_noop, - .timer_init = hpet_time_init, - .wallclock_init = x86_init_noop, - }, - - .iommu = { - .iommu_init = iommu_init_noop, - }, - - .pci = { - .init = x86_default_pci_init, - .init_irq = x86_default_pci_init_irq, - .fixup_irqs = x86_default_pci_fixup_irqs, - }, -}; - -struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { - .early_percpu_clock_init = x86_init_noop, - .setup_percpu_clockev = setup_secondary_APIC_clock, -}; - -static void default_nmi_init(void) { }; -static int default_i8042_detect(void) { return 1; }; - -struct x86_platform_ops x86_platform = { - .calibrate_tsc = native_calibrate_tsc, - .wallclock_init = wallclock_init_noop, - .get_wallclock = mach_get_cmos_time, - .set_wallclock = mach_set_rtc_mmss, - .iommu_shutdown = iommu_shutdown_noop, - .is_untracked_pat_range = is_ISA_range, - .nmi_init = default_nmi_init, - .get_nmi_reason = default_get_nmi_reason, - .i8042_detect = default_i8042_detect, - .save_sched_clock_state = tsc_save_sched_clock_state, - .restore_sched_clock_state = tsc_restore_sched_clock_state, -}; - -EXPORT_SYMBOL_GPL(x86_platform); -struct x86_msi_ops x86_msi = { - .setup_msi_irqs = native_setup_msi_irqs, - .teardown_msi_irq = native_teardown_msi_irq, - .teardown_msi_irqs = default_teardown_msi_irqs, - .restore_msi_irqs = default_restore_msi_irqs, -}; diff --git a/ANDROID_3.4.5/arch/x86/kernel/xsave.c b/ANDROID_3.4.5/arch/x86/kernel/xsave.c deleted file mode 100644 index e62728e3..00000000 --- a/ANDROID_3.4.5/arch/x86/kernel/xsave.c +++ /dev/null @@ -1,473 +0,0 @@ -/* - * xsave/xrstor support. - * - * Author: Suresh Siddha <suresh.b.siddha@intel.com> - */ -#include <linux/bootmem.h> -#include <linux/compat.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> -#ifdef CONFIG_IA32_EMULATION -#include <asm/sigcontext32.h> -#endif -#include <asm/xcr.h> - -/* - * Supported feature mask by the CPU and the kernel. - */ -u64 pcntxt_mask; - -/* - * Represents init state for the supported extended state. - */ -static struct xsave_struct *init_xstate_buf; - -struct _fpx_sw_bytes fx_sw_reserved; -#ifdef CONFIG_IA32_EMULATION -struct _fpx_sw_bytes fx_sw_reserved_ia32; -#endif - -static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; - -/* - * If a processor implementation discern that a processor state component is - * in its initialized state it may modify the corresponding bit in the - * xsave_hdr.xstate_bv as '0', with out modifying the corresponding memory - * layout in the case of xsaveopt. 
While presenting the xstate information to - * the user, we always ensure that the memory layout of a feature will be in - * the init state if the corresponding header bit is zero. This is to ensure - * that the user doesn't see some stale state in the memory layout during - * signal handling, debugging etc. - */ -void __sanitize_i387_state(struct task_struct *tsk) -{ - u64 xstate_bv; - int feature_bit = 0x2; - struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; - - if (!fx) - return; - - BUG_ON(__thread_has_fpu(tsk)); - - xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv; - - /* - * None of the feature bits are in init state. So nothing else - * to do for us, as the memory layout is up to date. - */ - if ((xstate_bv & pcntxt_mask) == pcntxt_mask) - return; - - /* - * FP is in init state - */ - if (!(xstate_bv & XSTATE_FP)) { - fx->cwd = 0x37f; - fx->swd = 0; - fx->twd = 0; - fx->fop = 0; - fx->rip = 0; - fx->rdp = 0; - memset(&fx->st_space[0], 0, 128); - } - - /* - * SSE is in init state - */ - if (!(xstate_bv & XSTATE_SSE)) - memset(&fx->xmm_space[0], 0, 256); - - xstate_bv = (pcntxt_mask & ~xstate_bv) >> 2; - - /* - * Update all the other memory layouts for which the corresponding - * header bit is in the init state. - */ - while (xstate_bv) { - if (xstate_bv & 0x1) { - int offset = xstate_offsets[feature_bit]; - int size = xstate_sizes[feature_bit]; - - memcpy(((void *) fx) + offset, - ((void *) init_xstate_buf) + offset, - size); - } - - xstate_bv >>= 1; - feature_bit++; - } -} - -/* - * Check for the presence of extended state information in the - * user fpstate pointer in the sigcontext. - */ -int check_for_xstate(struct i387_fxsave_struct __user *buf, - void __user *fpstate, - struct _fpx_sw_bytes *fx_sw_user) -{ - int min_xstate_size = sizeof(struct i387_fxsave_struct) + - sizeof(struct xsave_hdr_struct); - unsigned int magic2; - int err; - - err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0], - sizeof(struct _fpx_sw_bytes)); - if (err) - return -EFAULT; - - /* - * First Magic check failed. - */ - if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1) - return -EINVAL; - - /* - * Check for error scenarios. - */ - if (fx_sw_user->xstate_size < min_xstate_size || - fx_sw_user->xstate_size > xstate_size || - fx_sw_user->xstate_size > fx_sw_user->extended_size) - return -EINVAL; - - err = __get_user(magic2, (__u32 *) (((void *)fpstate) + - fx_sw_user->extended_size - - FP_XSTATE_MAGIC2_SIZE)); - if (err) - return err; - /* - * Check for the presence of second magic word at the end of memory - * layout. This detects the case where the user just copied the legacy - * fpstate layout with out copying the extended state information - * in the memory layout. - */ - if (magic2 != FP_XSTATE_MAGIC2) - return -EFAULT; - - return 0; -} - -#ifdef CONFIG_X86_64 -/* - * Signal frame handlers. 
- */ - -int save_i387_xstate(void __user *buf) -{ - struct task_struct *tsk = current; - int err = 0; - - if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size)) - return -EACCES; - - BUG_ON(sig_xstate_size < xstate_size); - - if ((unsigned long)buf % 64) - printk("save_i387_xstate: bad fpstate %p\n", buf); - - if (!used_math()) - return 0; - - if (user_has_fpu()) { - if (use_xsave()) - err = xsave_user(buf); - else - err = fxsave_user(buf); - - if (err) - return err; - user_fpu_end(); - } else { - sanitize_i387_state(tsk); - if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave, - xstate_size)) - return -1; - } - - clear_used_math(); /* trigger finit */ - - if (use_xsave()) { - struct _fpstate __user *fx = buf; - struct _xstate __user *x = buf; - u64 xstate_bv; - - err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved, - sizeof(struct _fpx_sw_bytes)); - - err |= __put_user(FP_XSTATE_MAGIC2, - (__u32 __user *) (buf + sig_xstate_size - - FP_XSTATE_MAGIC2_SIZE)); - - /* - * Read the xstate_bv which we copied (directly from the cpu or - * from the state in task struct) to the user buffers and - * set the FP/SSE bits. - */ - err |= __get_user(xstate_bv, &x->xstate_hdr.xstate_bv); - - /* - * For legacy compatible, we always set FP/SSE bits in the bit - * vector while saving the state to the user context. This will - * enable us capturing any changes(during sigreturn) to - * the FP/SSE bits by the legacy applications which don't touch - * xstate_bv in the xsave header. - * - * xsave aware apps can change the xstate_bv in the xsave - * header as well as change any contents in the memory layout. - * xrestore as part of sigreturn will capture all the changes. - */ - xstate_bv |= XSTATE_FPSSE; - - err |= __put_user(xstate_bv, &x->xstate_hdr.xstate_bv); - - if (err) - return err; - } - - return 1; -} - -/* - * Restore the extended state if present. Otherwise, restore the FP/SSE - * state. - */ -static int restore_user_xstate(void __user *buf) -{ - struct _fpx_sw_bytes fx_sw_user; - u64 mask; - int err; - - if (((unsigned long)buf % 64) || - check_for_xstate(buf, buf, &fx_sw_user)) - goto fx_only; - - mask = fx_sw_user.xstate_bv; - - /* - * restore the state passed by the user. - */ - err = xrestore_user(buf, mask); - if (err) - return err; - - /* - * init the state skipped by the user. - */ - mask = pcntxt_mask & ~mask; - if (unlikely(mask)) - xrstor_state(init_xstate_buf, mask); - - return 0; - -fx_only: - /* - * couldn't find the extended state information in the - * memory layout. Restore just the FP/SSE and init all - * the other extended state. - */ - xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE); - return fxrstor_checking((__force struct i387_fxsave_struct *)buf); -} - -/* - * This restores directly out of user space. Exceptions are handled. - */ -int restore_i387_xstate(void __user *buf) -{ - struct task_struct *tsk = current; - int err = 0; - - if (!buf) { - if (used_math()) - goto clear; - return 0; - } else - if (!access_ok(VERIFY_READ, buf, sig_xstate_size)) - return -EACCES; - - if (!used_math()) { - err = init_fpu(tsk); - if (err) - return err; - } - - user_fpu_begin(); - if (use_xsave()) - err = restore_user_xstate(buf); - else - err = fxrstor_checking((__force struct i387_fxsave_struct *) - buf); - if (unlikely(err)) { - /* - * Encountered an error while doing the restore from the - * user buffer, clear the fpu state. 
- */ -clear: - clear_fpu(tsk); - clear_used_math(); - } - return err; -} -#endif - -/* - * Prepare the SW reserved portion of the fxsave memory layout, indicating - * the presence of the extended state information in the memory layout - * pointed by the fpstate pointer in the sigcontext. - * This will be saved when ever the FP and extended state context is - * saved on the user stack during the signal handler delivery to the user. - */ -static void prepare_fx_sw_frame(void) -{ - int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) + - FP_XSTATE_MAGIC2_SIZE; - - sig_xstate_size = sizeof(struct _fpstate) + size_extended; - -#ifdef CONFIG_IA32_EMULATION - sig_xstate_ia32_size = sizeof(struct _fpstate_ia32) + size_extended; -#endif - - memset(&fx_sw_reserved, 0, sizeof(fx_sw_reserved)); - - fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; - fx_sw_reserved.extended_size = sig_xstate_size; - fx_sw_reserved.xstate_bv = pcntxt_mask; - fx_sw_reserved.xstate_size = xstate_size; -#ifdef CONFIG_IA32_EMULATION - memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved, - sizeof(struct _fpx_sw_bytes)); - fx_sw_reserved_ia32.extended_size = sig_xstate_ia32_size; -#endif -} - -#ifdef CONFIG_X86_64 -unsigned int sig_xstate_size = sizeof(struct _fpstate); -#endif - -/* - * Enable the extended processor state save/restore feature - */ -static inline void xstate_enable(void) -{ - set_in_cr4(X86_CR4_OSXSAVE); - xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); -} - -/* - * Record the offsets and sizes of different state managed by the xsave - * memory layout. - */ -static void __init setup_xstate_features(void) -{ - int eax, ebx, ecx, edx, leaf = 0x2; - - xstate_features = fls64(pcntxt_mask); - xstate_offsets = alloc_bootmem(xstate_features * sizeof(int)); - xstate_sizes = alloc_bootmem(xstate_features * sizeof(int)); - - do { - cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx); - - if (eax == 0) - break; - - xstate_offsets[leaf] = ebx; - xstate_sizes[leaf] = eax; - - leaf++; - } while (1); -} - -/* - * setup the xstate image representing the init state - */ -static void __init setup_xstate_init(void) -{ - setup_xstate_features(); - - /* - * Setup init_xstate_buf to represent the init state of - * all the features managed by the xsave - */ - init_xstate_buf = alloc_bootmem_align(xstate_size, - __alignof__(struct xsave_struct)); - init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; - - clts(); - /* - * Init all the features state with header_bv being 0x0 - */ - xrstor_state(init_xstate_buf, -1); - /* - * Dump the init state again. This is to identify the init state - * of any feature which is not represented by all zero's. - */ - xsave_state(init_xstate_buf, -1); - stts(); -} - -/* - * Enable and initialize the xsave feature. - */ -static void __init xstate_enable_boot_cpu(void) -{ - unsigned int eax, ebx, ecx, edx; - - if (boot_cpu_data.cpuid_level < XSTATE_CPUID) { - WARN(1, KERN_ERR "XSTATE_CPUID missing\n"); - return; - } - - cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); - pcntxt_mask = eax + ((u64)edx << 32); - - if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { - printk(KERN_ERR "FP/SSE not shown under xsave features 0x%llx\n", - pcntxt_mask); - BUG(); - } - - /* - * Support only the state known to OS. 
- */ - pcntxt_mask = pcntxt_mask & XCNTXT_MASK; - - xstate_enable(); - - /* - * Recompute the context size for enabled features - */ - cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); - xstate_size = ebx; - - update_regset_xstate_info(xstate_size, pcntxt_mask); - prepare_fx_sw_frame(); - - setup_xstate_init(); - - printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%llx, " - "cntxt size 0x%x\n", - pcntxt_mask, xstate_size); -} - -/* - * For the very first instance, this calls xstate_enable_boot_cpu(); - * for all subsequent instances, this calls xstate_enable(). - * - * This is somewhat obfuscated due to the lack of powerful enough - * overrides for the section checks. - */ -void __cpuinit xsave_init(void) -{ - static __refdata void (*next_func)(void) = xstate_enable_boot_cpu; - void (*this_func)(void); - - if (!cpu_has_xsave) - return; - - this_func = next_func; - next_func = xstate_enable; - this_func(); -}