Diffstat (limited to 'arch/x86/platform')
41 files changed, 10224 insertions, 0 deletions
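A note before the diffs: nearly every file in this commit follows one pattern — a table of function pointers with safe no-op defaults that platform code selectively overrides at early boot, as x86_ce4100_early_setup() does with x86_init/x86_platform in ce4100.c below. The following stand-alone C sketch (not part of the commit; all demo_* names are invented for illustration) models that pattern in user space so it can be compiled and run on its own:

/*
 * Minimal model of the x86_init-style override pattern, assuming
 * nothing from the kernel tree. Generic code always calls through
 * the ops table; platforms swap in only the hooks they need.
 */
#include <stdio.h>

struct demo_init_ops {
	void (*arch_setup)(void);   /* like x86_init.oem.arch_setup */
	void (*probe_roms)(void);   /* like x86_init.resources.probe_roms */
};

static void demo_noop(void) { }  /* like x86_init_noop */

/* Generic defaults, established once at boot */
static struct demo_init_ops demo_init = {
	.arch_setup = demo_noop,
	.probe_roms = demo_noop,
};

static void demo_platform_arch_setup(void)
{
	printf("platform-specific arch_setup ran\n");
}

/* The platform's early-setup hook overrides only what it needs */
static void demo_platform_early_setup(void)
{
	demo_init.arch_setup = demo_platform_arch_setup;
	/* probe_roms stays a no-op, much as CE4100 stubs out ROM probing */
}

int main(void)
{
	demo_platform_early_setup();
	demo_init.arch_setup();   /* generic code calls through the table */
	demo_init.probe_roms();
	return 0;
}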
diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile new file mode 100644 index 00000000..8d874396 --- /dev/null +++ b/arch/x86/platform/Makefile @@ -0,0 +1,11 @@ +# Platform specific code goes here +obj-y += ce4100/ +obj-y += efi/ +obj-y += geode/ +obj-y += iris/ +obj-y += mrst/ +obj-y += olpc/ +obj-y += scx200/ +obj-y += sfi/ +obj-y += visws/ +obj-y += uv/ diff --git a/arch/x86/platform/ce4100/Makefile b/arch/x86/platform/ce4100/Makefile new file mode 100644 index 00000000..91fc9297 --- /dev/null +++ b/arch/x86/platform/ce4100/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_X86_INTEL_CE) += ce4100.o diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c new file mode 100644 index 00000000..4c61b521 --- /dev/null +++ b/arch/x86/platform/ce4100/ce4100.c @@ -0,0 +1,146 @@ +/* + * Intel CE4100 platform specific setup code + * + * (C) Copyright 2010 Intel Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/irq.h> +#include <linux/module.h> +#include <linux/serial_reg.h> +#include <linux/serial_8250.h> + +#include <asm/ce4100.h> +#include <asm/prom.h> +#include <asm/setup.h> +#include <asm/i8259.h> +#include <asm/io.h> +#include <asm/io_apic.h> + +static int ce4100_i8042_detect(void) +{ + return 0; +} + +#ifdef CONFIG_SERIAL_8250 + +static unsigned int mem_serial_in(struct uart_port *p, int offset) +{ + offset = offset << p->regshift; + return readl(p->membase + offset); +} + +/* + * The UART Tx interrupts are not set under some conditions and therefore serial + * transmission hangs. This is a silicon issue and has not been root caused. The + * workaround for this silicon issue checks UART_LSR_THRE bit and UART_LSR_TEMT + * bit of LSR register in interrupt handler to see whether at least one of these + * two bits is set, if so then process the transmit request. If this workaround + * is not applied, then the serial transmission may hang. This workaround is for + * errata number 9 in Errata - B step. +*/ + +static unsigned int ce4100_mem_serial_in(struct uart_port *p, int offset) +{ + unsigned int ret, ier, lsr; + + if (offset == UART_IIR) { + offset = offset << p->regshift; + ret = readl(p->membase + offset); + if (ret & UART_IIR_NO_INT) { + /* see if the TX interrupt should have really set */ + ier = mem_serial_in(p, UART_IER); + /* see if the UART's XMIT interrupt is enabled */ + if (ier & UART_IER_THRI) { + lsr = mem_serial_in(p, UART_LSR); + /* now check to see if the UART should be + generating an interrupt (but isn't) */ + if (lsr & (UART_LSR_THRE | UART_LSR_TEMT)) + ret &= ~UART_IIR_NO_INT; + } + } + } else + ret = mem_serial_in(p, offset); + return ret; +} + +static void ce4100_mem_serial_out(struct uart_port *p, int offset, int value) +{ + offset = offset << p->regshift; + writel(value, p->membase + offset); +} + +static void ce4100_serial_fixup(int port, struct uart_port *up, + unsigned short *capabilites) +{ +#ifdef CONFIG_EARLY_PRINTK + /* + * Over ride the legacy port configuration that comes from + * asm/serial.h. 
Using the ioport driver then switching to the + * PCI memmaped driver hangs the IOAPIC + */ + if (up->iotype != UPIO_MEM32) { + up->uartclk = 14745600; + up->mapbase = 0xdffe0200; + set_fixmap_nocache(FIX_EARLYCON_MEM_BASE, + up->mapbase & PAGE_MASK); + up->membase = + (void __iomem *)__fix_to_virt(FIX_EARLYCON_MEM_BASE); + up->membase += up->mapbase & ~PAGE_MASK; + up->iotype = UPIO_MEM32; + up->regshift = 2; + } +#endif + up->iobase = 0; + up->serial_in = ce4100_mem_serial_in; + up->serial_out = ce4100_mem_serial_out; + + *capabilites |= (1 << 12); +} + +static __init void sdv_serial_fixup(void) +{ + serial8250_set_isa_configurator(ce4100_serial_fixup); +} + +#else +static inline void sdv_serial_fixup(void) {}; +#endif + +static void __init sdv_arch_setup(void) +{ + sdv_serial_fixup(); +} + +#ifdef CONFIG_X86_IO_APIC +static void __cpuinit sdv_pci_init(void) +{ + x86_of_pci_init(); + /* We can't set this earlier, because we need to calibrate the timer */ + legacy_pic = &null_legacy_pic; +} +#endif + +/* + * CE4100 specific x86_init function overrides and early setup + * calls. + */ +void __init x86_ce4100_early_setup(void) +{ + x86_init.oem.arch_setup = sdv_arch_setup; + x86_platform.i8042_detect = ce4100_i8042_detect; + x86_init.resources.probe_roms = x86_init_noop; + x86_init.mpparse.get_smp_config = x86_init_uint_noop; + x86_init.mpparse.find_smp_config = x86_init_noop; + x86_init.pci.init = ce4100_pci_init; + +#ifdef CONFIG_X86_IO_APIC + x86_init.pci.init_irq = sdv_pci_init; + x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc_nocheck; +#endif +} diff --git a/arch/x86/platform/ce4100/falconfalls.dts b/arch/x86/platform/ce4100/falconfalls.dts new file mode 100644 index 00000000..ce874f87 --- /dev/null +++ b/arch/x86/platform/ce4100/falconfalls.dts @@ -0,0 +1,433 @@ +/* + * CE4100 on Falcon Falls + * + * (c) Copyright 2010 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. 
+ */ +/dts-v1/; +/ { + model = "intel,falconfalls"; + compatible = "intel,falconfalls"; + #address-cells = <1>; + #size-cells = <1>; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "intel,ce4100"; + reg = <0>; + lapic = <&lapic0>; + }; + }; + + soc@0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "intel,ce4100-cp"; + ranges; + + ioapic1: interrupt-controller@fec00000 { + #interrupt-cells = <2>; + compatible = "intel,ce4100-ioapic"; + interrupt-controller; + reg = <0xfec00000 0x1000>; + }; + + timer@fed00000 { + compatible = "intel,ce4100-hpet"; + reg = <0xfed00000 0x200>; + }; + + lapic0: interrupt-controller@fee00000 { + compatible = "intel,ce4100-lapic"; + reg = <0xfee00000 0x1000>; + }; + + pci@3fc { + #address-cells = <3>; + #size-cells = <2>; + compatible = "intel,ce4100-pci", "pci"; + device_type = "pci"; + bus-range = <0 0>; + ranges = <0x2000000 0 0xbffff000 0xbffff000 0 0x1000 + 0x2000000 0 0xdffe0000 0xdffe0000 0 0x1000 + 0x0000000 0 0x0 0x0 0 0x100>; + + /* Secondary IO-APIC */ + ioapic2: interrupt-controller@0,1 { + #interrupt-cells = <2>; + compatible = "intel,ce4100-ioapic"; + interrupt-controller; + reg = <0x100 0x0 0x0 0x0 0x0>; + assigned-addresses = <0x02000000 0x0 0xbffff000 0x0 0x1000>; + }; + + pci@1,0 { + #address-cells = <3>; + #size-cells = <2>; + compatible = "intel,ce4100-pci", "pci"; + device_type = "pci"; + bus-range = <1 1>; + reg = <0x0800 0x0 0x0 0x0 0x0>; + ranges = <0x2000000 0 0xdffe0000 0x2000000 0 0xdffe0000 0 0x1000>; + + interrupt-parent = <&ioapic2>; + + display@2,0 { + compatible = "pci8086,2e5b.2", + "pci8086,2e5b", + "pciclass038000", + "pciclass0380"; + + reg = <0x11000 0x0 0x0 0x0 0x0>; + interrupts = <0 1>; + }; + + multimedia@3,0 { + compatible = "pci8086,2e5c.2", + "pci8086,2e5c", + "pciclass048000", + "pciclass0480"; + + reg = <0x11800 0x0 0x0 0x0 0x0>; + interrupts = <2 1>; + }; + + multimedia@4,0 { + compatible = "pci8086,2e5d.2", + "pci8086,2e5d", + "pciclass048000", + "pciclass0480"; + + reg = <0x12000 0x0 0x0 0x0 0x0>; + interrupts = <4 1>; + }; + + multimedia@4,1 { + compatible = "pci8086,2e5e.2", + "pci8086,2e5e", + "pciclass048000", + "pciclass0480"; + + reg = <0x12100 0x0 0x0 0x0 0x0>; + interrupts = <5 1>; + }; + + sound@6,0 { + compatible = "pci8086,2e5f.2", + "pci8086,2e5f", + "pciclass040100", + "pciclass0401"; + + reg = <0x13000 0x0 0x0 0x0 0x0>; + interrupts = <6 1>; + }; + + sound@6,1 { + compatible = "pci8086,2e5f.2", + "pci8086,2e5f", + "pciclass040100", + "pciclass0401"; + + reg = <0x13100 0x0 0x0 0x0 0x0>; + interrupts = <7 1>; + }; + + sound@6,2 { + compatible = "pci8086,2e60.2", + "pci8086,2e60", + "pciclass040100", + "pciclass0401"; + + reg = <0x13200 0x0 0x0 0x0 0x0>; + interrupts = <8 1>; + }; + + display@8,0 { + compatible = "pci8086,2e61.2", + "pci8086,2e61", + "pciclass038000", + "pciclass0380"; + + reg = <0x14000 0x0 0x0 0x0 0x0>; + interrupts = <9 1>; + }; + + display@8,1 { + compatible = "pci8086,2e62.2", + "pci8086,2e62", + "pciclass038000", + "pciclass0380"; + + reg = <0x14100 0x0 0x0 0x0 0x0>; + interrupts = <10 1>; + }; + + multimedia@8,2 { + compatible = "pci8086,2e63.2", + "pci8086,2e63", + "pciclass048000", + "pciclass0480"; + + reg = <0x14200 0x0 0x0 0x0 0x0>; + interrupts = <11 1>; + }; + + entertainment-encryption@9,0 { + compatible = "pci8086,2e64.2", + "pci8086,2e64", + "pciclass101000", + "pciclass1010"; + + reg = <0x14800 0x0 0x0 0x0 0x0>; + interrupts = <12 1>; + }; + + localbus@a,0 { + compatible = "pci8086,2e65.2", + 
"pci8086,2e65", + "pciclassff0000", + "pciclassff00"; + + reg = <0x15000 0x0 0x0 0x0 0x0>; + }; + + serial@b,0 { + compatible = "pci8086,2e66.2", + "pci8086,2e66", + "pciclass070003", + "pciclass0700"; + + reg = <0x15800 0x0 0x0 0x0 0x0>; + interrupts = <14 1>; + }; + + pcigpio: gpio@b,1 { + #gpio-cells = <2>; + #interrupt-cells = <2>; + compatible = "pci8086,2e67.2", + "pci8086,2e67", + "pciclassff0000", + "pciclassff00"; + + reg = <0x15900 0x0 0x0 0x0 0x0>; + interrupts = <15 1>; + interrupt-controller; + gpio-controller; + intel,muxctl = <0>; + }; + + i2c-controller@b,2 { + #address-cells = <2>; + #size-cells = <1>; + compatible = "pci8086,2e68.2", + "pci8086,2e68", + "pciclass,ff0000", + "pciclass,ff00"; + + reg = <0x15a00 0x0 0x0 0x0 0x0>; + interrupts = <16 1>; + ranges = <0 0 0x02000000 0 0xdffe0500 0x100 + 1 0 0x02000000 0 0xdffe0600 0x100 + 2 0 0x02000000 0 0xdffe0700 0x100>; + + i2c@0 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "intel,ce4100-i2c-controller"; + reg = <0 0 0x100>; + }; + + i2c@1 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "intel,ce4100-i2c-controller"; + reg = <1 0 0x100>; + + gpio@26 { + #gpio-cells = <2>; + compatible = "ti,pcf8575"; + reg = <0x26>; + gpio-controller; + }; + }; + + i2c@2 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "intel,ce4100-i2c-controller"; + reg = <2 0 0x100>; + + gpio@26 { + #gpio-cells = <2>; + compatible = "ti,pcf8575"; + reg = <0x26>; + gpio-controller; + }; + }; + }; + + smard-card@b,3 { + compatible = "pci8086,2e69.2", + "pci8086,2e69", + "pciclass070500", + "pciclass0705"; + + reg = <0x15b00 0x0 0x0 0x0 0x0>; + interrupts = <15 1>; + }; + + spi-controller@b,4 { + #address-cells = <1>; + #size-cells = <0>; + compatible = + "pci8086,2e6a.2", + "pci8086,2e6a", + "pciclass,ff0000", + "pciclass,ff00"; + + reg = <0x15c00 0x0 0x0 0x0 0x0>; + interrupts = <15 1>; + + dac@0 { + compatible = "ti,pcm1755"; + reg = <0>; + spi-max-frequency = <115200>; + }; + + dac@1 { + compatible = "ti,pcm1609a"; + reg = <1>; + spi-max-frequency = <115200>; + }; + + eeprom@2 { + compatible = "atmel,at93c46"; + reg = <2>; + spi-max-frequency = <115200>; + }; + }; + + multimedia@b,7 { + compatible = "pci8086,2e6d.2", + "pci8086,2e6d", + "pciclassff0000", + "pciclassff00"; + + reg = <0x15f00 0x0 0x0 0x0 0x0>; + }; + + ethernet@c,0 { + compatible = "pci8086,2e6e.2", + "pci8086,2e6e", + "pciclass020000", + "pciclass0200"; + + reg = <0x16000 0x0 0x0 0x0 0x0>; + interrupts = <21 1>; + }; + + clock@c,1 { + compatible = "pci8086,2e6f.2", + "pci8086,2e6f", + "pciclassff0000", + "pciclassff00"; + + reg = <0x16100 0x0 0x0 0x0 0x0>; + interrupts = <3 1>; + }; + + usb@d,0 { + compatible = "pci8086,2e70.2", + "pci8086,2e70", + "pciclass0c0320", + "pciclass0c03"; + + reg = <0x16800 0x0 0x0 0x0 0x0>; + interrupts = <22 1>; + }; + + usb@d,1 { + compatible = "pci8086,2e70.2", + "pci8086,2e70", + "pciclass0c0320", + "pciclass0c03"; + + reg = <0x16900 0x0 0x0 0x0 0x0>; + interrupts = <22 1>; + }; + + sata@e,0 { + compatible = "pci8086,2e71.0", + "pci8086,2e71", + "pciclass010601", + "pciclass0106"; + + reg = <0x17000 0x0 0x0 0x0 0x0>; + interrupts = <23 1>; + }; + + flash@f,0 { + compatible = "pci8086,701.1", + "pci8086,701", + "pciclass050100", + "pciclass0501"; + + reg = <0x17800 0x0 0x0 0x0 0x0>; + interrupts = <13 1>; + }; + + entertainment-encryption@10,0 { + compatible = "pci8086,702.1", + "pci8086,702", + "pciclass101000", + "pciclass1010"; + + reg = <0x18000 0x0 0x0 0x0 0x0>; + }; + + co-processor@11,0 { + compatible = 
"pci8086,703.1", + "pci8086,703", + "pciclass0b4000", + "pciclass0b40"; + + reg = <0x18800 0x0 0x0 0x0 0x0>; + interrupts = <1 1>; + }; + + multimedia@12,0 { + compatible = "pci8086,704.0", + "pci8086,704", + "pciclass048000", + "pciclass0480"; + + reg = <0x19000 0x0 0x0 0x0 0x0>; + }; + }; + + isa@1f,0 { + #address-cells = <2>; + #size-cells = <1>; + compatible = "isa"; + reg = <0xf800 0x0 0x0 0x0 0x0>; + ranges = <1 0 0 0 0 0x100>; + + rtc@70 { + compatible = "intel,ce4100-rtc", "motorola,mc146818"; + interrupts = <8 3>; + interrupt-parent = <&ioapic1>; + ctrl-reg = <2>; + freq-reg = <0x26>; + reg = <1 0x70 2>; + }; + }; + }; + }; +}; diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile new file mode 100644 index 00000000..73b8be0f --- /dev/null +++ b/arch/x86/platform/efi/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c new file mode 100644 index 00000000..92660eda --- /dev/null +++ b/arch/x86/platform/efi/efi.c @@ -0,0 +1,946 @@ +/* + * Common EFI (Extensible Firmware Interface) support functions + * Based on Extensible Firmware Interface Specification version 1.0 + * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * Copyright (C) 1999-2002 Hewlett-Packard Co. + * David Mosberger-Tang <davidm@hpl.hp.com> + * Stephane Eranian <eranian@hpl.hp.com> + * Copyright (C) 2005-2008 Intel Co. + * Fenghua Yu <fenghua.yu@intel.com> + * Bibo Mao <bibo.mao@intel.com> + * Chandramouli Narayanan <mouli@linux.intel.com> + * Huang Ying <ying.huang@intel.com> + * + * Copied from efi_32.c to eliminate the duplicated code between EFI + * 32/64 support code. --ying 2007-10-26 + * + * All EFI Runtime Services are not implemented yet as EFI only + * supports physical mode addressing on SoftSDV. This is to be fixed + * in a future version. --drummond 1999-07-20 + * + * Implemented EFI runtime services and virtual mode calls. --davidm + * + * Goutham Rao: <goutham.rao@intel.com> + * Skip non-WB memory and ignore empty memory ranges. 
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/efi.h> +#include <linux/export.h> +#include <linux/bootmem.h> +#include <linux/memblock.h> +#include <linux/spinlock.h> +#include <linux/uaccess.h> +#include <linux/time.h> +#include <linux/io.h> +#include <linux/reboot.h> +#include <linux/bcd.h> + +#include <asm/setup.h> +#include <asm/efi.h> +#include <asm/time.h> +#include <asm/cacheflush.h> +#include <asm/tlbflush.h> +#include <asm/x86_init.h> + +#define EFI_DEBUG 1 + +int efi_enabled; +EXPORT_SYMBOL(efi_enabled); + +struct efi __read_mostly efi = { + .mps = EFI_INVALID_TABLE_ADDR, + .acpi = EFI_INVALID_TABLE_ADDR, + .acpi20 = EFI_INVALID_TABLE_ADDR, + .smbios = EFI_INVALID_TABLE_ADDR, + .sal_systab = EFI_INVALID_TABLE_ADDR, + .boot_info = EFI_INVALID_TABLE_ADDR, + .hcdp = EFI_INVALID_TABLE_ADDR, + .uga = EFI_INVALID_TABLE_ADDR, + .uv_systab = EFI_INVALID_TABLE_ADDR, +}; +EXPORT_SYMBOL(efi); + +struct efi_memory_map memmap; + +bool efi_64bit; +static bool efi_native; + +static struct efi efi_phys __initdata; +static efi_system_table_t efi_systab __initdata; + +static int __init setup_noefi(char *arg) +{ + efi_enabled = 0; + return 0; +} +early_param("noefi", setup_noefi); + +int add_efi_memmap; +EXPORT_SYMBOL(add_efi_memmap); + +static int __init setup_add_efi_memmap(char *arg) +{ + add_efi_memmap = 1; + return 0; +} +early_param("add_efi_memmap", setup_add_efi_memmap); + + +static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) +{ + unsigned long flags; + efi_status_t status; + + spin_lock_irqsave(&rtc_lock, flags); + status = efi_call_virt2(get_time, tm, tc); + spin_unlock_irqrestore(&rtc_lock, flags); + return status; +} + +static efi_status_t virt_efi_set_time(efi_time_t *tm) +{ + unsigned long flags; + efi_status_t status; + + spin_lock_irqsave(&rtc_lock, flags); + status = efi_call_virt1(set_time, tm); + spin_unlock_irqrestore(&rtc_lock, flags); + return status; +} + +static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled, + efi_bool_t *pending, + efi_time_t *tm) +{ + unsigned long flags; + efi_status_t status; + + spin_lock_irqsave(&rtc_lock, flags); + status = efi_call_virt3(get_wakeup_time, + enabled, pending, tm); + spin_unlock_irqrestore(&rtc_lock, flags); + return status; +} + +static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) +{ + unsigned long flags; + efi_status_t status; + + spin_lock_irqsave(&rtc_lock, flags); + status = efi_call_virt2(set_wakeup_time, + enabled, tm); + spin_unlock_irqrestore(&rtc_lock, flags); + return status; +} + +static efi_status_t virt_efi_get_variable(efi_char16_t *name, + efi_guid_t *vendor, + u32 *attr, + unsigned long *data_size, + void *data) +{ + return efi_call_virt5(get_variable, + name, vendor, attr, + data_size, data); +} + +static efi_status_t virt_efi_get_next_variable(unsigned long *name_size, + efi_char16_t *name, + efi_guid_t *vendor) +{ + return efi_call_virt3(get_next_variable, + name_size, name, vendor); +} + +static efi_status_t virt_efi_set_variable(efi_char16_t *name, + efi_guid_t *vendor, + u32 attr, + unsigned long data_size, + void *data) +{ + return efi_call_virt5(set_variable, + name, vendor, attr, + data_size, data); +} + +static efi_status_t virt_efi_query_variable_info(u32 attr, + u64 *storage_space, + u64 *remaining_space, + u64 *max_variable_size) +{ + if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) + return EFI_UNSUPPORTED; + + return efi_call_virt4(query_variable_info, attr, 
storage_space, + remaining_space, max_variable_size); +} + +static efi_status_t virt_efi_get_next_high_mono_count(u32 *count) +{ + return efi_call_virt1(get_next_high_mono_count, count); +} + +static void virt_efi_reset_system(int reset_type, + efi_status_t status, + unsigned long data_size, + efi_char16_t *data) +{ + efi_call_virt4(reset_system, reset_type, status, + data_size, data); +} + +static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules, + unsigned long count, + unsigned long sg_list) +{ + if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) + return EFI_UNSUPPORTED; + + return efi_call_virt3(update_capsule, capsules, count, sg_list); +} + +static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules, + unsigned long count, + u64 *max_size, + int *reset_type) +{ + if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) + return EFI_UNSUPPORTED; + + return efi_call_virt4(query_capsule_caps, capsules, count, max_size, + reset_type); +} + +static efi_status_t __init phys_efi_set_virtual_address_map( + unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map) +{ + efi_status_t status; + + efi_call_phys_prelog(); + status = efi_call_phys4(efi_phys.set_virtual_address_map, + memory_map_size, descriptor_size, + descriptor_version, virtual_map); + efi_call_phys_epilog(); + return status; +} + +static efi_status_t __init phys_efi_get_time(efi_time_t *tm, + efi_time_cap_t *tc) +{ + unsigned long flags; + efi_status_t status; + + spin_lock_irqsave(&rtc_lock, flags); + efi_call_phys_prelog(); + status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm), + virt_to_phys(tc)); + efi_call_phys_epilog(); + spin_unlock_irqrestore(&rtc_lock, flags); + return status; +} + +int efi_set_rtc_mmss(unsigned long nowtime) +{ + int real_seconds, real_minutes; + efi_status_t status; + efi_time_t eft; + efi_time_cap_t cap; + + status = efi.get_time(&eft, &cap); + if (status != EFI_SUCCESS) { + pr_err("Oops: efitime: can't read time!\n"); + return -1; + } + + real_seconds = nowtime % 60; + real_minutes = nowtime / 60; + if (((abs(real_minutes - eft.minute) + 15)/30) & 1) + real_minutes += 30; + real_minutes %= 60; + eft.minute = real_minutes; + eft.second = real_seconds; + + status = efi.set_time(&eft); + if (status != EFI_SUCCESS) { + pr_err("Oops: efitime: can't write time!\n"); + return -1; + } + return 0; +} + +unsigned long efi_get_time(void) +{ + efi_status_t status; + efi_time_t eft; + efi_time_cap_t cap; + + status = efi.get_time(&eft, &cap); + if (status != EFI_SUCCESS) + pr_err("Oops: efitime: can't read time!\n"); + + return mktime(eft.year, eft.month, eft.day, eft.hour, + eft.minute, eft.second); +} + +/* + * Tell the kernel about the EFI memory map. This might include + * more than the max 128 entries that can fit in the e820 legacy + * (zeropage) memory map. 
+ */ + +static void __init do_add_efi_memmap(void) +{ + void *p; + + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + efi_memory_desc_t *md = p; + unsigned long long start = md->phys_addr; + unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; + int e820_type; + + switch (md->type) { + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + case EFI_BOOT_SERVICES_CODE: + case EFI_BOOT_SERVICES_DATA: + case EFI_CONVENTIONAL_MEMORY: + if (md->attribute & EFI_MEMORY_WB) + e820_type = E820_RAM; + else + e820_type = E820_RESERVED; + break; + case EFI_ACPI_RECLAIM_MEMORY: + e820_type = E820_ACPI; + break; + case EFI_ACPI_MEMORY_NVS: + e820_type = E820_NVS; + break; + case EFI_UNUSABLE_MEMORY: + e820_type = E820_UNUSABLE; + break; + default: + /* + * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE + * EFI_RUNTIME_SERVICES_DATA EFI_MEMORY_MAPPED_IO + * EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE + */ + e820_type = E820_RESERVED; + break; + } + e820_add_region(start, size, e820_type); + } + sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); +} + +int __init efi_memblock_x86_reserve_range(void) +{ + unsigned long pmap; + +#ifdef CONFIG_X86_32 + /* Can't handle data above 4GB at this time */ + if (boot_params.efi_info.efi_memmap_hi) { + pr_err("Memory map is above 4GB, disabling EFI.\n"); + return -EINVAL; + } + pmap = boot_params.efi_info.efi_memmap; +#else + pmap = (boot_params.efi_info.efi_memmap | + ((__u64)boot_params.efi_info.efi_memmap_hi<<32)); +#endif + memmap.phys_map = (void *)pmap; + memmap.nr_map = boot_params.efi_info.efi_memmap_size / + boot_params.efi_info.efi_memdesc_size; + memmap.desc_version = boot_params.efi_info.efi_memdesc_version; + memmap.desc_size = boot_params.efi_info.efi_memdesc_size; + memblock_reserve(pmap, memmap.nr_map * memmap.desc_size); + + return 0; +} + +#if EFI_DEBUG +static void __init print_efi_memmap(void) +{ + efi_memory_desc_t *md; + void *p; + int i; + + for (p = memmap.map, i = 0; + p < memmap.map_end; + p += memmap.desc_size, i++) { + md = p; + pr_info("mem%02u: type=%u, attr=0x%llx, " + "range=[0x%016llx-0x%016llx) (%lluMB)\n", + i, md->type, md->attribute, md->phys_addr, + md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), + (md->num_pages >> (20 - EFI_PAGE_SHIFT))); + } +} +#endif /* EFI_DEBUG */ + +void __init efi_reserve_boot_services(void) +{ + void *p; + + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + efi_memory_desc_t *md = p; + u64 start = md->phys_addr; + u64 size = md->num_pages << EFI_PAGE_SHIFT; + + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) + continue; + /* Only reserve where possible: + * - Not within any already allocated areas + * - Not over any memory area (really needed, if above?) 
+ * - Not within any part of the kernel + * - Not the bios reserved area + */ + if ((start+size >= virt_to_phys(_text) + && start <= virt_to_phys(_end)) || + !e820_all_mapped(start, start+size, E820_RAM) || + memblock_is_region_reserved(start, size)) { + /* Could not reserve, skip it */ + md->num_pages = 0; + memblock_dbg("Could not reserve boot range " + "[0x%010llx-0x%010llx]\n", + start, start+size-1); + } else + memblock_reserve(start, size); + } +} + +static void __init efi_free_boot_services(void) +{ + void *p; + + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + efi_memory_desc_t *md = p; + unsigned long long start = md->phys_addr; + unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; + + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) + continue; + + /* Could not reserve boot area */ + if (!size) + continue; + + free_bootmem_late(start, size); + } +} + +static int __init efi_systab_init(void *phys) +{ + if (efi_64bit) { + efi_system_table_64_t *systab64; + u64 tmp = 0; + + systab64 = early_ioremap((unsigned long)phys, + sizeof(*systab64)); + if (systab64 == NULL) { + pr_err("Couldn't map the system table!\n"); + return -ENOMEM; + } + + efi_systab.hdr = systab64->hdr; + efi_systab.fw_vendor = systab64->fw_vendor; + tmp |= systab64->fw_vendor; + efi_systab.fw_revision = systab64->fw_revision; + efi_systab.con_in_handle = systab64->con_in_handle; + tmp |= systab64->con_in_handle; + efi_systab.con_in = systab64->con_in; + tmp |= systab64->con_in; + efi_systab.con_out_handle = systab64->con_out_handle; + tmp |= systab64->con_out_handle; + efi_systab.con_out = systab64->con_out; + tmp |= systab64->con_out; + efi_systab.stderr_handle = systab64->stderr_handle; + tmp |= systab64->stderr_handle; + efi_systab.stderr = systab64->stderr; + tmp |= systab64->stderr; + efi_systab.runtime = (void *)(unsigned long)systab64->runtime; + tmp |= systab64->runtime; + efi_systab.boottime = (void *)(unsigned long)systab64->boottime; + tmp |= systab64->boottime; + efi_systab.nr_tables = systab64->nr_tables; + efi_systab.tables = systab64->tables; + tmp |= systab64->tables; + + early_iounmap(systab64, sizeof(*systab64)); +#ifdef CONFIG_X86_32 + if (tmp >> 32) { + pr_err("EFI data located above 4GB, disabling EFI.\n"); + return -EINVAL; + } +#endif + } else { + efi_system_table_32_t *systab32; + + systab32 = early_ioremap((unsigned long)phys, + sizeof(*systab32)); + if (systab32 == NULL) { + pr_err("Couldn't map the system table!\n"); + return -ENOMEM; + } + + efi_systab.hdr = systab32->hdr; + efi_systab.fw_vendor = systab32->fw_vendor; + efi_systab.fw_revision = systab32->fw_revision; + efi_systab.con_in_handle = systab32->con_in_handle; + efi_systab.con_in = systab32->con_in; + efi_systab.con_out_handle = systab32->con_out_handle; + efi_systab.con_out = systab32->con_out; + efi_systab.stderr_handle = systab32->stderr_handle; + efi_systab.stderr = systab32->stderr; + efi_systab.runtime = (void *)(unsigned long)systab32->runtime; + efi_systab.boottime = (void *)(unsigned long)systab32->boottime; + efi_systab.nr_tables = systab32->nr_tables; + efi_systab.tables = systab32->tables; + + early_iounmap(systab32, sizeof(*systab32)); + } + + efi.systab = &efi_systab; + + /* + * Verify the EFI Table + */ + if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) { + pr_err("System table signature incorrect!\n"); + return -EINVAL; + } + if ((efi.systab->hdr.revision >> 16) == 0) + pr_err("Warning: System table version " + "%d.%02d, expected 1.00 or 
greater!\n", + efi.systab->hdr.revision >> 16, + efi.systab->hdr.revision & 0xffff); + + return 0; +} + +static int __init efi_config_init(u64 tables, int nr_tables) +{ + void *config_tables, *tablep; + int i, sz; + + if (efi_64bit) + sz = sizeof(efi_config_table_64_t); + else + sz = sizeof(efi_config_table_32_t); + + /* + * Let's see what config tables the firmware passed to us. + */ + config_tables = early_ioremap(tables, nr_tables * sz); + if (config_tables == NULL) { + pr_err("Could not map Configuration table!\n"); + return -ENOMEM; + } + + tablep = config_tables; + pr_info(""); + for (i = 0; i < efi.systab->nr_tables; i++) { + efi_guid_t guid; + unsigned long table; + + if (efi_64bit) { + u64 table64; + guid = ((efi_config_table_64_t *)tablep)->guid; + table64 = ((efi_config_table_64_t *)tablep)->table; + table = table64; +#ifdef CONFIG_X86_32 + if (table64 >> 32) { + pr_cont("\n"); + pr_err("Table located above 4GB, disabling EFI.\n"); + early_iounmap(config_tables, + efi.systab->nr_tables * sz); + return -EINVAL; + } +#endif + } else { + guid = ((efi_config_table_32_t *)tablep)->guid; + table = ((efi_config_table_32_t *)tablep)->table; + } + if (!efi_guidcmp(guid, MPS_TABLE_GUID)) { + efi.mps = table; + pr_cont(" MPS=0x%lx ", table); + } else if (!efi_guidcmp(guid, ACPI_20_TABLE_GUID)) { + efi.acpi20 = table; + pr_cont(" ACPI 2.0=0x%lx ", table); + } else if (!efi_guidcmp(guid, ACPI_TABLE_GUID)) { + efi.acpi = table; + pr_cont(" ACPI=0x%lx ", table); + } else if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID)) { + efi.smbios = table; + pr_cont(" SMBIOS=0x%lx ", table); +#ifdef CONFIG_X86_UV + } else if (!efi_guidcmp(guid, UV_SYSTEM_TABLE_GUID)) { + efi.uv_systab = table; + pr_cont(" UVsystab=0x%lx ", table); +#endif + } else if (!efi_guidcmp(guid, HCDP_TABLE_GUID)) { + efi.hcdp = table; + pr_cont(" HCDP=0x%lx ", table); + } else if (!efi_guidcmp(guid, UGA_IO_PROTOCOL_GUID)) { + efi.uga = table; + pr_cont(" UGA=0x%lx ", table); + } + tablep += sz; + } + pr_cont("\n"); + early_iounmap(config_tables, efi.systab->nr_tables * sz); + return 0; +} + +static int __init efi_runtime_init(void) +{ + efi_runtime_services_t *runtime; + + /* + * Check out the runtime services table. We need to map + * the runtime services table so that we can grab the physical + * address of several of the EFI runtime functions, needed to + * set the firmware into virtual mode. + */ + runtime = early_ioremap((unsigned long)efi.systab->runtime, + sizeof(efi_runtime_services_t)); + if (!runtime) { + pr_err("Could not map the runtime service table!\n"); + return -ENOMEM; + } + /* + * We will only need *early* access to the following + * two EFI runtime services before set_virtual_address_map + * is invoked. + */ + efi_phys.get_time = (efi_get_time_t *)runtime->get_time; + efi_phys.set_virtual_address_map = + (efi_set_virtual_address_map_t *) + runtime->set_virtual_address_map; + /* + * Make efi_get_time can be called before entering + * virtual mode. 
+ */ + efi.get_time = phys_efi_get_time; + early_iounmap(runtime, sizeof(efi_runtime_services_t)); + + return 0; +} + +static int __init efi_memmap_init(void) +{ + /* Map the EFI memory map */ + memmap.map = early_ioremap((unsigned long)memmap.phys_map, + memmap.nr_map * memmap.desc_size); + if (memmap.map == NULL) { + pr_err("Could not map the memory map!\n"); + return -ENOMEM; + } + memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); + + if (add_efi_memmap) + do_add_efi_memmap(); + + return 0; +} + +void __init efi_init(void) +{ + efi_char16_t *c16; + char vendor[100] = "unknown"; + int i = 0; + void *tmp; + +#ifdef CONFIG_X86_32 + if (boot_params.efi_info.efi_systab_hi || + boot_params.efi_info.efi_memmap_hi) { + pr_info("Table located above 4GB, disabling EFI.\n"); + efi_enabled = 0; + return; + } + efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab; + efi_native = !efi_64bit; +#else + efi_phys.systab = (efi_system_table_t *) + (boot_params.efi_info.efi_systab | + ((__u64)boot_params.efi_info.efi_systab_hi<<32)); + efi_native = efi_64bit; +#endif + + if (efi_systab_init(efi_phys.systab)) { + efi_enabled = 0; + return; + } + + /* + * Show what we know for posterity + */ + c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2); + if (c16) { + for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i) + vendor[i] = *c16++; + vendor[i] = '\0'; + } else + pr_err("Could not map the firmware vendor!\n"); + early_iounmap(tmp, 2); + + pr_info("EFI v%u.%.02u by %s\n", + efi.systab->hdr.revision >> 16, + efi.systab->hdr.revision & 0xffff, vendor); + + if (efi_config_init(efi.systab->tables, efi.systab->nr_tables)) { + efi_enabled = 0; + return; + } + + /* + * Note: We currently don't support runtime services on an EFI + * that doesn't match the kernel 32/64-bit mode. + */ + + if (!efi_native) + pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n"); + else if (efi_runtime_init()) { + efi_enabled = 0; + return; + } + + if (efi_memmap_init()) { + efi_enabled = 0; + return; + } +#ifdef CONFIG_X86_32 + if (efi_native) { + x86_platform.get_wallclock = efi_get_time; + x86_platform.set_wallclock = efi_set_rtc_mmss; + } +#endif + +#if EFI_DEBUG + print_efi_memmap(); +#endif +} + +void __init efi_set_executable(efi_memory_desc_t *md, bool executable) +{ + u64 addr, npages; + + addr = md->virt_addr; + npages = md->num_pages; + + memrange_efi_to_native(&addr, &npages); + + if (executable) + set_memory_x(addr, npages); + else + set_memory_nx(addr, npages); +} + +static void __init runtime_code_page_mkexec(void) +{ + efi_memory_desc_t *md; + void *p; + + /* Make EFI runtime service code area executable */ + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; + + if (md->type != EFI_RUNTIME_SERVICES_CODE) + continue; + + efi_set_executable(md, true); + } +} + +/* + * This function will switch the EFI runtime services to virtual mode. + * Essentially, look through the EFI memmap and map every region that + * has the runtime attribute bit set in its memory descriptor and update + * that memory descriptor with the virtual address obtained from ioremap(). + * This enables the runtime services to be called without having to + * thunk back into physical mode for every invocation. 
+ */ +void __init efi_enter_virtual_mode(void) +{ + efi_memory_desc_t *md, *prev_md = NULL; + efi_status_t status; + unsigned long size; + u64 end, systab, addr, npages, end_pfn; + void *p, *va, *new_memmap = NULL; + int count = 0; + + efi.systab = NULL; + + /* + * We don't do virtual mode, since we don't do runtime services, on + * non-native EFI + */ + + if (!efi_native) + goto out; + + /* Merge contiguous regions of the same type and attribute */ + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + u64 prev_size; + md = p; + + if (!prev_md) { + prev_md = md; + continue; + } + + if (prev_md->type != md->type || + prev_md->attribute != md->attribute) { + prev_md = md; + continue; + } + + prev_size = prev_md->num_pages << EFI_PAGE_SHIFT; + + if (md->phys_addr == (prev_md->phys_addr + prev_size)) { + prev_md->num_pages += md->num_pages; + md->type = EFI_RESERVED_TYPE; + md->attribute = 0; + continue; + } + prev_md = md; + } + + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; + if (!(md->attribute & EFI_MEMORY_RUNTIME) && + md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) + continue; + + size = md->num_pages << EFI_PAGE_SHIFT; + end = md->phys_addr + size; + + end_pfn = PFN_UP(end); + if (end_pfn <= max_low_pfn_mapped + || (end_pfn > (1UL << (32 - PAGE_SHIFT)) + && end_pfn <= max_pfn_mapped)) + va = __va(md->phys_addr); + else + va = efi_ioremap(md->phys_addr, size, md->type); + + md->virt_addr = (u64) (unsigned long) va; + + if (!va) { + pr_err("ioremap of 0x%llX failed!\n", + (unsigned long long)md->phys_addr); + continue; + } + + if (!(md->attribute & EFI_MEMORY_WB)) { + addr = md->virt_addr; + npages = md->num_pages; + memrange_efi_to_native(&addr, &npages); + set_memory_uc(addr, npages); + } + + systab = (u64) (unsigned long) efi_phys.systab; + if (md->phys_addr <= systab && systab < end) { + systab += md->virt_addr - md->phys_addr; + efi.systab = (efi_system_table_t *) (unsigned long) systab; + } + new_memmap = krealloc(new_memmap, + (count + 1) * memmap.desc_size, + GFP_KERNEL); + memcpy(new_memmap + (count * memmap.desc_size), md, + memmap.desc_size); + count++; + } + + BUG_ON(!efi.systab); + + status = phys_efi_set_virtual_address_map( + memmap.desc_size * count, + memmap.desc_size, + memmap.desc_version, + (efi_memory_desc_t *)__pa(new_memmap)); + + if (status != EFI_SUCCESS) { + pr_alert("Unable to switch EFI into virtual mode " + "(status=%lx)!\n", status); + panic("EFI call to SetVirtualAddressMap() failed!"); + } + + /* + * Thankfully, it does seem that no runtime services other than + * SetVirtualAddressMap() will touch boot services code, so we can + * get rid of it all at this point + */ + efi_free_boot_services(); + + /* + * Now that EFI is in virtual mode, update the function + * pointers in the runtime service table to the new virtual addresses. + * + * Call EFI services through wrapper functions. 
+ */ + efi.get_time = virt_efi_get_time; + efi.set_time = virt_efi_set_time; + efi.get_wakeup_time = virt_efi_get_wakeup_time; + efi.set_wakeup_time = virt_efi_set_wakeup_time; + efi.get_variable = virt_efi_get_variable; + efi.get_next_variable = virt_efi_get_next_variable; + efi.set_variable = virt_efi_set_variable; + efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; + efi.reset_system = virt_efi_reset_system; + efi.set_virtual_address_map = NULL; + efi.query_variable_info = virt_efi_query_variable_info; + efi.update_capsule = virt_efi_update_capsule; + efi.query_capsule_caps = virt_efi_query_capsule_caps; + if (__supported_pte_mask & _PAGE_NX) + runtime_code_page_mkexec(); + +out: + early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); + memmap.map = NULL; + kfree(new_memmap); +} + +/* + * Convenience functions to obtain memory types and attributes + */ +u32 efi_mem_type(unsigned long phys_addr) +{ + efi_memory_desc_t *md; + void *p; + + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; + if ((md->phys_addr <= phys_addr) && + (phys_addr < (md->phys_addr + + (md->num_pages << EFI_PAGE_SHIFT)))) + return md->type; + } + return 0; +} + +u64 efi_mem_attributes(unsigned long phys_addr) +{ + efi_memory_desc_t *md; + void *p; + + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; + if ((md->phys_addr <= phys_addr) && + (phys_addr < (md->phys_addr + + (md->num_pages << EFI_PAGE_SHIFT)))) + return md->attribute; + } + return 0; +} diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c new file mode 100644 index 00000000..40e44694 --- /dev/null +++ b/arch/x86/platform/efi/efi_32.c @@ -0,0 +1,69 @@ +/* + * Extensible Firmware Interface + * + * Based on Extensible Firmware Interface Specification version 1.0 + * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * Copyright (C) 1999-2002 Hewlett-Packard Co. + * David Mosberger-Tang <davidm@hpl.hp.com> + * Stephane Eranian <eranian@hpl.hp.com> + * + * All EFI Runtime Services are not implemented yet as EFI only + * supports physical mode addressing on SoftSDV. This is to be fixed + * in a future version. --drummond 1999-07-20 + * + * Implemented EFI runtime services and virtual mode calls. --davidm + * + * Goutham Rao: <goutham.rao@intel.com> + * Skip non-WB memory and ignore empty memory ranges. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/ioport.h> +#include <linux/efi.h> + +#include <asm/io.h> +#include <asm/desc.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> +#include <asm/efi.h> + +/* + * To make EFI call EFI runtime service in physical addressing mode we need + * prelog/epilog before/after the invocation to disable interrupt, to + * claim EFI runtime service handler exclusively and to duplicate a memory in + * low memory space say 0 - 3G. 
+ */ + +static unsigned long efi_rt_eflags; + +void efi_call_phys_prelog(void) +{ + struct desc_ptr gdt_descr; + + local_irq_save(efi_rt_eflags); + + load_cr3(initial_page_table); + __flush_tlb_all(); + + gdt_descr.address = __pa(get_cpu_gdt_table(0)); + gdt_descr.size = GDT_SIZE - 1; + load_gdt(&gdt_descr); +} + +void efi_call_phys_epilog(void) +{ + struct desc_ptr gdt_descr; + + gdt_descr.address = (unsigned long)get_cpu_gdt_table(0); + gdt_descr.size = GDT_SIZE - 1; + load_gdt(&gdt_descr); + + load_cr3(swapper_pg_dir); + __flush_tlb_all(); + + local_irq_restore(efi_rt_eflags); +} diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c new file mode 100644 index 00000000..ac3aa54e --- /dev/null +++ b/arch/x86/platform/efi/efi_64.c @@ -0,0 +1,99 @@ +/* + * x86_64 specific EFI support functions + * Based on Extensible Firmware Interface Specification version 1.0 + * + * Copyright (C) 2005-2008 Intel Co. + * Fenghua Yu <fenghua.yu@intel.com> + * Bibo Mao <bibo.mao@intel.com> + * Chandramouli Narayanan <mouli@linux.intel.com> + * Huang Ying <ying.huang@intel.com> + * + * Code to convert EFI to E820 map has been implemented in elilo bootloader + * based on a EFI patch by Edgar Hucek. Based on the E820 map, the page table + * is setup appropriately for EFI runtime code. + * - mouli 06/14/2007. + * + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/types.h> +#include <linux/spinlock.h> +#include <linux/bootmem.h> +#include <linux/ioport.h> +#include <linux/module.h> +#include <linux/efi.h> +#include <linux/uaccess.h> +#include <linux/io.h> +#include <linux/reboot.h> + +#include <asm/setup.h> +#include <asm/page.h> +#include <asm/e820.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> +#include <asm/proto.h> +#include <asm/efi.h> +#include <asm/cacheflush.h> +#include <asm/fixmap.h> + +static pgd_t save_pgd __initdata; +static unsigned long efi_flags __initdata; + +static void __init early_code_mapping_set_exec(int executable) +{ + efi_memory_desc_t *md; + void *p; + + if (!(__supported_pte_mask & _PAGE_NX)) + return; + + /* Make EFI service code area executable */ + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; + if (md->type == EFI_RUNTIME_SERVICES_CODE || + md->type == EFI_BOOT_SERVICES_CODE) + efi_set_executable(md, executable); + } +} + +void __init efi_call_phys_prelog(void) +{ + unsigned long vaddress; + + early_code_mapping_set_exec(1); + local_irq_save(efi_flags); + vaddress = (unsigned long)__va(0x0UL); + save_pgd = *pgd_offset_k(0x0UL); + set_pgd(pgd_offset_k(0x0UL), *pgd_offset_k(vaddress)); + __flush_tlb_all(); +} + +void __init efi_call_phys_epilog(void) +{ + /* + * After the lock is released, the original page table is restored. 
+ */ + set_pgd(pgd_offset_k(0x0UL), save_pgd); + __flush_tlb_all(); + local_irq_restore(efi_flags); + early_code_mapping_set_exec(0); +} + +void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, + u32 type) +{ + unsigned long last_map_pfn; + + if (type == EFI_MEMORY_MAPPED_IO) + return ioremap(phys_addr, size); + + last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); + if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) { + unsigned long top = last_map_pfn << PAGE_SHIFT; + efi_ioremap(top, size - (top - phys_addr), type); + } + + return (void __iomem *)__va(phys_addr); +} diff --git a/arch/x86/platform/efi/efi_stub_32.S b/arch/x86/platform/efi/efi_stub_32.S new file mode 100644 index 00000000..fbe66e62 --- /dev/null +++ b/arch/x86/platform/efi/efi_stub_32.S @@ -0,0 +1,123 @@ +/* + * EFI call stub for IA32. + * + * This stub allows us to make EFI calls in physical mode with interrupts + * turned off. + */ + +#include <linux/linkage.h> +#include <asm/page_types.h> + +/* + * efi_call_phys(void *, ...) is a function with variable parameters. + * All the callers of this function assure that all the parameters are 4-bytes. + */ + +/* + * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save. + * So we'd better save all of them at the beginning of this function and restore + * at the end no matter how many we use, because we can not assure EFI runtime + * service functions will comply with gcc calling convention, too. + */ + +.text +ENTRY(efi_call_phys) + /* + * 0. The function can only be called in Linux kernel. So CS has been + * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found + * the values of these registers are the same. And, the corresponding + * GDT entries are identical. So I will do nothing about segment reg + * and GDT, but change GDT base register in prelog and epilog. + */ + + /* + * 1. Now I am running with EIP = <physical address> + PAGE_OFFSET. + * But to make it smoothly switch from virtual mode to flat mode. + * The mapping of lower virtual memory has been created in prelog and + * epilog. + */ + movl $1f, %edx + subl $__PAGE_OFFSET, %edx + jmp *%edx +1: + + /* + * 2. Now on the top of stack is the return + * address in the caller of efi_call_phys(), then parameter 1, + * parameter 2, ..., param n. To make things easy, we save the return + * address of efi_call_phys in a global variable. + */ + popl %edx + movl %edx, saved_return_addr + /* get the function pointer into ECX*/ + popl %ecx + movl %ecx, efi_rt_function_ptr + movl $2f, %edx + subl $__PAGE_OFFSET, %edx + pushl %edx + + /* + * 3. Clear PG bit in %CR0. + */ + movl %cr0, %edx + andl $0x7fffffff, %edx + movl %edx, %cr0 + jmp 1f +1: + + /* + * 4. Adjust stack pointer. + */ + subl $__PAGE_OFFSET, %esp + + /* + * 5. Call the physical function. + */ + jmp *%ecx + +2: + /* + * 6. After EFI runtime service returns, control will return to + * following instruction. We'd better readjust stack pointer first. + */ + addl $__PAGE_OFFSET, %esp + + /* + * 7. Restore PG bit + */ + movl %cr0, %edx + orl $0x80000000, %edx + movl %edx, %cr0 + jmp 1f +1: + /* + * 8. Now restore the virtual mode from flat mode by + * adding EIP with PAGE_OFFSET. + */ + movl $1f, %edx + jmp *%edx +1: + + /* + * 9. Balance the stack. And because EAX contain the return value, + * we'd better not clobber it. + */ + leal efi_rt_function_ptr, %edx + movl (%edx), %ecx + pushl %ecx + + /* + * 10. Push the saved return address onto the stack and return. 
+ */ + leal saved_return_addr, %edx + movl (%edx), %ecx + pushl %ecx + ret +ENDPROC(efi_call_phys) +.previous + +.data +saved_return_addr: + .long 0 +efi_rt_function_ptr: + .long 0 diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S new file mode 100644 index 00000000..4c07ccab --- /dev/null +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -0,0 +1,116 @@ +/* + * Function calling ABI conversion from Linux to EFI for x86_64 + * + * Copyright (C) 2007 Intel Corp + * Bibo Mao <bibo.mao@intel.com> + * Huang Ying <ying.huang@intel.com> + */ + +#include <linux/linkage.h> + +#define SAVE_XMM \ + mov %rsp, %rax; \ + subq $0x70, %rsp; \ + and $~0xf, %rsp; \ + mov %rax, (%rsp); \ + mov %cr0, %rax; \ + clts; \ + mov %rax, 0x8(%rsp); \ + movaps %xmm0, 0x60(%rsp); \ + movaps %xmm1, 0x50(%rsp); \ + movaps %xmm2, 0x40(%rsp); \ + movaps %xmm3, 0x30(%rsp); \ + movaps %xmm4, 0x20(%rsp); \ + movaps %xmm5, 0x10(%rsp) + +#define RESTORE_XMM \ + movaps 0x60(%rsp), %xmm0; \ + movaps 0x50(%rsp), %xmm1; \ + movaps 0x40(%rsp), %xmm2; \ + movaps 0x30(%rsp), %xmm3; \ + movaps 0x20(%rsp), %xmm4; \ + movaps 0x10(%rsp), %xmm5; \ + mov 0x8(%rsp), %rsi; \ + mov %rsi, %cr0; \ + mov (%rsp), %rsp + +ENTRY(efi_call0) + SAVE_XMM + subq $32, %rsp + call *%rdi + addq $32, %rsp + RESTORE_XMM + ret +ENDPROC(efi_call0) + +ENTRY(efi_call1) + SAVE_XMM + subq $32, %rsp + mov %rsi, %rcx + call *%rdi + addq $32, %rsp + RESTORE_XMM + ret +ENDPROC(efi_call1) + +ENTRY(efi_call2) + SAVE_XMM + subq $32, %rsp + mov %rsi, %rcx + call *%rdi + addq $32, %rsp + RESTORE_XMM + ret +ENDPROC(efi_call2) + +ENTRY(efi_call3) + SAVE_XMM + subq $32, %rsp + mov %rcx, %r8 + mov %rsi, %rcx + call *%rdi + addq $32, %rsp + RESTORE_XMM + ret +ENDPROC(efi_call3) + +ENTRY(efi_call4) + SAVE_XMM + subq $32, %rsp + mov %r8, %r9 + mov %rcx, %r8 + mov %rsi, %rcx + call *%rdi + addq $32, %rsp + RESTORE_XMM + ret +ENDPROC(efi_call4) + +ENTRY(efi_call5) + SAVE_XMM + subq $48, %rsp + mov %r9, 32(%rsp) + mov %r8, %r9 + mov %rcx, %r8 + mov %rsi, %rcx + call *%rdi + addq $48, %rsp + RESTORE_XMM + ret +ENDPROC(efi_call5) + +ENTRY(efi_call6) + SAVE_XMM + mov (%rsp), %rax + mov 8(%rax), %rax + subq $48, %rsp + mov %r9, 32(%rsp) + mov %rax, 40(%rsp) + mov %r8, %r9 + mov %rcx, %r8 + mov %rsi, %rcx + call *%rdi + addq $48, %rsp + RESTORE_XMM + ret +ENDPROC(efi_call6) diff --git a/arch/x86/platform/geode/Makefile b/arch/x86/platform/geode/Makefile new file mode 100644 index 00000000..5b51194f --- /dev/null +++ b/arch/x86/platform/geode/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_ALIX) += alix.o +obj-$(CONFIG_NET5501) += net5501.o +obj-$(CONFIG_GEOS) += geos.o diff --git a/arch/x86/platform/geode/alix.c b/arch/x86/platform/geode/alix.c new file mode 100644 index 00000000..90e23e76 --- /dev/null +++ b/arch/x86/platform/geode/alix.c @@ -0,0 +1,200 @@ +/* + * System Specific setup for PCEngines ALIX. + * At the moment this means setup of GPIO control of LEDs + * on Alix.2/3/6 boards. + * + * + * Copyright (C) 2008 Constantin Baranov <const@mimas.ru> + * Copyright (C) 2011 Ed Wildgoose <kernel@wildgooses.com> + * and Philip Prindeville <philipp@redfish-solutions.com> + * + * TODO: There are large similarities with leds-net5501.c + * by Alessandro Zummo <a.zummo@towertech.it> + * In the future leds-net5501.c should be migrated over to platform + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. 
+ */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/string.h> +#include <linux/module.h> +#include <linux/leds.h> +#include <linux/platform_device.h> +#include <linux/gpio.h> +#include <linux/input.h> +#include <linux/gpio_keys.h> +#include <linux/dmi.h> + +#include <asm/geode.h> + +#define BIOS_SIGNATURE_TINYBIOS 0xf0000 +#define BIOS_SIGNATURE_COREBOOT 0x500 +#define BIOS_REGION_SIZE 0x10000 + +static bool force = 0; +module_param(force, bool, 0444); +/* FIXME: Award bios is not automatically detected as Alix platform */ +MODULE_PARM_DESC(force, "Force detection as ALIX.2/ALIX.3 platform"); + +static struct gpio_keys_button alix_gpio_buttons[] = { + { + .code = KEY_RESTART, + .gpio = 24, + .active_low = 1, + .desc = "Reset button", + .type = EV_KEY, + .wakeup = 0, + .debounce_interval = 100, + .can_disable = 0, + } +}; +static struct gpio_keys_platform_data alix_buttons_data = { + .buttons = alix_gpio_buttons, + .nbuttons = ARRAY_SIZE(alix_gpio_buttons), + .poll_interval = 20, +}; + +static struct platform_device alix_buttons_dev = { + .name = "gpio-keys-polled", + .id = 1, + .dev = { + .platform_data = &alix_buttons_data, + } +}; + +static struct gpio_led alix_leds[] = { + { + .name = "alix:1", + .gpio = 6, + .default_trigger = "default-on", + .active_low = 1, + }, + { + .name = "alix:2", + .gpio = 25, + .default_trigger = "default-off", + .active_low = 1, + }, + { + .name = "alix:3", + .gpio = 27, + .default_trigger = "default-off", + .active_low = 1, + }, +}; + +static struct gpio_led_platform_data alix_leds_data = { + .num_leds = ARRAY_SIZE(alix_leds), + .leds = alix_leds, +}; + +static struct platform_device alix_leds_dev = { + .name = "leds-gpio", + .id = -1, + .dev.platform_data = &alix_leds_data, +}; + +static struct __initdata platform_device *alix_devs[] = { + &alix_buttons_dev, + &alix_leds_dev, +}; + +static void __init register_alix(void) +{ + /* Setup LED control through leds-gpio driver */ + platform_add_devices(alix_devs, ARRAY_SIZE(alix_devs)); +} + +static bool __init alix_present(unsigned long bios_phys, + const char *alix_sig, + size_t alix_sig_len) +{ + const size_t bios_len = BIOS_REGION_SIZE; + const char *bios_virt; + const char *scan_end; + const char *p; + char name[64]; + + if (force) { + printk(KERN_NOTICE "%s: forced to skip BIOS test, " + "assume system is ALIX.2/ALIX.3\n", + KBUILD_MODNAME); + return true; + } + + bios_virt = phys_to_virt(bios_phys); + scan_end = bios_virt + bios_len - (alix_sig_len + 2); + for (p = bios_virt; p < scan_end; p++) { + const char *tail; + char *a; + + if (memcmp(p, alix_sig, alix_sig_len) != 0) + continue; + + memcpy(name, p, sizeof(name)); + + /* remove the first \0 character from string */ + a = strchr(name, '\0'); + if (a) + *a = ' '; + + /* cut the string at a newline */ + a = strchr(name, '\r'); + if (a) + *a = '\0'; + + tail = p + alix_sig_len; + if ((tail[0] == '2' || tail[0] == '3' || tail[0] == '6')) { + printk(KERN_INFO + "%s: system is recognized as \"%s\"\n", + KBUILD_MODNAME, name); + return true; + } + } + + return false; +} + +static bool __init alix_present_dmi(void) +{ + const char *vendor, *product; + + vendor = dmi_get_system_info(DMI_SYS_VENDOR); + if (!vendor || strcmp(vendor, "PC Engines")) + return false; + + product = dmi_get_system_info(DMI_PRODUCT_NAME); + if (!product || (strcmp(product, "ALIX.2D") && strcmp(product, "ALIX.6"))) + return false; + + printk(KERN_INFO "%s: system is recognized as \"%s %s\"\n", + KBUILD_MODNAME, vendor, product); + + return 
true; +} + +static int __init alix_init(void) +{ + const char tinybios_sig[] = "PC Engines ALIX."; + const char coreboot_sig[] = "PC Engines\0ALIX."; + + if (!is_geode()) + return 0; + + if (alix_present(BIOS_SIGNATURE_TINYBIOS, tinybios_sig, sizeof(tinybios_sig) - 1) || + alix_present(BIOS_SIGNATURE_COREBOOT, coreboot_sig, sizeof(coreboot_sig) - 1) || + alix_present_dmi()) + register_alix(); + + return 0; +} + +module_init(alix_init); + +MODULE_AUTHOR("Ed Wildgoose <kernel@wildgooses.com>"); +MODULE_DESCRIPTION("PCEngines ALIX System Setup"); +MODULE_LICENSE("GPL"); diff --git a/arch/x86/platform/geode/geos.c b/arch/x86/platform/geode/geos.c new file mode 100644 index 00000000..c2e6d535 --- /dev/null +++ b/arch/x86/platform/geode/geos.c @@ -0,0 +1,128 @@ +/* + * System Specific setup for Traverse Technologies GEOS. + * At the moment this means setup of GPIO control of LEDs. + * + * Copyright (C) 2008 Constantin Baranov <const@mimas.ru> + * Copyright (C) 2011 Ed Wildgoose <kernel@wildgooses.com> + * and Philip Prindeville <philipp@redfish-solutions.com> + * + * TODO: There are large similarities with leds-net5501.c + * by Alessandro Zummo <a.zummo@towertech.it> + * In the future leds-net5501.c should be migrated over to platform + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/string.h> +#include <linux/module.h> +#include <linux/leds.h> +#include <linux/platform_device.h> +#include <linux/gpio.h> +#include <linux/input.h> +#include <linux/gpio_keys.h> +#include <linux/dmi.h> + +#include <asm/geode.h> + +static struct gpio_keys_button geos_gpio_buttons[] = { + { + .code = KEY_RESTART, + .gpio = 3, + .active_low = 1, + .desc = "Reset button", + .type = EV_KEY, + .wakeup = 0, + .debounce_interval = 100, + .can_disable = 0, + } +}; +static struct gpio_keys_platform_data geos_buttons_data = { + .buttons = geos_gpio_buttons, + .nbuttons = ARRAY_SIZE(geos_gpio_buttons), + .poll_interval = 20, +}; + +static struct platform_device geos_buttons_dev = { + .name = "gpio-keys-polled", + .id = 1, + .dev = { + .platform_data = &geos_buttons_data, + } +}; + +static struct gpio_led geos_leds[] = { + { + .name = "geos:1", + .gpio = 6, + .default_trigger = "default-on", + .active_low = 1, + }, + { + .name = "geos:2", + .gpio = 25, + .default_trigger = "default-off", + .active_low = 1, + }, + { + .name = "geos:3", + .gpio = 27, + .default_trigger = "default-off", + .active_low = 1, + }, +}; + +static struct gpio_led_platform_data geos_leds_data = { + .num_leds = ARRAY_SIZE(geos_leds), + .leds = geos_leds, +}; + +static struct platform_device geos_leds_dev = { + .name = "leds-gpio", + .id = -1, + .dev.platform_data = &geos_leds_data, +}; + +static struct __initdata platform_device *geos_devs[] = { + &geos_buttons_dev, + &geos_leds_dev, +}; + +static void __init register_geos(void) +{ + /* Setup LED control through leds-gpio driver */ + platform_add_devices(geos_devs, ARRAY_SIZE(geos_devs)); +} + +static int __init geos_init(void) +{ + const char *vendor, *product; + + if (!is_geode()) + return 0; + + vendor = dmi_get_system_info(DMI_SYS_VENDOR); + if (!vendor || strcmp(vendor, "Traverse Technologies")) + return 0; + + product = dmi_get_system_info(DMI_PRODUCT_NAME); + if (!product || strcmp(product, "Geos")) + return 0; + + printk(KERN_INFO "%s: system is recognized as 
\"%s %s\"\n", + KBUILD_MODNAME, vendor, product); + + register_geos(); + + return 0; +} + +module_init(geos_init); + +MODULE_AUTHOR("Philip Prindeville <philipp@redfish-solutions.com>"); +MODULE_DESCRIPTION("Traverse Technologies Geos System Setup"); +MODULE_LICENSE("GPL"); diff --git a/arch/x86/platform/geode/net5501.c b/arch/x86/platform/geode/net5501.c new file mode 100644 index 00000000..646e3b5b --- /dev/null +++ b/arch/x86/platform/geode/net5501.c @@ -0,0 +1,154 @@ +/* + * System Specific setup for Soekris net5501 + * At the moment this means setup of GPIO control of LEDs and buttons + * on net5501 boards. + * + * + * Copyright (C) 2008-2009 Tower Technologies + * Written by Alessandro Zummo <a.zummo@towertech.it> + * + * Copyright (C) 2008 Constantin Baranov <const@mimas.ru> + * Copyright (C) 2011 Ed Wildgoose <kernel@wildgooses.com> + * and Philip Prindeville <philipp@redfish-solutions.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/string.h> +#include <linux/module.h> +#include <linux/leds.h> +#include <linux/platform_device.h> +#include <linux/gpio.h> +#include <linux/input.h> +#include <linux/gpio_keys.h> + +#include <asm/geode.h> + +#define BIOS_REGION_BASE 0xffff0000 +#define BIOS_REGION_SIZE 0x00010000 + +static struct gpio_keys_button net5501_gpio_buttons[] = { + { + .code = KEY_RESTART, + .gpio = 24, + .active_low = 1, + .desc = "Reset button", + .type = EV_KEY, + .wakeup = 0, + .debounce_interval = 100, + .can_disable = 0, + } +}; +static struct gpio_keys_platform_data net5501_buttons_data = { + .buttons = net5501_gpio_buttons, + .nbuttons = ARRAY_SIZE(net5501_gpio_buttons), + .poll_interval = 20, +}; + +static struct platform_device net5501_buttons_dev = { + .name = "gpio-keys-polled", + .id = 1, + .dev = { + .platform_data = &net5501_buttons_data, + } +}; + +static struct gpio_led net5501_leds[] = { + { + .name = "net5501:1", + .gpio = 6, + .default_trigger = "default-on", + .active_low = 0, + }, +}; + +static struct gpio_led_platform_data net5501_leds_data = { + .num_leds = ARRAY_SIZE(net5501_leds), + .leds = net5501_leds, +}; + +static struct platform_device net5501_leds_dev = { + .name = "leds-gpio", + .id = -1, + .dev.platform_data = &net5501_leds_data, +}; + +static struct __initdata platform_device *net5501_devs[] = { + &net5501_buttons_dev, + &net5501_leds_dev, +}; + +static void __init register_net5501(void) +{ + /* Setup LED control through leds-gpio driver */ + platform_add_devices(net5501_devs, ARRAY_SIZE(net5501_devs)); +} + +struct net5501_board { + u16 offset; + u16 len; + char *sig; +}; + +static struct net5501_board __initdata boards[] = { + { 0xb7b, 7, "net5501" }, /* net5501 v1.33/1.33c */ + { 0xb1f, 7, "net5501" }, /* net5501 v1.32i */ +}; + +static bool __init net5501_present(void) +{ + int i; + unsigned char *rombase, *bios; + bool found = false; + + rombase = ioremap(BIOS_REGION_BASE, BIOS_REGION_SIZE - 1); + if (!rombase) { + printk(KERN_ERR "%s: failed to get rombase\n", KBUILD_MODNAME); + return found; + } + + bios = rombase + 0x20; /* null terminated */ + + if (memcmp(bios, "comBIOS", 7)) + goto unmap; + + for (i = 0; i < ARRAY_SIZE(boards); i++) { + unsigned char *model = rombase + boards[i].offset; + + if (!memcmp(model, boards[i].sig, boards[i].len)) { + printk(KERN_INFO "%s: system is recognized 
as \"%s\"\n", + KBUILD_MODNAME, model); + + found = true; + break; + } + } + +unmap: + iounmap(rombase); + return found; +} + +static int __init net5501_init(void) +{ + if (!is_geode()) + return 0; + + if (!net5501_present()) + return 0; + + register_net5501(); + + return 0; +} + +module_init(net5501_init); + +MODULE_AUTHOR("Philip Prindeville <philipp@redfish-solutions.com>"); +MODULE_DESCRIPTION("Soekris net5501 System Setup"); +MODULE_LICENSE("GPL"); diff --git a/arch/x86/platform/iris/Makefile b/arch/x86/platform/iris/Makefile new file mode 100644 index 00000000..db921983 --- /dev/null +++ b/arch/x86/platform/iris/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_X86_32_IRIS) += iris.o diff --git a/arch/x86/platform/iris/iris.c b/arch/x86/platform/iris/iris.c new file mode 100644 index 00000000..5917eb56 --- /dev/null +++ b/arch/x86/platform/iris/iris.c @@ -0,0 +1,91 @@ +/* + * Eurobraille/Iris power off support. + * + * Eurobraille's Iris machine is a PC with no APM or ACPI support. + * It is shutdown by a special I/O sequence which this module provides. + * + * Copyright (C) Shérab <Sebastien.Hinderer@ens-lyon.org> + * + * This program is free software ; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation ; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY ; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with the program ; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/moduleparam.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/pm.h> +#include <asm/io.h> + +#define IRIS_GIO_BASE 0x340 +#define IRIS_GIO_INPUT IRIS_GIO_BASE +#define IRIS_GIO_OUTPUT (IRIS_GIO_BASE + 1) +#define IRIS_GIO_PULSE 0x80 /* First byte to send */ +#define IRIS_GIO_REST 0x00 /* Second byte to send */ +#define IRIS_GIO_NODEV 0xff /* Likely not an Iris */ + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Sébastien Hinderer <Sebastien.Hinderer@ens-lyon.org>"); +MODULE_DESCRIPTION("A power_off handler for Iris devices from EuroBraille"); +MODULE_SUPPORTED_DEVICE("Eurobraille/Iris"); + +static bool force; + +module_param(force, bool, 0); +MODULE_PARM_DESC(force, "Set to one to force poweroff handler installation."); + +static void (*old_pm_power_off)(void); + +static void iris_power_off(void) +{ + outb(IRIS_GIO_PULSE, IRIS_GIO_OUTPUT); + msleep(850); + outb(IRIS_GIO_REST, IRIS_GIO_OUTPUT); +} + +/* + * Before installing the power_off handler, try to make sure the OS is + * running on an Iris. Since Iris does not support DMI, this is done + * by reading its input port and seeing whether the read value is + * meaningful. + */ +static int iris_init(void) +{ + unsigned char status; + if (force != 1) { + printk(KERN_ERR "The force parameter has not been set to 1 so the Iris poweroff handler will not be installed.\n"); + return -ENODEV; + } + status = inb(IRIS_GIO_INPUT); + if (status == IRIS_GIO_NODEV) { + printk(KERN_ERR "This machine does not seem to be an Iris. 
Power_off handler not installed.\n"); + return -ENODEV; + } + old_pm_power_off = pm_power_off; + pm_power_off = &iris_power_off; + printk(KERN_INFO "Iris power_off handler installed.\n"); + + return 0; +} + +static void iris_exit(void) +{ + pm_power_off = old_pm_power_off; + printk(KERN_INFO "Iris power_off handler uninstalled.\n"); +} + +module_init(iris_init); +module_exit(iris_exit); diff --git a/arch/x86/platform/mrst/Makefile b/arch/x86/platform/mrst/Makefile new file mode 100644 index 00000000..af1da7e6 --- /dev/null +++ b/arch/x86/platform/mrst/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_X86_INTEL_MID) += mrst.o +obj-$(CONFIG_X86_INTEL_MID) += vrtc.o +obj-$(CONFIG_EARLY_PRINTK_INTEL_MID) += early_printk_mrst.o diff --git a/arch/x86/platform/mrst/early_printk_mrst.c b/arch/x86/platform/mrst/early_printk_mrst.c new file mode 100644 index 00000000..3c6e3284 --- /dev/null +++ b/arch/x86/platform/mrst/early_printk_mrst.c @@ -0,0 +1,327 @@ +/* + * early_printk_mrst.c - early consoles for Intel MID platforms + * + * Copyright (c) 2008-2010, Intel Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +/* + * This file implements two early consoles named mrst and hsu. + * mrst is based on Maxim3110 spi-uart device, it exists in both + * Moorestown and Medfield platforms, while hsu is based on a High + * Speed UART device which only exists in the Medfield platform + */ + +#include <linux/serial_reg.h> +#include <linux/serial_mfd.h> +#include <linux/kmsg_dump.h> +#include <linux/console.h> +#include <linux/kernel.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/io.h> + +#include <asm/fixmap.h> +#include <asm/pgtable.h> +#include <asm/mrst.h> + +#define MRST_SPI_TIMEOUT 0x200000 +#define MRST_REGBASE_SPI0 0xff128000 +#define MRST_REGBASE_SPI1 0xff128400 +#define MRST_CLK_SPI0_REG 0xff11d86c + +/* Bit fields in CTRLR0 */ +#define SPI_DFS_OFFSET 0 + +#define SPI_FRF_OFFSET 4 +#define SPI_FRF_SPI 0x0 +#define SPI_FRF_SSP 0x1 +#define SPI_FRF_MICROWIRE 0x2 +#define SPI_FRF_RESV 0x3 + +#define SPI_MODE_OFFSET 6 +#define SPI_SCPH_OFFSET 6 +#define SPI_SCOL_OFFSET 7 +#define SPI_TMOD_OFFSET 8 +#define SPI_TMOD_TR 0x0 /* xmit & recv */ +#define SPI_TMOD_TO 0x1 /* xmit only */ +#define SPI_TMOD_RO 0x2 /* recv only */ +#define SPI_TMOD_EPROMREAD 0x3 /* eeprom read mode */ + +#define SPI_SLVOE_OFFSET 10 +#define SPI_SRL_OFFSET 11 +#define SPI_CFS_OFFSET 12 + +/* Bit fields in SR, 7 bits */ +#define SR_MASK 0x7f /* cover 7 bits */ +#define SR_BUSY (1 << 0) +#define SR_TF_NOT_FULL (1 << 1) +#define SR_TF_EMPT (1 << 2) +#define SR_RF_NOT_EMPT (1 << 3) +#define SR_RF_FULL (1 << 4) +#define SR_TX_ERR (1 << 5) +#define SR_DCOL (1 << 6) + +struct dw_spi_reg { + u32 ctrl0; + u32 ctrl1; + u32 ssienr; + u32 mwcr; + u32 ser; + u32 baudr; + u32 txfltr; + u32 rxfltr; + u32 txflr; + u32 rxflr; + u32 sr; + u32 imr; + u32 isr; + u32 risr; + u32 txoicr; + u32 rxoicr; + u32 rxuicr; + u32 msticr; + u32 icr; + u32 dmacr; + u32 dmatdlr; + u32 dmardlr; + u32 idr; + u32 version; + + /* Currently operates as 32 bits, though only the low 16 bits matter */ + u32 dr; +} __packed; + +#define dw_readl(dw, name) __raw_readl(&(dw)->name) +#define dw_writel(dw, name, val) __raw_writel((val), &(dw)->name) + +/* Default use SPI0 register for mrst, we will detect Penwell and use SPI1 */ +static unsigned long mrst_spi_paddr = MRST_REGBASE_SPI0; + 
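+/*
+ * dw_readl()/dw_writel() overlay the controller's MMIO window with
+ * struct dw_spi_reg above, so register accesses read as, e.g.,
+ * dw_writel(pspi, ssienr, 0) rather than raw byte offsets. Words
+ * shifted out to the MAX3110 through the dr register are 16 bits wide;
+ * per the MAX3100/3110 datasheet the top two bits select the operation
+ * (11 = write configuration, 10 = write data), which is why
+ * max3110_write_config() below sends 0xc001 (115200, 8n1, IRQs masked)
+ * and max3110_write_data() sends 0x8000 | c to transmit character c.
+ */
+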
+static u32 *pclk_spi0; +/* Always contains an accessible address, start with 0 */ +static struct dw_spi_reg *pspi; + +static struct kmsg_dumper dw_dumper; +static int dumper_registered; + +static void dw_kmsg_dump(struct kmsg_dumper *dumper, + enum kmsg_dump_reason reason, + const char *s1, unsigned long l1, + const char *s2, unsigned long l2) +{ + int i; + + /* When run to this, we'd better re-init the HW */ + mrst_early_console_init(); + + for (i = 0; i < l1; i++) + early_mrst_console.write(&early_mrst_console, s1 + i, 1); + for (i = 0; i < l2; i++) + early_mrst_console.write(&early_mrst_console, s2 + i, 1); +} + +/* Set the ratio rate to 115200, 8n1, IRQ disabled */ +static void max3110_write_config(void) +{ + u16 config; + + config = 0xc001; + dw_writel(pspi, dr, config); +} + +/* Translate char to a eligible word and send to max3110 */ +static void max3110_write_data(char c) +{ + u16 data; + + data = 0x8000 | c; + dw_writel(pspi, dr, data); +} + +void mrst_early_console_init(void) +{ + u32 ctrlr0 = 0; + u32 spi0_cdiv; + u32 freq; /* Freqency info only need be searched once */ + + /* Base clk is 100 MHz, the actual clk = 100M / (clk_divider + 1) */ + pclk_spi0 = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, + MRST_CLK_SPI0_REG); + spi0_cdiv = ((*pclk_spi0) & 0xe00) >> 9; + freq = 100000000 / (spi0_cdiv + 1); + + if (mrst_identify_cpu() == MRST_CPU_CHIP_PENWELL) + mrst_spi_paddr = MRST_REGBASE_SPI1; + + pspi = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, + mrst_spi_paddr); + + /* Disable SPI controller */ + dw_writel(pspi, ssienr, 0); + + /* Set control param, 8 bits, transmit only mode */ + ctrlr0 = dw_readl(pspi, ctrl0); + + ctrlr0 &= 0xfcc0; + ctrlr0 |= 0xf | (SPI_FRF_SPI << SPI_FRF_OFFSET) + | (SPI_TMOD_TO << SPI_TMOD_OFFSET); + dw_writel(pspi, ctrl0, ctrlr0); + + /* + * Change the spi0 clk to comply with 115200 bps, use 100000 to + * calculate the clk dividor to make the clock a little slower + * than real baud rate. + */ + dw_writel(pspi, baudr, freq/100000); + + /* Disable all INT for early phase */ + dw_writel(pspi, imr, 0x0); + + /* Set the cs to spi-uart */ + dw_writel(pspi, ser, 0x2); + + /* Enable the HW, the last step for HW init */ + dw_writel(pspi, ssienr, 0x1); + + /* Set the default configuration */ + max3110_write_config(); + + /* Register the kmsg dumper */ + if (!dumper_registered) { + dw_dumper.dump = dw_kmsg_dump; + kmsg_dump_register(&dw_dumper); + dumper_registered = 1; + } +} + +/* Slave select should be called in the read/write function */ +static void early_mrst_spi_putc(char c) +{ + unsigned int timeout; + u32 sr; + + timeout = MRST_SPI_TIMEOUT; + /* Early putc needs to make sure the TX FIFO is not full */ + while (--timeout) { + sr = dw_readl(pspi, sr); + if (!(sr & SR_TF_NOT_FULL)) + cpu_relax(); + else + break; + } + + if (!timeout) + pr_warning("MRST earlycon: timed out\n"); + else + max3110_write_data(c); +} + +/* Early SPI only uses polling mode */ +static void early_mrst_spi_write(struct console *con, const char *str, unsigned n) +{ + int i; + + for (i = 0; i < n && *str; i++) { + if (*str == '\n') + early_mrst_spi_putc('\r'); + early_mrst_spi_putc(*str); + str++; + } +} + +struct console early_mrst_console = { + .name = "earlymrst", + .write = early_mrst_spi_write, + .flags = CON_PRINTBUFFER, + .index = -1, +}; + +/* + * Following is the early console based on Medfield HSU (High + * Speed UART) device. 
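+ * The three ports live 0x80 apart starting at HSU_PORT_BASE; the port
+ * to use (0-2) is parsed from the option string handed to
+ * hsu_early_console_init(), and the port is then programmed by hand
+ * for 115200 8n1 since no 8250 driver is up this early.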
+ */ +#define HSU_PORT_BASE 0xffa28080 + +static void __iomem *phsu; + +void hsu_early_console_init(const char *s) +{ + unsigned long paddr, port = 0; + u8 lcr; + + /* + * Select the early HSU console port if specified by user in the + * kernel command line. + */ + if (*s && !kstrtoul(s, 10, &port)) + port = clamp_val(port, 0, 2); + + paddr = HSU_PORT_BASE + port * 0x80; + phsu = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, paddr); + + /* Disable FIFO */ + writeb(0x0, phsu + UART_FCR); + + /* Set to default 115200 bps, 8n1 */ + lcr = readb(phsu + UART_LCR); + writeb((0x80 | lcr), phsu + UART_LCR); + writeb(0x18, phsu + UART_DLL); + writeb(lcr, phsu + UART_LCR); + writel(0x3600, phsu + UART_MUL*4); + + writeb(0x8, phsu + UART_MCR); + writeb(0x7, phsu + UART_FCR); + writeb(0x3, phsu + UART_LCR); + + /* Clear IRQ status */ + readb(phsu + UART_LSR); + readb(phsu + UART_RX); + readb(phsu + UART_IIR); + readb(phsu + UART_MSR); + + /* Enable FIFO */ + writeb(0x7, phsu + UART_FCR); +} + +#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE) + +static void early_hsu_putc(char ch) +{ + unsigned int timeout = 10000; /* 10ms */ + u8 status; + + while (--timeout) { + status = readb(phsu + UART_LSR); + if (status & BOTH_EMPTY) + break; + udelay(1); + } + + /* Only write the char when there was no timeout */ + if (timeout) + writeb(ch, phsu + UART_TX); +} + +static void early_hsu_write(struct console *con, const char *str, unsigned n) +{ + int i; + + for (i = 0; i < n && *str; i++) { + if (*str == '\n') + early_hsu_putc('\r'); + early_hsu_putc(*str); + str++; + } +} + +struct console early_hsu_console = { + .name = "earlyhsu", + .write = early_hsu_write, + .flags = CON_PRINTBUFFER, + .index = -1, +}; diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c new file mode 100644 index 00000000..e31bcd8f --- /dev/null +++ b/arch/x86/platform/mrst/mrst.c @@ -0,0 +1,1053 @@ +/* + * mrst.c: Intel Moorestown platform specific setup code + * + * (C) Copyright 2008 Intel Corporation + * Author: Jacob Pan (jacob.jun.pan@intel.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#define pr_fmt(fmt) "mrst: " fmt + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/scatterlist.h> +#include <linux/sfi.h> +#include <linux/intel_pmic_gpio.h> +#include <linux/spi/spi.h> +#include <linux/i2c.h> +#include <linux/i2c/pca953x.h> +#include <linux/gpio_keys.h> +#include <linux/input.h> +#include <linux/platform_device.h> +#include <linux/irq.h> +#include <linux/module.h> +#include <linux/notifier.h> +#include <linux/mfd/intel_msic.h> +#include <linux/gpio.h> +#include <linux/i2c/tc35876x.h> + +#include <asm/setup.h> +#include <asm/mpspec_def.h> +#include <asm/hw_irq.h> +#include <asm/apic.h> +#include <asm/io_apic.h> +#include <asm/mrst.h> +#include <asm/mrst-vrtc.h> +#include <asm/io.h> +#include <asm/i8259.h> +#include <asm/intel_scu_ipc.h> +#include <asm/apb_timer.h> +#include <asm/reboot.h> + +/* + * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock, + * cmdline option x86_mrst_timer can be used to override the configuration + * to prefer one or the other. + * at runtime, there are basically three timer configurations: + * 1. per cpu apbt clock only + * 2. per cpu always-on lapic clocks only, this is Penwell/Medfield only + * 3. 
per cpu lapic clock (C3STOP) and one apbt clock, with broadcast. + * + * by default (without cmdline option), platform code first detects cpu type + * to see if we are on lincroft or penwell, then set up both lapic or apbt + * clocks accordingly. + * i.e. by default, medfield uses configuration #2, moorestown uses #1. + * config #3 is supported but not recommended on medfield. + * + * rating and feature summary: + * lapic (with C3STOP) --------- 100 + * apbt (always-on) ------------ 110 + * lapic (always-on,ARAT) ------ 150 + */ + +__cpuinitdata enum mrst_timer_options mrst_timer_options; + +static u32 sfi_mtimer_usage[SFI_MTMR_MAX_NUM]; +static struct sfi_timer_table_entry sfi_mtimer_array[SFI_MTMR_MAX_NUM]; +enum mrst_cpu_type __mrst_cpu_chip; +EXPORT_SYMBOL_GPL(__mrst_cpu_chip); + +int sfi_mtimer_num; + +struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX]; +EXPORT_SYMBOL_GPL(sfi_mrtc_array); +int sfi_mrtc_num; + +static void mrst_power_off(void) +{ +} + +static void mrst_reboot(void) +{ + intel_scu_ipc_simple_command(IPCMSG_COLD_BOOT, 0); +} + +/* parse all the mtimer info to a static mtimer array */ +static int __init sfi_parse_mtmr(struct sfi_table_header *table) +{ + struct sfi_table_simple *sb; + struct sfi_timer_table_entry *pentry; + struct mpc_intsrc mp_irq; + int totallen; + + sb = (struct sfi_table_simple *)table; + if (!sfi_mtimer_num) { + sfi_mtimer_num = SFI_GET_NUM_ENTRIES(sb, + struct sfi_timer_table_entry); + pentry = (struct sfi_timer_table_entry *) sb->pentry; + totallen = sfi_mtimer_num * sizeof(*pentry); + memcpy(sfi_mtimer_array, pentry, totallen); + } + + pr_debug("SFI MTIMER info (num = %d):\n", sfi_mtimer_num); + pentry = sfi_mtimer_array; + for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) { + pr_debug("timer[%d]: paddr = 0x%08x, freq = %dHz," + " irq = %d\n", totallen, (u32)pentry->phys_addr, + pentry->freq_hz, pentry->irq); + if (!pentry->irq) + continue; + mp_irq.type = MP_INTSRC; + mp_irq.irqtype = mp_INT; +/* triggering mode edge bit 2-3, active high polarity bit 0-1 */ + mp_irq.irqflag = 5; + mp_irq.srcbus = MP_BUS_ISA; + mp_irq.srcbusirq = pentry->irq; /* IRQ */ + mp_irq.dstapic = MP_APIC_ALL; + mp_irq.dstirq = pentry->irq; + mp_save_irq(&mp_irq); + } + + return 0; +} + +struct sfi_timer_table_entry *sfi_get_mtmr(int hint) +{ + int i; + if (hint < sfi_mtimer_num) { + if (!sfi_mtimer_usage[hint]) { + pr_debug("hint taken for timer %d irq %d\n",\ + hint, sfi_mtimer_array[hint].irq); + sfi_mtimer_usage[hint] = 1; + return &sfi_mtimer_array[hint]; + } + } + /* take the first timer available */ + for (i = 0; i < sfi_mtimer_num;) { + if (!sfi_mtimer_usage[i]) { + sfi_mtimer_usage[i] = 1; + return &sfi_mtimer_array[i]; + } + i++; + } + return NULL; +} + +void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr) +{ + int i; + for (i = 0; i < sfi_mtimer_num;) { + if (mtmr->irq == sfi_mtimer_array[i].irq) { + sfi_mtimer_usage[i] = 0; + return; + } + i++; + } +} + +/* parse all the mrtc info to a global mrtc array */ +int __init sfi_parse_mrtc(struct sfi_table_header *table) +{ + struct sfi_table_simple *sb; + struct sfi_rtc_table_entry *pentry; + struct mpc_intsrc mp_irq; + + int totallen; + + sb = (struct sfi_table_simple *)table; + if (!sfi_mrtc_num) { + sfi_mrtc_num = SFI_GET_NUM_ENTRIES(sb, + struct sfi_rtc_table_entry); + pentry = (struct sfi_rtc_table_entry *)sb->pentry; + totallen = sfi_mrtc_num * sizeof(*pentry); + memcpy(sfi_mrtc_array, pentry, totallen); + } + + pr_debug("SFI RTC info (num = %d):\n", sfi_mrtc_num); + pentry = 
sfi_mrtc_array; + for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) { + pr_debug("RTC[%d]: paddr = 0x%08x, irq = %d\n", + totallen, (u32)pentry->phys_addr, pentry->irq); + mp_irq.type = MP_INTSRC; + mp_irq.irqtype = mp_INT; + mp_irq.irqflag = 0xf; /* level trigger and active low */ + mp_irq.srcbus = MP_BUS_ISA; + mp_irq.srcbusirq = pentry->irq; /* IRQ */ + mp_irq.dstapic = MP_APIC_ALL; + mp_irq.dstirq = pentry->irq; + mp_save_irq(&mp_irq); + } + return 0; +} + +static unsigned long __init mrst_calibrate_tsc(void) +{ + unsigned long fast_calibrate; + u32 lo, hi, ratio, fsb; + + rdmsr(MSR_IA32_PERF_STATUS, lo, hi); + pr_debug("IA32 perf status is 0x%x, 0x%0x\n", lo, hi); + ratio = (hi >> 8) & 0x1f; + pr_debug("ratio is %d\n", ratio); + if (!ratio) { + pr_err("read a zero ratio, should be incorrect!\n"); + pr_err("force tsc ratio to 16 ...\n"); + ratio = 16; + } + rdmsr(MSR_FSB_FREQ, lo, hi); + if ((lo & 0x7) == 0x7) + fsb = PENWELL_FSB_FREQ_83SKU; + else + fsb = PENWELL_FSB_FREQ_100SKU; + fast_calibrate = ratio * fsb; + pr_debug("read penwell tsc %lu khz\n", fast_calibrate); + lapic_timer_frequency = fsb * 1000 / HZ; + /* mark tsc clocksource as reliable */ + set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE); + + if (fast_calibrate) + return fast_calibrate; + + return 0; +} + +static void __init mrst_time_init(void) +{ + sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr); + switch (mrst_timer_options) { + case MRST_TIMER_APBT_ONLY: + break; + case MRST_TIMER_LAPIC_APBT: + x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock; + x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock; + break; + default: + if (!boot_cpu_has(X86_FEATURE_ARAT)) + break; + x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock; + x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock; + return; + } + /* we need at least one APB timer */ + pre_init_apic_IRQ0(); + apbt_time_init(); +} + +static void __cpuinit mrst_arch_setup(void) +{ + if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27) + __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL; + else { + pr_err("Unknown Intel MID CPU (%d:%d), default to Penwell\n", + boot_cpu_data.x86, boot_cpu_data.x86_model); + __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL; + } +} + +/* MID systems don't have i8042 controller */ +static int mrst_i8042_detect(void) +{ + return 0; +} + +/* + * Moorestown does not have external NMI source nor port 0x61 to report + * NMI status. The possible NMI sources are from pmu as a result of NMI + * watchdog or lock debug. Reading io port 0x61 results in 0xff which + * misled NMI handler. + */ +static unsigned char mrst_get_nmi_reason(void) +{ + return 0; +} + +/* + * Moorestown specific x86_init function overrides and early setup + * calls. 
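+ *
+ * No SFI tables are parsed at this point; the mrst_time_init() and
+ * mrst_rtc_init() hooks installed here pull in the MTMR and MRTC
+ * tables via sfi_table_parse() once they run.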
+ */ +void __init x86_mrst_early_setup(void) +{ + x86_init.resources.probe_roms = x86_init_noop; + x86_init.resources.reserve_resources = x86_init_noop; + + x86_init.timers.timer_init = mrst_time_init; + x86_init.timers.setup_percpu_clockev = x86_init_noop; + + x86_init.irqs.pre_vector_init = x86_init_noop; + + x86_init.oem.arch_setup = mrst_arch_setup; + + x86_cpuinit.setup_percpu_clockev = apbt_setup_secondary_clock; + + x86_platform.calibrate_tsc = mrst_calibrate_tsc; + x86_platform.i8042_detect = mrst_i8042_detect; + x86_init.timers.wallclock_init = mrst_rtc_init; + x86_platform.get_nmi_reason = mrst_get_nmi_reason; + + x86_init.pci.init = pci_mrst_init; + x86_init.pci.fixup_irqs = x86_init_noop; + + legacy_pic = &null_legacy_pic; + + /* Moorestown specific power_off/restart method */ + pm_power_off = mrst_power_off; + machine_ops.emergency_restart = mrst_reboot; + + /* Avoid searching for BIOS MP tables */ + x86_init.mpparse.find_smp_config = x86_init_noop; + x86_init.mpparse.get_smp_config = x86_init_uint_noop; + set_bit(MP_BUS_ISA, mp_bus_not_pci); +} + +/* + * if user does not want to use per CPU apb timer, just give it a lower rating + * than local apic timer and skip the late per cpu timer init. + */ +static inline int __init setup_x86_mrst_timer(char *arg) +{ + if (!arg) + return -EINVAL; + + if (strcmp("apbt_only", arg) == 0) + mrst_timer_options = MRST_TIMER_APBT_ONLY; + else if (strcmp("lapic_and_apbt", arg) == 0) + mrst_timer_options = MRST_TIMER_LAPIC_APBT; + else { + pr_warning("X86 MRST timer option %s not recognised" + " use x86_mrst_timer=apbt_only or lapic_and_apbt\n", + arg); + return -EINVAL; + } + return 0; +} +__setup("x86_mrst_timer=", setup_x86_mrst_timer); + +/* + * Parsing GPIO table first, since the DEVS table will need this table + * to map the pin name to the actual pin. + */ +static struct sfi_gpio_table_entry *gpio_table; +static int gpio_num_entry; + +static int __init sfi_parse_gpio(struct sfi_table_header *table) +{ + struct sfi_table_simple *sb; + struct sfi_gpio_table_entry *pentry; + int num, i; + + if (gpio_table) + return 0; + sb = (struct sfi_table_simple *)table; + num = SFI_GET_NUM_ENTRIES(sb, struct sfi_gpio_table_entry); + pentry = (struct sfi_gpio_table_entry *)sb->pentry; + + gpio_table = (struct sfi_gpio_table_entry *) + kmalloc(num * sizeof(*pentry), GFP_KERNEL); + if (!gpio_table) + return -1; + memcpy(gpio_table, pentry, num * sizeof(*pentry)); + gpio_num_entry = num; + + pr_debug("GPIO pin info:\n"); + for (i = 0; i < num; i++, pentry++) + pr_debug("info[%2d]: controller = %16.16s, pin_name = %16.16s," + " pin = %d\n", i, + pentry->controller_name, + pentry->pin_name, + pentry->pin_no); + return 0; +} + +static int get_gpio_by_name(const char *name) +{ + struct sfi_gpio_table_entry *pentry = gpio_table; + int i; + + if (!pentry) + return -1; + for (i = 0; i < gpio_num_entry; i++, pentry++) { + if (!strncmp(name, pentry->pin_name, SFI_NAME_LEN)) + return pentry->pin_no; + } + return -1; +} + +/* + * Here defines the array of devices platform data that IAFW would export + * through SFI "DEVS" table, we use name and type to match the device and + * its platform data. 
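+ *
+ * A non-zero delay defers registration through the
+ * intel_scu_*_device_register() helpers until the SCU firmware is up;
+ * a zero delay registers the device as soon as the table is parsed.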
+ */ +struct devs_id { + char name[SFI_NAME_LEN + 1]; + u8 type; + u8 delay; + void *(*get_platform_data)(void *info); +}; + +/* the offset for the mapping of global gpio pin to irq */ +#define MRST_IRQ_OFFSET 0x100 + +static void __init *pmic_gpio_platform_data(void *info) +{ + static struct intel_pmic_gpio_platform_data pmic_gpio_pdata; + int gpio_base = get_gpio_by_name("pmic_gpio_base"); + + if (gpio_base == -1) + gpio_base = 64; + pmic_gpio_pdata.gpio_base = gpio_base; + pmic_gpio_pdata.irq_base = gpio_base + MRST_IRQ_OFFSET; + pmic_gpio_pdata.gpiointr = 0xffffeff8; + + return &pmic_gpio_pdata; +} + +static void __init *max3111_platform_data(void *info) +{ + struct spi_board_info *spi_info = info; + int intr = get_gpio_by_name("max3111_int"); + + spi_info->mode = SPI_MODE_0; + if (intr == -1) + return NULL; + spi_info->irq = intr + MRST_IRQ_OFFSET; + return NULL; +} + +/* we have multiple max7315 on the board ... */ +#define MAX7315_NUM 2 +static void __init *max7315_platform_data(void *info) +{ + static struct pca953x_platform_data max7315_pdata[MAX7315_NUM]; + static int nr; + struct pca953x_platform_data *max7315 = &max7315_pdata[nr]; + struct i2c_board_info *i2c_info = info; + int gpio_base, intr; + char base_pin_name[SFI_NAME_LEN + 1]; + char intr_pin_name[SFI_NAME_LEN + 1]; + + if (nr == MAX7315_NUM) { + pr_err("too many max7315s, we only support %d\n", + MAX7315_NUM); + return NULL; + } + /* we have several max7315 on the board, we only need load several + * instances of the same pca953x driver to cover them + */ + strcpy(i2c_info->type, "max7315"); + if (nr++) { + sprintf(base_pin_name, "max7315_%d_base", nr); + sprintf(intr_pin_name, "max7315_%d_int", nr); + } else { + strcpy(base_pin_name, "max7315_base"); + strcpy(intr_pin_name, "max7315_int"); + } + + gpio_base = get_gpio_by_name(base_pin_name); + intr = get_gpio_by_name(intr_pin_name); + + if (gpio_base == -1) + return NULL; + max7315->gpio_base = gpio_base; + if (intr != -1) { + i2c_info->irq = intr + MRST_IRQ_OFFSET; + max7315->irq_base = gpio_base + MRST_IRQ_OFFSET; + } else { + i2c_info->irq = -1; + max7315->irq_base = -1; + } + return max7315; +} + +static void *tca6416_platform_data(void *info) +{ + static struct pca953x_platform_data tca6416; + struct i2c_board_info *i2c_info = info; + int gpio_base, intr; + char base_pin_name[SFI_NAME_LEN + 1]; + char intr_pin_name[SFI_NAME_LEN + 1]; + + strcpy(i2c_info->type, "tca6416"); + strcpy(base_pin_name, "tca6416_base"); + strcpy(intr_pin_name, "tca6416_int"); + + gpio_base = get_gpio_by_name(base_pin_name); + intr = get_gpio_by_name(intr_pin_name); + + if (gpio_base == -1) + return NULL; + tca6416.gpio_base = gpio_base; + if (intr != -1) { + i2c_info->irq = intr + MRST_IRQ_OFFSET; + tca6416.irq_base = gpio_base + MRST_IRQ_OFFSET; + } else { + i2c_info->irq = -1; + tca6416.irq_base = -1; + } + return &tca6416; +} + +static void *mpu3050_platform_data(void *info) +{ + struct i2c_board_info *i2c_info = info; + int intr = get_gpio_by_name("mpu3050_int"); + + if (intr == -1) + return NULL; + + i2c_info->irq = intr + MRST_IRQ_OFFSET; + return NULL; +} + +static void __init *emc1403_platform_data(void *info) +{ + static short intr2nd_pdata; + struct i2c_board_info *i2c_info = info; + int intr = get_gpio_by_name("thermal_int"); + int intr2nd = get_gpio_by_name("thermal_alert"); + + if (intr == -1 || intr2nd == -1) + return NULL; + + i2c_info->irq = intr + MRST_IRQ_OFFSET; + intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET; + + return &intr2nd_pdata; +} + +static void __init 
*lis331dl_platform_data(void *info) +{ + static short intr2nd_pdata; + struct i2c_board_info *i2c_info = info; + int intr = get_gpio_by_name("accel_int"); + int intr2nd = get_gpio_by_name("accel_2"); + + if (intr == -1 || intr2nd == -1) + return NULL; + + i2c_info->irq = intr + MRST_IRQ_OFFSET; + intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET; + + return &intr2nd_pdata; +} + +static void __init *no_platform_data(void *info) +{ + return NULL; +} + +static struct resource msic_resources[] = { + { + .start = INTEL_MSIC_IRQ_PHYS_BASE, + .end = INTEL_MSIC_IRQ_PHYS_BASE + 64 - 1, + .flags = IORESOURCE_MEM, + }, +}; + +static struct intel_msic_platform_data msic_pdata; + +static struct platform_device msic_device = { + .name = "intel_msic", + .id = -1, + .dev = { + .platform_data = &msic_pdata, + }, + .num_resources = ARRAY_SIZE(msic_resources), + .resource = msic_resources, +}; + +static inline bool mrst_has_msic(void) +{ + return mrst_identify_cpu() == MRST_CPU_CHIP_PENWELL; +} + +static int msic_scu_status_change(struct notifier_block *nb, + unsigned long code, void *data) +{ + if (code == SCU_DOWN) { + platform_device_unregister(&msic_device); + return 0; + } + + return platform_device_register(&msic_device); +} + +static int __init msic_init(void) +{ + static struct notifier_block msic_scu_notifier = { + .notifier_call = msic_scu_status_change, + }; + + /* + * We need to be sure that the SCU IPC is ready before MSIC device + * can be registered. + */ + if (mrst_has_msic()) + intel_scu_notifier_add(&msic_scu_notifier); + + return 0; +} +arch_initcall(msic_init); + +/* + * msic_generic_platform_data - sets generic platform data for the block + * @info: pointer to the SFI device table entry for this block + * @block: MSIC block + * + * Function sets IRQ number from the SFI table entry for given device to + * the MSIC platform data. 
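+ *
+ * Returns NULL (via no_platform_data()), so block helpers that have no
+ * extra platform data of their own can simply return its result.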
+ */ +static void *msic_generic_platform_data(void *info, enum intel_msic_block block) +{ + struct sfi_device_table_entry *entry = info; + + BUG_ON(block < 0 || block >= INTEL_MSIC_BLOCK_LAST); + msic_pdata.irq[block] = entry->irq; + + return no_platform_data(info); +} + +static void *msic_battery_platform_data(void *info) +{ + return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_BATTERY); +} + +static void *msic_gpio_platform_data(void *info) +{ + static struct intel_msic_gpio_pdata pdata; + int gpio = get_gpio_by_name("msic_gpio_base"); + + if (gpio < 0) + return NULL; + + pdata.gpio_base = gpio; + msic_pdata.gpio = &pdata; + + return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_GPIO); +} + +static void *msic_audio_platform_data(void *info) +{ + struct platform_device *pdev; + + pdev = platform_device_register_simple("sst-platform", -1, NULL, 0); + if (IS_ERR(pdev)) { + pr_err("failed to create audio platform device\n"); + return NULL; + } + + return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_AUDIO); +} + +static void *msic_power_btn_platform_data(void *info) +{ + return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_POWER_BTN); +} + +static void *msic_ocd_platform_data(void *info) +{ + static struct intel_msic_ocd_pdata pdata; + int gpio = get_gpio_by_name("ocd_gpio"); + + if (gpio < 0) + return NULL; + + pdata.gpio = gpio; + msic_pdata.ocd = &pdata; + + return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_OCD); +} + +static void *msic_thermal_platform_data(void *info) +{ + return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_THERMAL); +} + +/* tc35876x DSI-LVDS bridge chip and panel platform data */ +static void *tc35876x_platform_data(void *data) +{ + static struct tc35876x_platform_data pdata; + + /* gpio pins set to -1 will not be used by the driver */ + pdata.gpio_bridge_reset = get_gpio_by_name("LCMB_RXEN"); + pdata.gpio_panel_bl_en = get_gpio_by_name("6S6P_BL_EN"); + pdata.gpio_panel_vadd = get_gpio_by_name("EN_VREG_LCD_V3P3"); + + return &pdata; +} + +static const struct devs_id __initconst device_ids[] = { + {"bma023", SFI_DEV_TYPE_I2C, 1, &no_platform_data}, + {"pmic_gpio", SFI_DEV_TYPE_SPI, 1, &pmic_gpio_platform_data}, + {"pmic_gpio", SFI_DEV_TYPE_IPC, 1, &pmic_gpio_platform_data}, + {"spi_max3111", SFI_DEV_TYPE_SPI, 0, &max3111_platform_data}, + {"i2c_max7315", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data}, + {"i2c_max7315_2", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data}, + {"tca6416", SFI_DEV_TYPE_I2C, 1, &tca6416_platform_data}, + {"emc1403", SFI_DEV_TYPE_I2C, 1, &emc1403_platform_data}, + {"i2c_accel", SFI_DEV_TYPE_I2C, 0, &lis331dl_platform_data}, + {"pmic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data}, + {"mpu3050", SFI_DEV_TYPE_I2C, 1, &mpu3050_platform_data}, + {"i2c_disp_brig", SFI_DEV_TYPE_I2C, 0, &tc35876x_platform_data}, + + /* MSIC subdevices */ + {"msic_battery", SFI_DEV_TYPE_IPC, 1, &msic_battery_platform_data}, + {"msic_gpio", SFI_DEV_TYPE_IPC, 1, &msic_gpio_platform_data}, + {"msic_audio", SFI_DEV_TYPE_IPC, 1, &msic_audio_platform_data}, + {"msic_power_btn", SFI_DEV_TYPE_IPC, 1, &msic_power_btn_platform_data}, + {"msic_ocd", SFI_DEV_TYPE_IPC, 1, &msic_ocd_platform_data}, + {"msic_thermal", SFI_DEV_TYPE_IPC, 1, &msic_thermal_platform_data}, + + {}, +}; + +#define MAX_IPCDEVS 24 +static struct platform_device *ipc_devs[MAX_IPCDEVS]; +static int ipc_next_dev; + +#define MAX_SCU_SPI 24 +static struct spi_board_info *spi_devs[MAX_SCU_SPI]; +static int spi_next_dev; + +#define MAX_SCU_I2C 24 +static struct i2c_board_info 
*i2c_devs[MAX_SCU_I2C]; +static int i2c_bus[MAX_SCU_I2C]; +static int i2c_next_dev; + +static void __init intel_scu_device_register(struct platform_device *pdev) +{ + if(ipc_next_dev == MAX_IPCDEVS) + pr_err("too many SCU IPC devices"); + else + ipc_devs[ipc_next_dev++] = pdev; +} + +static void __init intel_scu_spi_device_register(struct spi_board_info *sdev) +{ + struct spi_board_info *new_dev; + + if (spi_next_dev == MAX_SCU_SPI) { + pr_err("too many SCU SPI devices"); + return; + } + + new_dev = kzalloc(sizeof(*sdev), GFP_KERNEL); + if (!new_dev) { + pr_err("failed to alloc mem for delayed spi dev %s\n", + sdev->modalias); + return; + } + memcpy(new_dev, sdev, sizeof(*sdev)); + + spi_devs[spi_next_dev++] = new_dev; +} + +static void __init intel_scu_i2c_device_register(int bus, + struct i2c_board_info *idev) +{ + struct i2c_board_info *new_dev; + + if (i2c_next_dev == MAX_SCU_I2C) { + pr_err("too many SCU I2C devices"); + return; + } + + new_dev = kzalloc(sizeof(*idev), GFP_KERNEL); + if (!new_dev) { + pr_err("failed to alloc mem for delayed i2c dev %s\n", + idev->type); + return; + } + memcpy(new_dev, idev, sizeof(*idev)); + + i2c_bus[i2c_next_dev] = bus; + i2c_devs[i2c_next_dev++] = new_dev; +} + +BLOCKING_NOTIFIER_HEAD(intel_scu_notifier); +EXPORT_SYMBOL_GPL(intel_scu_notifier); + +/* Called by IPC driver */ +void intel_scu_devices_create(void) +{ + int i; + + for (i = 0; i < ipc_next_dev; i++) + platform_device_add(ipc_devs[i]); + + for (i = 0; i < spi_next_dev; i++) + spi_register_board_info(spi_devs[i], 1); + + for (i = 0; i < i2c_next_dev; i++) { + struct i2c_adapter *adapter; + struct i2c_client *client; + + adapter = i2c_get_adapter(i2c_bus[i]); + if (adapter) { + client = i2c_new_device(adapter, i2c_devs[i]); + if (!client) + pr_err("can't create i2c device %s\n", + i2c_devs[i]->type); + } else + i2c_register_board_info(i2c_bus[i], i2c_devs[i], 1); + } + intel_scu_notifier_post(SCU_AVAILABLE, NULL); +} +EXPORT_SYMBOL_GPL(intel_scu_devices_create); + +/* Called by IPC driver */ +void intel_scu_devices_destroy(void) +{ + int i; + + intel_scu_notifier_post(SCU_DOWN, NULL); + + for (i = 0; i < ipc_next_dev; i++) + platform_device_del(ipc_devs[i]); +} +EXPORT_SYMBOL_GPL(intel_scu_devices_destroy); + +static void __init install_irq_resource(struct platform_device *pdev, int irq) +{ + /* Single threaded */ + static struct resource __initdata res = { + .name = "IRQ", + .flags = IORESOURCE_IRQ, + }; + res.start = irq; + platform_device_add_resources(pdev, &res, 1); +} + +static void __init sfi_handle_ipc_dev(struct sfi_device_table_entry *entry) +{ + const struct devs_id *dev = device_ids; + struct platform_device *pdev; + void *pdata = NULL; + + while (dev->name[0]) { + if (dev->type == SFI_DEV_TYPE_IPC && + !strncmp(dev->name, entry->name, SFI_NAME_LEN)) { + pdata = dev->get_platform_data(entry); + break; + } + dev++; + } + + /* + * On Medfield the platform device creation is handled by the MSIC + * MFD driver so we don't need to do it here. 
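+ * mrst_has_msic() is true only on Penwell, so on Moorestown we fall
+ * through and create the platform device ourselves.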
+ */ + if (mrst_has_msic()) + return; + + pdev = platform_device_alloc(entry->name, 0); + if (pdev == NULL) { + pr_err("out of memory for SFI platform device '%s'.\n", + entry->name); + return; + } + install_irq_resource(pdev, entry->irq); + + pdev->dev.platform_data = pdata; + intel_scu_device_register(pdev); +} + +static void __init sfi_handle_spi_dev(struct spi_board_info *spi_info) +{ + const struct devs_id *dev = device_ids; + void *pdata = NULL; + + while (dev->name[0]) { + if (dev->type == SFI_DEV_TYPE_SPI && + !strncmp(dev->name, spi_info->modalias, SFI_NAME_LEN)) { + pdata = dev->get_platform_data(spi_info); + break; + } + dev++; + } + spi_info->platform_data = pdata; + if (dev->delay) + intel_scu_spi_device_register(spi_info); + else + spi_register_board_info(spi_info, 1); +} + +static void __init sfi_handle_i2c_dev(int bus, struct i2c_board_info *i2c_info) +{ + const struct devs_id *dev = device_ids; + void *pdata = NULL; + + while (dev->name[0]) { + if (dev->type == SFI_DEV_TYPE_I2C && + !strncmp(dev->name, i2c_info->type, SFI_NAME_LEN)) { + pdata = dev->get_platform_data(i2c_info); + break; + } + dev++; + } + i2c_info->platform_data = pdata; + + if (dev->delay) + intel_scu_i2c_device_register(bus, i2c_info); + else + i2c_register_board_info(bus, i2c_info, 1); + } + + +static int __init sfi_parse_devs(struct sfi_table_header *table) +{ + struct sfi_table_simple *sb; + struct sfi_device_table_entry *pentry; + struct spi_board_info spi_info; + struct i2c_board_info i2c_info; + int num, i, bus; + int ioapic; + struct io_apic_irq_attr irq_attr; + + sb = (struct sfi_table_simple *)table; + num = SFI_GET_NUM_ENTRIES(sb, struct sfi_device_table_entry); + pentry = (struct sfi_device_table_entry *)sb->pentry; + + for (i = 0; i < num; i++, pentry++) { + int irq = pentry->irq; + + if (irq != (u8)0xff) { /* native RTE case */ + /* these SPI2 devices are not exposed to system as PCI + * devices, but they have separate RTE entry in IOAPIC + * so we have to enable them one by one here + */ + ioapic = mp_find_ioapic(irq); + irq_attr.ioapic = ioapic; + irq_attr.ioapic_pin = irq; + irq_attr.trigger = 1; + irq_attr.polarity = 1; + io_apic_set_pci_routing(NULL, irq, &irq_attr); + } else + irq = 0; /* No irq */ + + switch (pentry->type) { + case SFI_DEV_TYPE_IPC: + pr_debug("info[%2d]: IPC bus, name = %16.16s, " + "irq = 0x%2x\n", i, pentry->name, pentry->irq); + sfi_handle_ipc_dev(pentry); + break; + case SFI_DEV_TYPE_SPI: + memset(&spi_info, 0, sizeof(spi_info)); + strncpy(spi_info.modalias, pentry->name, SFI_NAME_LEN); + spi_info.irq = irq; + spi_info.bus_num = pentry->host_num; + spi_info.chip_select = pentry->addr; + spi_info.max_speed_hz = pentry->max_freq; + pr_debug("info[%2d]: SPI bus = %d, name = %16.16s, " + "irq = 0x%2x, max_freq = %d, cs = %d\n", i, + spi_info.bus_num, + spi_info.modalias, + spi_info.irq, + spi_info.max_speed_hz, + spi_info.chip_select); + sfi_handle_spi_dev(&spi_info); + break; + case SFI_DEV_TYPE_I2C: + memset(&i2c_info, 0, sizeof(i2c_info)); + bus = pentry->host_num; + strncpy(i2c_info.type, pentry->name, SFI_NAME_LEN); + i2c_info.irq = irq; + i2c_info.addr = pentry->addr; + pr_debug("info[%2d]: I2C bus = %d, name = %16.16s, " + "irq = 0x%2x, addr = 0x%x\n", i, bus, + i2c_info.type, + i2c_info.irq, + i2c_info.addr); + sfi_handle_i2c_dev(bus, &i2c_info); + break; + case SFI_DEV_TYPE_UART: + case SFI_DEV_TYPE_HSI: + default: + ; + } + } + return 0; +} + +static int __init mrst_platform_init(void) +{ + sfi_table_parse(SFI_SIG_GPIO, NULL, NULL, sfi_parse_gpio); + 
sfi_table_parse(SFI_SIG_DEVS, NULL, NULL, sfi_parse_devs); + return 0; +} +arch_initcall(mrst_platform_init); + +/* + * we will search these buttons in SFI GPIO table (by name) + * and register them dynamically. Please add all possible + * buttons here, we will shrink them if no GPIO found. + */ +static struct gpio_keys_button gpio_button[] = { + {KEY_POWER, -1, 1, "power_btn", EV_KEY, 0, 3000}, + {KEY_PROG1, -1, 1, "prog_btn1", EV_KEY, 0, 20}, + {KEY_PROG2, -1, 1, "prog_btn2", EV_KEY, 0, 20}, + {SW_LID, -1, 1, "lid_switch", EV_SW, 0, 20}, + {KEY_VOLUMEUP, -1, 1, "vol_up", EV_KEY, 0, 20}, + {KEY_VOLUMEDOWN, -1, 1, "vol_down", EV_KEY, 0, 20}, + {KEY_CAMERA, -1, 1, "camera_full", EV_KEY, 0, 20}, + {KEY_CAMERA_FOCUS, -1, 1, "camera_half", EV_KEY, 0, 20}, + {SW_KEYPAD_SLIDE, -1, 1, "MagSw1", EV_SW, 0, 20}, + {SW_KEYPAD_SLIDE, -1, 1, "MagSw2", EV_SW, 0, 20}, +}; + +static struct gpio_keys_platform_data mrst_gpio_keys = { + .buttons = gpio_button, + .rep = 1, + .nbuttons = -1, /* will fill it after search */ +}; + +static struct platform_device pb_device = { + .name = "gpio-keys", + .id = -1, + .dev = { + .platform_data = &mrst_gpio_keys, + }, +}; + +/* + * Shrink the non-existent buttons, register the gpio button + * device if there is some + */ +static int __init pb_keys_init(void) +{ + struct gpio_keys_button *gb = gpio_button; + int i, num, good = 0; + + num = sizeof(gpio_button) / sizeof(struct gpio_keys_button); + for (i = 0; i < num; i++) { + gb[i].gpio = get_gpio_by_name(gb[i].desc); + pr_debug("info[%2d]: name = %s, gpio = %d\n", i, gb[i].desc, gb[i].gpio); + if (gb[i].gpio == -1) + continue; + + if (i != good) + gb[good] = gb[i]; + good++; + } + + if (good) { + mrst_gpio_keys.nbuttons = good; + return platform_device_register(&pb_device); + } + return 0; +} +late_initcall(pb_keys_init); diff --git a/arch/x86/platform/mrst/vrtc.c b/arch/x86/platform/mrst/vrtc.c new file mode 100644 index 00000000..225bd0f0 --- /dev/null +++ b/arch/x86/platform/mrst/vrtc.c @@ -0,0 +1,169 @@ +/* + * vrtc.c: Driver for virtual RTC device on Intel MID platform + * + * (C) Copyright 2009 Intel Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + * + * Note: + * VRTC is emulated by system controller firmware, the real HW + * RTC is located in the PMIC device. SCU FW shadows PMIC RTC + * in a memory mapped IO space that is visible to the host IA + * processor. + * + * This driver is based on RTC CMOS driver. 
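+ * The vRTC register file follows the CMOS RTC layout, but only
+ * offsets 0x0-0xd are valid and the registers sit four bytes apart,
+ * hence the reg << 2 in the accessors below.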
+ */ + +#include <linux/kernel.h> +#include <linux/export.h> +#include <linux/init.h> +#include <linux/sfi.h> +#include <linux/platform_device.h> + +#include <asm/mrst.h> +#include <asm/mrst-vrtc.h> +#include <asm/time.h> +#include <asm/fixmap.h> + +static unsigned char __iomem *vrtc_virt_base; + +unsigned char vrtc_cmos_read(unsigned char reg) +{ + unsigned char retval; + + /* vRTC's registers range from 0x0 to 0xD */ + if (reg > 0xd || !vrtc_virt_base) + return 0xff; + + lock_cmos_prefix(reg); + retval = __raw_readb(vrtc_virt_base + (reg << 2)); + lock_cmos_suffix(reg); + return retval; +} +EXPORT_SYMBOL_GPL(vrtc_cmos_read); + +void vrtc_cmos_write(unsigned char val, unsigned char reg) +{ + if (reg > 0xd || !vrtc_virt_base) + return; + + lock_cmos_prefix(reg); + __raw_writeb(val, vrtc_virt_base + (reg << 2)); + lock_cmos_suffix(reg); +} +EXPORT_SYMBOL_GPL(vrtc_cmos_write); + +unsigned long vrtc_get_time(void) +{ + u8 sec, min, hour, mday, mon; + unsigned long flags; + u32 year; + + spin_lock_irqsave(&rtc_lock, flags); + + while ((vrtc_cmos_read(RTC_FREQ_SELECT) & RTC_UIP)) + cpu_relax(); + + sec = vrtc_cmos_read(RTC_SECONDS); + min = vrtc_cmos_read(RTC_MINUTES); + hour = vrtc_cmos_read(RTC_HOURS); + mday = vrtc_cmos_read(RTC_DAY_OF_MONTH); + mon = vrtc_cmos_read(RTC_MONTH); + year = vrtc_cmos_read(RTC_YEAR); + + spin_unlock_irqrestore(&rtc_lock, flags); + + /* vRTC YEAR reg contains the offset to 1972 */ + year += 1972; + + printk(KERN_INFO "vRTC: sec: %d min: %d hour: %d day: %d " + "mon: %d year: %d\n", sec, min, hour, mday, mon, year); + + return mktime(year, mon, mday, hour, min, sec); +} + +/* Only care about the minutes and seconds */ +int vrtc_set_mmss(unsigned long nowtime) +{ + int real_sec, real_min; + unsigned long flags; + int vrtc_min; + + spin_lock_irqsave(&rtc_lock, flags); + vrtc_min = vrtc_cmos_read(RTC_MINUTES); + + real_sec = nowtime % 60; + real_min = nowtime / 60; + if (((abs(real_min - vrtc_min) + 15)/30) & 1) + real_min += 30; + real_min %= 60; + + vrtc_cmos_write(real_sec, RTC_SECONDS); + vrtc_cmos_write(real_min, RTC_MINUTES); + spin_unlock_irqrestore(&rtc_lock, flags); + + return 0; +} + +void __init mrst_rtc_init(void) +{ + unsigned long vrtc_paddr; + + sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc); + + vrtc_paddr = sfi_mrtc_array[0].phys_addr; + if (!sfi_mrtc_num || !vrtc_paddr) + return; + + vrtc_virt_base = (void __iomem *)set_fixmap_offset_nocache(FIX_LNW_VRTC, + vrtc_paddr); + x86_platform.get_wallclock = vrtc_get_time; + x86_platform.set_wallclock = vrtc_set_mmss; +} + +/* + * The Moorestown platform has a memory mapped virtual RTC device that emulates + * the programming interface of the RTC. 
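+ * The MMIO base and IRQ of the device registered below come from the
+ * first entry of the SFI MRTC table, which mrst_rtc_init() has already
+ * parsed into sfi_mrtc_array[].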
+ */ + +static struct resource vrtc_resources[] = { + [0] = { + .flags = IORESOURCE_MEM, + }, + [1] = { + .flags = IORESOURCE_IRQ, + } +}; + +static struct platform_device vrtc_device = { + .name = "rtc_mrst", + .id = -1, + .resource = vrtc_resources, + .num_resources = ARRAY_SIZE(vrtc_resources), +}; + +/* Register the RTC device if appropriate */ +static int __init mrst_device_create(void) +{ + /* No Moorestown, no device */ + if (!mrst_identify_cpu()) + return -ENODEV; + /* No timer, no device */ + if (!sfi_mrtc_num) + return -ENODEV; + + /* iomem resource */ + vrtc_resources[0].start = sfi_mrtc_array[0].phys_addr; + vrtc_resources[0].end = sfi_mrtc_array[0].phys_addr + + MRST_VRTC_MAP_SZ; + /* irq resource */ + vrtc_resources[1].start = sfi_mrtc_array[0].irq; + vrtc_resources[1].end = sfi_mrtc_array[0].irq; + + return platform_device_register(&vrtc_device); +} + +module_init(mrst_device_create); diff --git a/arch/x86/platform/olpc/Makefile b/arch/x86/platform/olpc/Makefile new file mode 100644 index 00000000..fd332c53 --- /dev/null +++ b/arch/x86/platform/olpc/Makefile @@ -0,0 +1,5 @@ +obj-$(CONFIG_OLPC) += olpc.o olpc_ofw.o olpc_dt.o +obj-$(CONFIG_OLPC_XO1_PM) += olpc-xo1-pm.o xo1-wakeup.o +obj-$(CONFIG_OLPC_XO1_RTC) += olpc-xo1-rtc.o +obj-$(CONFIG_OLPC_XO1_SCI) += olpc-xo1-sci.o +obj-$(CONFIG_OLPC_XO15_SCI) += olpc-xo15-sci.o diff --git a/arch/x86/platform/olpc/olpc-xo1-pm.c b/arch/x86/platform/olpc/olpc-xo1-pm.c new file mode 100644 index 00000000..0ce8616c --- /dev/null +++ b/arch/x86/platform/olpc/olpc-xo1-pm.c @@ -0,0 +1,216 @@ +/* + * Support for power management features of the OLPC XO-1 laptop + * + * Copyright (C) 2010 Andres Salomon <dilinger@queued.net> + * Copyright (C) 2010 One Laptop per Child + * Copyright (C) 2006 Red Hat, Inc. + * Copyright (C) 2006 Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include <linux/cs5535.h> +#include <linux/platform_device.h> +#include <linux/export.h> +#include <linux/pm.h> +#include <linux/mfd/core.h> +#include <linux/suspend.h> + +#include <asm/io.h> +#include <asm/olpc.h> + +#define DRV_NAME "olpc-xo1-pm" + +static unsigned long acpi_base; +static unsigned long pms_base; + +static u16 wakeup_mask = CS5536_PM_PWRBTN; + +static struct { + unsigned long address; + unsigned short segment; +} ofw_bios_entry = { 0xF0000 + PAGE_OFFSET, __KERNEL_CS }; + +/* Set bits in the wakeup mask */ +void olpc_xo1_pm_wakeup_set(u16 value) +{ + wakeup_mask |= value; +} +EXPORT_SYMBOL_GPL(olpc_xo1_pm_wakeup_set); + +/* Clear bits in the wakeup mask */ +void olpc_xo1_pm_wakeup_clear(u16 value) +{ + wakeup_mask &= ~value; +} +EXPORT_SYMBOL_GPL(olpc_xo1_pm_wakeup_clear); + +static int xo1_power_state_enter(suspend_state_t pm_state) +{ + unsigned long saved_sci_mask; + int r; + + /* Only STR is supported */ + if (pm_state != PM_SUSPEND_MEM) + return -EINVAL; + + r = olpc_ec_cmd(EC_SET_SCI_INHIBIT, NULL, 0, NULL, 0); + if (r) + return r; + + /* + * Save SCI mask (this gets lost since PM1_EN is used as a mask for + * wakeup events, which is not necessarily the same event set) + */ + saved_sci_mask = inl(acpi_base + CS5536_PM1_STS); + saved_sci_mask &= 0xffff0000; + + /* Save CPU state */ + do_olpc_suspend_lowlevel(); + + /* Resume path starts here */ + + /* Restore SCI mask (using dword access to CS5536_PM1_EN) */ + outl(saved_sci_mask, acpi_base + CS5536_PM1_STS); + + /* Tell the EC to stop inhibiting SCIs */ + olpc_ec_cmd(EC_SET_SCI_INHIBIT_RELEASE, NULL, 0, NULL, 0); + + /* + * Tell the wireless module to restart USB communication. + * Must be done twice. + */ + olpc_ec_cmd(EC_WAKE_UP_WLAN, NULL, 0, NULL, 0); + olpc_ec_cmd(EC_WAKE_UP_WLAN, NULL, 0, NULL, 0); + + return 0; +} + +asmlinkage int xo1_do_sleep(u8 sleep_state) +{ + void *pgd_addr = __va(read_cr3()); + + /* Program wakeup mask (using dword access to CS5536_PM1_EN) */ + outl(wakeup_mask << 16, acpi_base + CS5536_PM1_STS); + + __asm__("movl %0,%%eax" : : "r" (pgd_addr)); + __asm__("call *(%%edi); cld" + : : "D" (&ofw_bios_entry)); + __asm__("movb $0x34, %al\n\t" + "outb %al, $0x70\n\t" + "movb $0x30, %al\n\t" + "outb %al, $0x71\n\t"); + return 0; +} + +static void xo1_power_off(void) +{ + printk(KERN_INFO "OLPC XO-1 power off sequence...\n"); + + /* Enable all of these controls with 0 delay */ + outl(0x40000000, pms_base + CS5536_PM_SCLK); + outl(0x40000000, pms_base + CS5536_PM_IN_SLPCTL); + outl(0x40000000, pms_base + CS5536_PM_WKXD); + outl(0x40000000, pms_base + CS5536_PM_WKD); + + /* Clear status bits (possibly unnecessary) */ + outl(0x0002ffff, pms_base + CS5536_PM_SSC); + outl(0xffffffff, acpi_base + CS5536_PM_GPE0_STS); + + /* Write SLP_EN bit to start the machinery */ + outl(0x00002000, acpi_base + CS5536_PM1_CNT); +} + +static int xo1_power_state_valid(suspend_state_t pm_state) +{ + /* suspend-to-RAM only */ + return pm_state == PM_SUSPEND_MEM; +} + +static const struct platform_suspend_ops xo1_suspend_ops = { + .valid = xo1_power_state_valid, + .enter = xo1_power_state_enter, +}; + +static int __devinit xo1_pm_probe(struct platform_device *pdev) +{ + struct resource *res; + int err; + + /* don't run on non-XOs */ + if (!machine_is_olpc()) + return -ENODEV; + + err = mfd_cell_enable(pdev); + if (err) + return err; + + res = platform_get_resource(pdev, IORESOURCE_IO, 0); + if (!res) { + dev_err(&pdev->dev, "can't fetch device resource info\n"); + return -EIO; + } + if (strcmp(pdev->name, 
"cs5535-pms") == 0) + pms_base = res->start; + else if (strcmp(pdev->name, "olpc-xo1-pm-acpi") == 0) + acpi_base = res->start; + + /* If we have both addresses, we can override the poweroff hook */ + if (pms_base && acpi_base) { + suspend_set_ops(&xo1_suspend_ops); + pm_power_off = xo1_power_off; + printk(KERN_INFO "OLPC XO-1 support registered\n"); + } + + return 0; +} + +static int __devexit xo1_pm_remove(struct platform_device *pdev) +{ + mfd_cell_disable(pdev); + + if (strcmp(pdev->name, "cs5535-pms") == 0) + pms_base = 0; + else if (strcmp(pdev->name, "olpc-xo1-pm-acpi") == 0) + acpi_base = 0; + + pm_power_off = NULL; + return 0; +} + +static struct platform_driver cs5535_pms_driver = { + .driver = { + .name = "cs5535-pms", + .owner = THIS_MODULE, + }, + .probe = xo1_pm_probe, + .remove = __devexit_p(xo1_pm_remove), +}; + +static struct platform_driver cs5535_acpi_driver = { + .driver = { + .name = "olpc-xo1-pm-acpi", + .owner = THIS_MODULE, + }, + .probe = xo1_pm_probe, + .remove = __devexit_p(xo1_pm_remove), +}; + +static int __init xo1_pm_init(void) +{ + int r; + + r = platform_driver_register(&cs5535_pms_driver); + if (r) + return r; + + r = platform_driver_register(&cs5535_acpi_driver); + if (r) + platform_driver_unregister(&cs5535_pms_driver); + + return r; +} +arch_initcall(xo1_pm_init); diff --git a/arch/x86/platform/olpc/olpc-xo1-rtc.c b/arch/x86/platform/olpc/olpc-xo1-rtc.c new file mode 100644 index 00000000..a2b4efdd --- /dev/null +++ b/arch/x86/platform/olpc/olpc-xo1-rtc.c @@ -0,0 +1,81 @@ +/* + * Support for OLPC XO-1 Real Time Clock (RTC) + * + * Copyright (C) 2011 One Laptop per Child + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include <linux/mc146818rtc.h> +#include <linux/platform_device.h> +#include <linux/rtc.h> +#include <linux/of.h> + +#include <asm/msr.h> +#include <asm/olpc.h> + +static void rtc_wake_on(struct device *dev) +{ + olpc_xo1_pm_wakeup_set(CS5536_PM_RTC); +} + +static void rtc_wake_off(struct device *dev) +{ + olpc_xo1_pm_wakeup_clear(CS5536_PM_RTC); +} + +static struct resource rtc_platform_resource[] = { + [0] = { + .start = RTC_PORT(0), + .end = RTC_PORT(1), + .flags = IORESOURCE_IO, + }, + [1] = { + .start = RTC_IRQ, + .end = RTC_IRQ, + .flags = IORESOURCE_IRQ, + } +}; + +static struct cmos_rtc_board_info rtc_info = { + .rtc_day_alarm = 0, + .rtc_mon_alarm = 0, + .rtc_century = 0, + .wake_on = rtc_wake_on, + .wake_off = rtc_wake_off, +}; + +static struct platform_device xo1_rtc_device = { + .name = "rtc_cmos", + .id = -1, + .num_resources = ARRAY_SIZE(rtc_platform_resource), + .dev.platform_data = &rtc_info, + .resource = rtc_platform_resource, +}; + +static int __init xo1_rtc_init(void) +{ + int r; + struct device_node *node; + + node = of_find_compatible_node(NULL, NULL, "olpc,xo1-rtc"); + if (!node) + return 0; + of_node_put(node); + + pr_info("olpc-xo1-rtc: Initializing OLPC XO-1 RTC\n"); + rdmsrl(MSR_RTC_DOMA_OFFSET, rtc_info.rtc_day_alarm); + rdmsrl(MSR_RTC_MONA_OFFSET, rtc_info.rtc_mon_alarm); + rdmsrl(MSR_RTC_CEN_OFFSET, rtc_info.rtc_century); + + r = platform_device_register(&xo1_rtc_device); + if (r) + return r; + + device_init_wakeup(&xo1_rtc_device.dev, 1); + return 0; +} +arch_initcall(xo1_rtc_init); diff --git a/arch/x86/platform/olpc/olpc-xo1-sci.c b/arch/x86/platform/olpc/olpc-xo1-sci.c new file mode 100644 index 00000000..1d4c783d --- /dev/null +++ b/arch/x86/platform/olpc/olpc-xo1-sci.c @@ -0,0 +1,614 @@ +/* + * Support for OLPC XO-1 System Control Interrupts (SCI) + * + * Copyright (C) 2010 One Laptop per Child + * Copyright (C) 2006 Red Hat, Inc. + * Copyright (C) 2006 Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include <linux/cs5535.h> +#include <linux/device.h> +#include <linux/gpio.h> +#include <linux/input.h> +#include <linux/interrupt.h> +#include <linux/platform_device.h> +#include <linux/pm.h> +#include <linux/mfd/core.h> +#include <linux/power_supply.h> +#include <linux/suspend.h> +#include <linux/workqueue.h> + +#include <asm/io.h> +#include <asm/msr.h> +#include <asm/olpc.h> + +#define DRV_NAME "olpc-xo1-sci" +#define PFX DRV_NAME ": " + +static unsigned long acpi_base; +static struct input_dev *power_button_idev; +static struct input_dev *ebook_switch_idev; +static struct input_dev *lid_switch_idev; + +static int sci_irq; + +static bool lid_open; +static bool lid_inverted; +static int lid_wake_mode; + +enum lid_wake_modes { + LID_WAKE_ALWAYS, + LID_WAKE_OPEN, + LID_WAKE_CLOSE, +}; + +static const char * const lid_wake_mode_names[] = { + [LID_WAKE_ALWAYS] = "always", + [LID_WAKE_OPEN] = "open", + [LID_WAKE_CLOSE] = "close", +}; + +static void battery_status_changed(void) +{ + struct power_supply *psy = power_supply_get_by_name("olpc-battery"); + + if (psy) { + power_supply_changed(psy); + put_device(psy->dev); + } +} + +static void ac_status_changed(void) +{ + struct power_supply *psy = power_supply_get_by_name("olpc-ac"); + + if (psy) { + power_supply_changed(psy); + put_device(psy->dev); + } +} + +/* Report current ebook switch state through input layer */ +static void send_ebook_state(void) +{ + unsigned char state; + + if (olpc_ec_cmd(EC_READ_EB_MODE, NULL, 0, &state, 1)) { + pr_err(PFX "failed to get ebook state\n"); + return; + } + + input_report_switch(ebook_switch_idev, SW_TABLET_MODE, state); + input_sync(ebook_switch_idev); +} + +static void flip_lid_inverter(void) +{ + /* gpio is high; invert so we'll get l->h event interrupt */ + if (lid_inverted) + cs5535_gpio_clear(OLPC_GPIO_LID, GPIO_INPUT_INVERT); + else + cs5535_gpio_set(OLPC_GPIO_LID, GPIO_INPUT_INVERT); + lid_inverted = !lid_inverted; +} + +static void detect_lid_state(void) +{ + /* + * the edge detector hookup on the gpio inputs on the geode is + * odd, to say the least. See http://dev.laptop.org/ticket/5703 + * for details, but in a nutshell: we don't use the edge + * detectors. instead, we make use of an anomoly: with the both + * edge detectors turned off, we still get an edge event on a + * positive edge transition. to take advantage of this, we use the + * front-end inverter to ensure that that's the edge we're always + * going to see next. 
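+ *
+ * Concretely: whenever the input reads back high, flip_lid_inverter()
+ * toggles the input inverter, so the next physical transition is again
+ * seen as a rising edge and raises the event.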
+ */ + + int state; + + state = cs5535_gpio_isset(OLPC_GPIO_LID, GPIO_READ_BACK); + lid_open = !state ^ !lid_inverted; /* x ^^ y */ + if (!state) + return; + + flip_lid_inverter(); +} + +/* Report current lid switch state through input layer */ +static void send_lid_state(void) +{ + input_report_switch(lid_switch_idev, SW_LID, !lid_open); + input_sync(lid_switch_idev); +} + +static ssize_t lid_wake_mode_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + const char *mode = lid_wake_mode_names[lid_wake_mode]; + return sprintf(buf, "%s\n", mode); +} +static ssize_t lid_wake_mode_set(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int i; + for (i = 0; i < ARRAY_SIZE(lid_wake_mode_names); i++) { + const char *mode = lid_wake_mode_names[i]; + if (strlen(mode) != count || strncasecmp(mode, buf, count)) + continue; + + lid_wake_mode = i; + return count; + } + return -EINVAL; +} +static DEVICE_ATTR(lid_wake_mode, S_IWUSR | S_IRUGO, lid_wake_mode_show, + lid_wake_mode_set); + +/* + * Process all items in the EC's SCI queue. + * + * This is handled in a workqueue because olpc_ec_cmd can be slow (and + * can even timeout). + * + * If propagate_events is false, the queue is drained without events being + * generated for the interrupts. + */ +static void process_sci_queue(bool propagate_events) +{ + int r; + u16 data; + + do { + r = olpc_ec_sci_query(&data); + if (r || !data) + break; + + pr_debug(PFX "SCI 0x%x received\n", data); + + switch (data) { + case EC_SCI_SRC_BATERR: + case EC_SCI_SRC_BATSOC: + case EC_SCI_SRC_BATTERY: + case EC_SCI_SRC_BATCRIT: + battery_status_changed(); + break; + case EC_SCI_SRC_ACPWR: + ac_status_changed(); + break; + } + + if (data == EC_SCI_SRC_EBOOK && propagate_events) + send_ebook_state(); + } while (data); + + if (r) + pr_err(PFX "Failed to clear SCI queue"); +} + +static void process_sci_queue_work(struct work_struct *work) +{ + process_sci_queue(true); +} + +static DECLARE_WORK(sci_work, process_sci_queue_work); + +static irqreturn_t xo1_sci_intr(int irq, void *dev_id) +{ + struct platform_device *pdev = dev_id; + u32 sts; + u32 gpe; + + sts = inl(acpi_base + CS5536_PM1_STS); + outl(sts | 0xffff, acpi_base + CS5536_PM1_STS); + + gpe = inl(acpi_base + CS5536_PM_GPE0_STS); + outl(0xffffffff, acpi_base + CS5536_PM_GPE0_STS); + + dev_dbg(&pdev->dev, "sts %x gpe %x\n", sts, gpe); + + if (sts & CS5536_PWRBTN_FLAG && !(sts & CS5536_WAK_FLAG)) { + input_report_key(power_button_idev, KEY_POWER, 1); + input_sync(power_button_idev); + input_report_key(power_button_idev, KEY_POWER, 0); + input_sync(power_button_idev); + } + + if (gpe & CS5536_GPIOM7_PME_FLAG) { /* EC GPIO */ + cs5535_gpio_set(OLPC_GPIO_ECSCI, GPIO_NEGATIVE_EDGE_STS); + schedule_work(&sci_work); + } + + cs5535_gpio_set(OLPC_GPIO_LID, GPIO_NEGATIVE_EDGE_STS); + cs5535_gpio_set(OLPC_GPIO_LID, GPIO_POSITIVE_EDGE_STS); + detect_lid_state(); + send_lid_state(); + + return IRQ_HANDLED; +} + +static int xo1_sci_suspend(struct platform_device *pdev, pm_message_t state) +{ + if (device_may_wakeup(&power_button_idev->dev)) + olpc_xo1_pm_wakeup_set(CS5536_PM_PWRBTN); + else + olpc_xo1_pm_wakeup_clear(CS5536_PM_PWRBTN); + + if (device_may_wakeup(&ebook_switch_idev->dev)) + olpc_ec_wakeup_set(EC_SCI_SRC_EBOOK); + else + olpc_ec_wakeup_clear(EC_SCI_SRC_EBOOK); + + if (!device_may_wakeup(&lid_switch_idev->dev)) { + cs5535_gpio_clear(OLPC_GPIO_LID, GPIO_EVENTS_ENABLE); + } else if ((lid_open && lid_wake_mode == LID_WAKE_OPEN) || + (!lid_open && lid_wake_mode == 
LID_WAKE_CLOSE)) { + flip_lid_inverter(); + + /* we may have just caused an event */ + cs5535_gpio_set(OLPC_GPIO_LID, GPIO_NEGATIVE_EDGE_STS); + cs5535_gpio_set(OLPC_GPIO_LID, GPIO_POSITIVE_EDGE_STS); + + cs5535_gpio_set(OLPC_GPIO_LID, GPIO_EVENTS_ENABLE); + } + + return 0; +} + +static int xo1_sci_resume(struct platform_device *pdev) +{ + /* + * We don't know what may have happened while we were asleep. + * Reestablish our lid setup so we're sure to catch all transitions. + */ + detect_lid_state(); + send_lid_state(); + cs5535_gpio_set(OLPC_GPIO_LID, GPIO_EVENTS_ENABLE); + + /* Enable all EC events */ + olpc_ec_mask_write(EC_SCI_SRC_ALL); + + /* Power/battery status might have changed too */ + battery_status_changed(); + ac_status_changed(); + return 0; +} + +static int __devinit setup_sci_interrupt(struct platform_device *pdev) +{ + u32 lo, hi; + u32 sts; + int r; + + rdmsr(0x51400020, lo, hi); + sci_irq = (lo >> 20) & 15; + + if (sci_irq) { + dev_info(&pdev->dev, "SCI is mapped to IRQ %d\n", sci_irq); + } else { + /* Zero means masked */ + dev_info(&pdev->dev, "SCI unmapped. Mapping to IRQ 3\n"); + sci_irq = 3; + lo |= 0x00300000; + wrmsrl(0x51400020, lo); + } + + /* Select level triggered in PIC */ + if (sci_irq < 8) { + lo = inb(CS5536_PIC_INT_SEL1); + lo |= 1 << sci_irq; + outb(lo, CS5536_PIC_INT_SEL1); + } else { + lo = inb(CS5536_PIC_INT_SEL2); + lo |= 1 << (sci_irq - 8); + outb(lo, CS5536_PIC_INT_SEL2); + } + + /* Enable SCI from power button, and clear pending interrupts */ + sts = inl(acpi_base + CS5536_PM1_STS); + outl((CS5536_PM_PWRBTN << 16) | 0xffff, acpi_base + CS5536_PM1_STS); + + r = request_irq(sci_irq, xo1_sci_intr, 0, DRV_NAME, pdev); + if (r) + dev_err(&pdev->dev, "can't request interrupt\n"); + + return r; +} + +static int __devinit setup_ec_sci(void) +{ + int r; + + r = gpio_request(OLPC_GPIO_ECSCI, "OLPC-ECSCI"); + if (r) + return r; + + gpio_direction_input(OLPC_GPIO_ECSCI); + + /* Clear pending EC SCI events */ + cs5535_gpio_set(OLPC_GPIO_ECSCI, GPIO_NEGATIVE_EDGE_STS); + cs5535_gpio_set(OLPC_GPIO_ECSCI, GPIO_POSITIVE_EDGE_STS); + + /* + * Enable EC SCI events, and map them to both a PME and the SCI + * interrupt. + * + * Ordinarily, in addition to functioning as GPIOs, Geode GPIOs can + * be mapped to regular interrupts *or* Geode-specific Power + * Management Events (PMEs) - events that bring the system out of + * suspend. In this case, we want both of those things - the system + * wakeup, *and* the ability to get an interrupt when an event occurs. + * + * To achieve this, we map the GPIO to a PME, and then we use one + * of the many generic knobs on the CS5535 PIC to additionally map the + * PME to the regular SCI interrupt line. 
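setup_sci_interrupt() above selects level triggering with a read-modify-write of one bit per IRQ across two 8-bit ports, the classic ELCR layout (IRQs 0-7 in the first port, 8-15 in the second). The same logic factored into a helper; a sketch only, reusing the CS5536_PIC_INT_SEL1/2 port constants from the code above and inheriting its assumption that the caller serializes access:

/* needs <linux/io.h> plus the CS5536_* constants used above */
static void cs5536_pic_set_level(int irq)
{
	u8 sel;

	if (irq < 8) {
		sel = inb(CS5536_PIC_INT_SEL1);
		sel |= 1 << irq;
		outb(sel, CS5536_PIC_INT_SEL1);
	} else {
		sel = inb(CS5536_PIC_INT_SEL2);
		sel |= 1 << (irq - 8);
		outb(sel, CS5536_PIC_INT_SEL2);
	}
}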
+ */ + cs5535_gpio_set(OLPC_GPIO_ECSCI, GPIO_EVENTS_ENABLE); + + /* Set the SCI to cause a PME event on group 7 */ + cs5535_gpio_setup_event(OLPC_GPIO_ECSCI, 7, 1); + + /* And have group 7 also fire the SCI interrupt */ + cs5535_pic_unreqz_select_high(7, sci_irq); + + return 0; +} + +static void free_ec_sci(void) +{ + gpio_free(OLPC_GPIO_ECSCI); +} + +static int __devinit setup_lid_events(void) +{ + int r; + + r = gpio_request(OLPC_GPIO_LID, "OLPC-LID"); + if (r) + return r; + + gpio_direction_input(OLPC_GPIO_LID); + + cs5535_gpio_clear(OLPC_GPIO_LID, GPIO_INPUT_INVERT); + lid_inverted = 0; + + /* Clear edge detection and event enable for now */ + cs5535_gpio_clear(OLPC_GPIO_LID, GPIO_EVENTS_ENABLE); + cs5535_gpio_clear(OLPC_GPIO_LID, GPIO_NEGATIVE_EDGE_EN); + cs5535_gpio_clear(OLPC_GPIO_LID, GPIO_POSITIVE_EDGE_EN); + cs5535_gpio_set(OLPC_GPIO_LID, GPIO_NEGATIVE_EDGE_STS); + cs5535_gpio_set(OLPC_GPIO_LID, GPIO_POSITIVE_EDGE_STS); + + /* Set the LID to cause an PME event on group 6 */ + cs5535_gpio_setup_event(OLPC_GPIO_LID, 6, 1); + + /* Set PME group 6 to fire the SCI interrupt */ + cs5535_gpio_set_irq(6, sci_irq); + + /* Enable the event */ + cs5535_gpio_set(OLPC_GPIO_LID, GPIO_EVENTS_ENABLE); + + return 0; +} + +static void free_lid_events(void) +{ + gpio_free(OLPC_GPIO_LID); +} + +static int __devinit setup_power_button(struct platform_device *pdev) +{ + int r; + + power_button_idev = input_allocate_device(); + if (!power_button_idev) + return -ENOMEM; + + power_button_idev->name = "Power Button"; + power_button_idev->phys = DRV_NAME "/input0"; + set_bit(EV_KEY, power_button_idev->evbit); + set_bit(KEY_POWER, power_button_idev->keybit); + + power_button_idev->dev.parent = &pdev->dev; + device_init_wakeup(&power_button_idev->dev, 1); + + r = input_register_device(power_button_idev); + if (r) { + dev_err(&pdev->dev, "failed to register power button: %d\n", r); + input_free_device(power_button_idev); + } + + return r; +} + +static void free_power_button(void) +{ + input_unregister_device(power_button_idev); + input_free_device(power_button_idev); +} + +static int __devinit setup_ebook_switch(struct platform_device *pdev) +{ + int r; + + ebook_switch_idev = input_allocate_device(); + if (!ebook_switch_idev) + return -ENOMEM; + + ebook_switch_idev->name = "EBook Switch"; + ebook_switch_idev->phys = DRV_NAME "/input1"; + set_bit(EV_SW, ebook_switch_idev->evbit); + set_bit(SW_TABLET_MODE, ebook_switch_idev->swbit); + + ebook_switch_idev->dev.parent = &pdev->dev; + device_set_wakeup_capable(&ebook_switch_idev->dev, true); + + r = input_register_device(ebook_switch_idev); + if (r) { + dev_err(&pdev->dev, "failed to register ebook switch: %d\n", r); + input_free_device(ebook_switch_idev); + } + + return r; +} + +static void free_ebook_switch(void) +{ + input_unregister_device(ebook_switch_idev); + input_free_device(ebook_switch_idev); +} + +static int __devinit setup_lid_switch(struct platform_device *pdev) +{ + int r; + + lid_switch_idev = input_allocate_device(); + if (!lid_switch_idev) + return -ENOMEM; + + lid_switch_idev->name = "Lid Switch"; + lid_switch_idev->phys = DRV_NAME "/input2"; + set_bit(EV_SW, lid_switch_idev->evbit); + set_bit(SW_LID, lid_switch_idev->swbit); + + lid_switch_idev->dev.parent = &pdev->dev; + device_set_wakeup_capable(&lid_switch_idev->dev, true); + + r = input_register_device(lid_switch_idev); + if (r) { + dev_err(&pdev->dev, "failed to register lid switch: %d\n", r); + goto err_register; + } + + r = device_create_file(&lid_switch_idev->dev, 
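setup_power_button() and setup_ebook_switch() above follow the standard input-device pattern: allocate, describe the capability bits, register, and call input_free_device() only if registration fails — once input_register_device() has succeeded, input_unregister_device() alone is the documented way to release the device. A minimal sketch with hypothetical names:

/* needs <linux/input.h> */
static struct input_dev *demo_idev;

static int demo_switch_setup(struct device *parent)
{
	int r;

	demo_idev = input_allocate_device();
	if (!demo_idev)
		return -ENOMEM;

	demo_idev->name = "Demo Switch";	/* hypothetical */
	demo_idev->dev.parent = parent;
	set_bit(EV_SW, demo_idev->evbit);
	set_bit(SW_LID, demo_idev->swbit);

	r = input_register_device(demo_idev);
	if (r)
		input_free_device(demo_idev);	/* failure path only */
	return r;
}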
&dev_attr_lid_wake_mode); + if (r) { + dev_err(&pdev->dev, "failed to create wake mode attr: %d\n", r); + goto err_create_attr; + } + + return 0; + +err_create_attr: + input_unregister_device(lid_switch_idev); +err_register: + input_free_device(lid_switch_idev); + return r; +} + +static void free_lid_switch(void) +{ + device_remove_file(&lid_switch_idev->dev, &dev_attr_lid_wake_mode); + input_unregister_device(lid_switch_idev); + input_free_device(lid_switch_idev); +} + +static int __devinit xo1_sci_probe(struct platform_device *pdev) +{ + struct resource *res; + int r; + + /* don't run on non-XOs */ + if (!machine_is_olpc()) + return -ENODEV; + + r = mfd_cell_enable(pdev); + if (r) + return r; + + res = platform_get_resource(pdev, IORESOURCE_IO, 0); + if (!res) { + dev_err(&pdev->dev, "can't fetch device resource info\n"); + return -EIO; + } + acpi_base = res->start; + + r = setup_power_button(pdev); + if (r) + return r; + + r = setup_ebook_switch(pdev); + if (r) + goto err_ebook; + + r = setup_lid_switch(pdev); + if (r) + goto err_lid; + + r = setup_lid_events(); + if (r) + goto err_lidevt; + + r = setup_ec_sci(); + if (r) + goto err_ecsci; + + /* Enable PME generation for EC-generated events */ + outl(CS5536_GPIOM6_PME_EN | CS5536_GPIOM7_PME_EN, + acpi_base + CS5536_PM_GPE0_EN); + + /* Clear pending events */ + outl(0xffffffff, acpi_base + CS5536_PM_GPE0_STS); + process_sci_queue(false); + + /* Initial sync */ + send_ebook_state(); + detect_lid_state(); + send_lid_state(); + + r = setup_sci_interrupt(pdev); + if (r) + goto err_sci; + + /* Enable all EC events */ + olpc_ec_mask_write(EC_SCI_SRC_ALL); + + return r; + +err_sci: + free_ec_sci(); +err_ecsci: + free_lid_events(); +err_lidevt: + free_lid_switch(); +err_lid: + free_ebook_switch(); +err_ebook: + free_power_button(); + return r; +} + +static int __devexit xo1_sci_remove(struct platform_device *pdev) +{ + mfd_cell_disable(pdev); + free_irq(sci_irq, pdev); + cancel_work_sync(&sci_work); + free_ec_sci(); + free_lid_events(); + free_lid_switch(); + free_ebook_switch(); + free_power_button(); + acpi_base = 0; + return 0; +} + +static struct platform_driver xo1_sci_driver = { + .driver = { + .name = "olpc-xo1-sci-acpi", + }, + .probe = xo1_sci_probe, + .remove = __devexit_p(xo1_sci_remove), + .suspend = xo1_sci_suspend, + .resume = xo1_sci_resume, +}; + +static int __init xo1_sci_init(void) +{ + return platform_driver_register(&xo1_sci_driver); +} +arch_initcall(xo1_sci_init); diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c new file mode 100644 index 00000000..23e5b9d7 --- /dev/null +++ b/arch/x86/platform/olpc/olpc-xo15-sci.c @@ -0,0 +1,238 @@ +/* + * Support for OLPC XO-1.5 System Control Interrupts (SCI) + * + * Copyright (C) 2009-2010 One Laptop per Child + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
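xo1_sci_probe() above uses the kernel's standard unwind idiom: each setup step that can fail jumps to a label that undoes every previous step, with the labels stacked in reverse order so teardown mirrors setup exactly. The skeleton of that shape, with hypothetical step names:

static int demo_probe(struct platform_device *pdev)
{
	int r;

	r = setup_a(pdev);	/* hypothetical steps */
	if (r)
		return r;
	r = setup_b(pdev);
	if (r)
		goto err_b;
	r = setup_c(pdev);
	if (r)
		goto err_c;
	return 0;

err_c:
	undo_b(pdev);	/* unwind in reverse order */
err_b:
	undo_a(pdev);
	return r;
}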
+ */ + +#include <linux/device.h> +#include <linux/slab.h> +#include <linux/workqueue.h> +#include <linux/power_supply.h> + +#include <acpi/acpi_bus.h> +#include <acpi/acpi_drivers.h> +#include <asm/olpc.h> + +#define DRV_NAME "olpc-xo15-sci" +#define PFX DRV_NAME ": " +#define XO15_SCI_CLASS DRV_NAME +#define XO15_SCI_DEVICE_NAME "OLPC XO-1.5 SCI" + +static unsigned long xo15_sci_gpe; +static bool lid_wake_on_close; + +/* + * The normal ACPI LID wakeup behavior is wake-on-open, but not + * wake-on-close. This is implemented as standard by the XO-1.5 DSDT. + * + * We provide here a sysfs attribute that will additionally enable + * wake-on-close behavior. This is useful (e.g.) when we oportunistically + * suspend with the display running; if the lid is then closed, we want to + * wake up to turn the display off. + * + * This is controlled through a custom method in the XO-1.5 DSDT. + */ +static int set_lid_wake_behavior(bool wake_on_close) +{ + struct acpi_object_list arg_list; + union acpi_object arg; + acpi_status status; + + arg_list.count = 1; + arg_list.pointer = &arg; + arg.type = ACPI_TYPE_INTEGER; + arg.integer.value = wake_on_close; + + status = acpi_evaluate_object(NULL, "\\_SB.PCI0.LID.LIDW", &arg_list, NULL); + if (ACPI_FAILURE(status)) { + pr_warning(PFX "failed to set lid behavior\n"); + return 1; + } + + lid_wake_on_close = wake_on_close; + + return 0; +} + +static ssize_t +lid_wake_on_close_show(struct kobject *s, struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", lid_wake_on_close); +} + +static ssize_t lid_wake_on_close_store(struct kobject *s, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned int val; + + if (sscanf(buf, "%u", &val) != 1) + return -EINVAL; + + set_lid_wake_behavior(!!val); + + return n; +} + +static struct kobj_attribute lid_wake_on_close_attr = + __ATTR(lid_wake_on_close, 0644, + lid_wake_on_close_show, + lid_wake_on_close_store); + +static void battery_status_changed(void) +{ + struct power_supply *psy = power_supply_get_by_name("olpc-battery"); + + if (psy) { + power_supply_changed(psy); + put_device(psy->dev); + } +} + +static void ac_status_changed(void) +{ + struct power_supply *psy = power_supply_get_by_name("olpc-ac"); + + if (psy) { + power_supply_changed(psy); + put_device(psy->dev); + } +} + +static void process_sci_queue(void) +{ + u16 data; + int r; + + do { + r = olpc_ec_sci_query(&data); + if (r || !data) + break; + + pr_debug(PFX "SCI 0x%x received\n", data); + + switch (data) { + case EC_SCI_SRC_BATERR: + case EC_SCI_SRC_BATSOC: + case EC_SCI_SRC_BATTERY: + case EC_SCI_SRC_BATCRIT: + battery_status_changed(); + break; + case EC_SCI_SRC_ACPWR: + ac_status_changed(); + break; + } + } while (data); + + if (r) + pr_err(PFX "Failed to clear SCI queue"); +} + +static void process_sci_queue_work(struct work_struct *work) +{ + process_sci_queue(); +} + +static DECLARE_WORK(sci_work, process_sci_queue_work); + +static u32 xo15_sci_gpe_handler(acpi_handle gpe_device, u32 gpe, void *context) +{ + schedule_work(&sci_work); + return ACPI_INTERRUPT_HANDLED | ACPI_REENABLE_GPE; +} + +static int xo15_sci_add(struct acpi_device *device) +{ + unsigned long long tmp; + acpi_status status; + int r; + + if (!device) + return -EINVAL; + + strcpy(acpi_device_name(device), XO15_SCI_DEVICE_NAME); + strcpy(acpi_device_class(device), XO15_SCI_CLASS); + + /* Get GPE bit assignment (EC events). 
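set_lid_wake_behavior() above shows the general recipe for invoking an ACPI control method with one integer argument: fill a union acpi_object, wrap it in an acpi_object_list, and hand both to acpi_evaluate_object(). Reduced to a reusable sketch (the method name "DEMO" is hypothetical):

/* needs <acpi/acpi_bus.h> */
static acpi_status demo_call_method(acpi_handle handle, u64 value)
{
	union acpi_object arg = {
		.integer = {
			.type  = ACPI_TYPE_INTEGER,
			.value = value,
		},
	};
	struct acpi_object_list args = { .count = 1, .pointer = &arg };

	return acpi_evaluate_object(handle, "DEMO", &args, NULL);
}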
*/ + status = acpi_evaluate_integer(device->handle, "_GPE", NULL, &tmp); + if (ACPI_FAILURE(status)) + return -EINVAL; + + xo15_sci_gpe = tmp; + status = acpi_install_gpe_handler(NULL, xo15_sci_gpe, + ACPI_GPE_EDGE_TRIGGERED, + xo15_sci_gpe_handler, device); + if (ACPI_FAILURE(status)) + return -ENODEV; + + dev_info(&device->dev, "Initialized, GPE = 0x%lx\n", xo15_sci_gpe); + + r = sysfs_create_file(&device->dev.kobj, &lid_wake_on_close_attr.attr); + if (r) + goto err_sysfs; + + /* Flush queue, and enable all SCI events */ + process_sci_queue(); + olpc_ec_mask_write(EC_SCI_SRC_ALL); + + acpi_enable_gpe(NULL, xo15_sci_gpe); + + /* Enable wake-on-EC */ + if (device->wakeup.flags.valid) + device_init_wakeup(&device->dev, true); + + return 0; + +err_sysfs: + acpi_remove_gpe_handler(NULL, xo15_sci_gpe, xo15_sci_gpe_handler); + cancel_work_sync(&sci_work); + return r; +} + +static int xo15_sci_remove(struct acpi_device *device, int type) +{ + acpi_disable_gpe(NULL, xo15_sci_gpe); + acpi_remove_gpe_handler(NULL, xo15_sci_gpe, xo15_sci_gpe_handler); + cancel_work_sync(&sci_work); + sysfs_remove_file(&device->dev.kobj, &lid_wake_on_close_attr.attr); + return 0; +} + +static int xo15_sci_resume(struct acpi_device *device) +{ + /* Enable all EC events */ + olpc_ec_mask_write(EC_SCI_SRC_ALL); + + /* Power/battery status might have changed */ + battery_status_changed(); + ac_status_changed(); + + return 0; +} + +static const struct acpi_device_id xo15_sci_device_ids[] = { + {"XO15EC", 0}, + {"", 0}, +}; + +static struct acpi_driver xo15_sci_drv = { + .name = DRV_NAME, + .class = XO15_SCI_CLASS, + .ids = xo15_sci_device_ids, + .ops = { + .add = xo15_sci_add, + .remove = xo15_sci_remove, + .resume = xo15_sci_resume, + }, +}; + +static int __init xo15_sci_init(void) +{ + return acpi_bus_register_driver(&xo15_sci_drv); +} +device_initcall(xo15_sci_init); diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c new file mode 100644 index 00000000..a4bee53c --- /dev/null +++ b/arch/x86/platform/olpc/olpc.c @@ -0,0 +1,476 @@ +/* + * Support for the OLPC DCON and OLPC EC access + * + * Copyright © 2006 Advanced Micro Devices, Inc. + * Copyright © 2007-2008 Andres Salomon <dilinger@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/spinlock.h> +#include <linux/io.h> +#include <linux/string.h> +#include <linux/platform_device.h> +#include <linux/of.h> +#include <linux/syscore_ops.h> +#include <linux/debugfs.h> +#include <linux/mutex.h> + +#include <asm/geode.h> +#include <asm/setup.h> +#include <asm/olpc.h> +#include <asm/olpc_ofw.h> + +struct olpc_platform_t olpc_platform_info; +EXPORT_SYMBOL_GPL(olpc_platform_info); + +static DEFINE_SPINLOCK(ec_lock); + +/* debugfs interface to EC commands */ +#define EC_MAX_CMD_ARGS (5 + 1) /* cmd byte + 5 args */ +#define EC_MAX_CMD_REPLY (8) + +static struct dentry *ec_debugfs_dir; +static DEFINE_MUTEX(ec_debugfs_cmd_lock); +static unsigned char ec_debugfs_resp[EC_MAX_CMD_REPLY]; +static unsigned int ec_debugfs_resp_bytes; + +/* EC event mask to be applied during suspend (defining wakeup sources). 
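xo15_sci_add() above wires the device's _GPE number to a handler and then arms it; xo15_sci_remove() runs the same calls in reverse. The lifecycle in isolation, as a sketch — the handler runs in interrupt context, which is why the real driver defers its work to a workqueue:

static u32 demo_gpe_handler(acpi_handle dev, u32 gpe, void *context)
{
	/* defer the real work, then re-arm the GPE */
	return ACPI_INTERRUPT_HANDLED | ACPI_REENABLE_GPE;
}

static acpi_status demo_gpe_attach(unsigned long gpe)
{
	acpi_status status;

	status = acpi_install_gpe_handler(NULL, gpe, ACPI_GPE_EDGE_TRIGGERED,
					  demo_gpe_handler, NULL);
	if (ACPI_FAILURE(status))
		return status;
	return acpi_enable_gpe(NULL, gpe);
}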
*/ +static u16 ec_wakeup_mask; + +/* what the timeout *should* be (in ms) */ +#define EC_BASE_TIMEOUT 20 + +/* the timeout that bugs in the EC might force us to actually use */ +static int ec_timeout = EC_BASE_TIMEOUT; + +static int __init olpc_ec_timeout_set(char *str) +{ + if (get_option(&str, &ec_timeout) != 1) { + ec_timeout = EC_BASE_TIMEOUT; + printk(KERN_ERR "olpc-ec: invalid argument to " + "'olpc_ec_timeout=', ignoring!\n"); + } + printk(KERN_DEBUG "olpc-ec: using %d ms delay for EC commands.\n", + ec_timeout); + return 1; +} +__setup("olpc_ec_timeout=", olpc_ec_timeout_set); + +/* + * These {i,o}bf_status functions return whether the buffers are full or not. + */ + +static inline unsigned int ibf_status(unsigned int port) +{ + return !!(inb(port) & 0x02); +} + +static inline unsigned int obf_status(unsigned int port) +{ + return inb(port) & 0x01; +} + +#define wait_on_ibf(p, d) __wait_on_ibf(__LINE__, (p), (d)) +static int __wait_on_ibf(unsigned int line, unsigned int port, int desired) +{ + unsigned int timeo; + int state = ibf_status(port); + + for (timeo = ec_timeout; state != desired && timeo; timeo--) { + mdelay(1); + state = ibf_status(port); + } + + if ((state == desired) && (ec_timeout > EC_BASE_TIMEOUT) && + timeo < (ec_timeout - EC_BASE_TIMEOUT)) { + printk(KERN_WARNING "olpc-ec: %d: waited %u ms for IBF!\n", + line, ec_timeout - timeo); + } + + return !(state == desired); +} + +#define wait_on_obf(p, d) __wait_on_obf(__LINE__, (p), (d)) +static int __wait_on_obf(unsigned int line, unsigned int port, int desired) +{ + unsigned int timeo; + int state = obf_status(port); + + for (timeo = ec_timeout; state != desired && timeo; timeo--) { + mdelay(1); + state = obf_status(port); + } + + if ((state == desired) && (ec_timeout > EC_BASE_TIMEOUT) && + timeo < (ec_timeout - EC_BASE_TIMEOUT)) { + printk(KERN_WARNING "olpc-ec: %d: waited %u ms for OBF!\n", + line, ec_timeout - timeo); + } + + return !(state == desired); +} + +/* + * This allows the kernel to run Embedded Controller commands. The EC is + * documented at <http://wiki.laptop.org/go/Embedded_controller>, and the + * available EC commands are here: + * <http://wiki.laptop.org/go/Ec_specification>. Unfortunately, while + * OpenFirmware's source is available, the EC's is not. + */ +int olpc_ec_cmd(unsigned char cmd, unsigned char *inbuf, size_t inlen, + unsigned char *outbuf, size_t outlen) +{ + unsigned long flags; + int ret = -EIO; + int i; + int restarts = 0; + + spin_lock_irqsave(&ec_lock, flags); + + /* Clear OBF */ + for (i = 0; i < 10 && (obf_status(0x6c) == 1); i++) + inb(0x68); + if (i == 10) { + printk(KERN_ERR "olpc-ec: timeout while attempting to " + "clear OBF flag!\n"); + goto err; + } + + if (wait_on_ibf(0x6c, 0)) { + printk(KERN_ERR "olpc-ec: timeout waiting for EC to " + "quiesce!\n"); + goto err; + } + +restart: + /* + * Note that if we time out during any IBF checks, that's a failure; + * we have to return. There's no way for the kernel to clear that. + * + * If we time out during an OBF check, we can restart the command; + * reissuing it will clear the OBF flag, and we should be alright. + * The OBF flag will sometimes misbehave due to what we believe + * is a hardware quirk.. 
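__wait_on_ibf() and __wait_on_obf() above are instances of one common pattern: poll a status bit roughly once per millisecond until it reaches the desired state or a budget of ec_timeout milliseconds runs out. The generic shape as a sketch, busy-waiting with mdelay() exactly as the EC path does, since that path may run with interrupts disabled:

/* needs <linux/io.h>, <linux/delay.h>, <linux/errno.h> */
static int demo_poll_bit(unsigned int port, u8 mask, bool want_set,
			 unsigned int budget_ms)
{
	while (budget_ms--) {
		if (!!(inb(port) & mask) == want_set)
			return 0;
		mdelay(1);	/* busy-wait; may run with IRQs off */
	}
	return -ETIMEDOUT;
}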
+ */ + pr_devel("olpc-ec: running cmd 0x%x\n", cmd); + outb(cmd, 0x6c); + + if (wait_on_ibf(0x6c, 0)) { + printk(KERN_ERR "olpc-ec: timeout waiting for EC to read " + "command!\n"); + goto err; + } + + if (inbuf && inlen) { + /* write data to EC */ + for (i = 0; i < inlen; i++) { + pr_devel("olpc-ec: sending cmd arg 0x%x\n", inbuf[i]); + outb(inbuf[i], 0x68); + if (wait_on_ibf(0x6c, 0)) { + printk(KERN_ERR "olpc-ec: timeout waiting for" + " EC accept data!\n"); + goto err; + } + } + } + if (outbuf && outlen) { + /* read data from EC */ + for (i = 0; i < outlen; i++) { + if (wait_on_obf(0x6c, 1)) { + printk(KERN_ERR "olpc-ec: timeout waiting for" + " EC to provide data!\n"); + if (restarts++ < 10) + goto restart; + goto err; + } + outbuf[i] = inb(0x68); + pr_devel("olpc-ec: received 0x%x\n", outbuf[i]); + } + } + + ret = 0; +err: + spin_unlock_irqrestore(&ec_lock, flags); + return ret; +} +EXPORT_SYMBOL_GPL(olpc_ec_cmd); + +void olpc_ec_wakeup_set(u16 value) +{ + ec_wakeup_mask |= value; +} +EXPORT_SYMBOL_GPL(olpc_ec_wakeup_set); + +void olpc_ec_wakeup_clear(u16 value) +{ + ec_wakeup_mask &= ~value; +} +EXPORT_SYMBOL_GPL(olpc_ec_wakeup_clear); + +/* + * Returns true if the compile and runtime configurations allow for EC events + * to wake the system. + */ +bool olpc_ec_wakeup_available(void) +{ + if (!machine_is_olpc()) + return false; + + /* + * XO-1 EC wakeups are available when olpc-xo1-sci driver is + * compiled in + */ +#ifdef CONFIG_OLPC_XO1_SCI + if (olpc_platform_info.boardrev < olpc_board_pre(0xd0)) /* XO-1 */ + return true; +#endif + + /* + * XO-1.5 EC wakeups are available when olpc-xo15-sci driver is + * compiled in + */ +#ifdef CONFIG_OLPC_XO15_SCI + if (olpc_platform_info.boardrev >= olpc_board_pre(0xd0)) /* XO-1.5 */ + return true; +#endif + + return false; +} +EXPORT_SYMBOL_GPL(olpc_ec_wakeup_available); + +int olpc_ec_mask_write(u16 bits) +{ + if (olpc_platform_info.flags & OLPC_F_EC_WIDE_SCI) { + __be16 ec_word = cpu_to_be16(bits); + return olpc_ec_cmd(EC_WRITE_EXT_SCI_MASK, (void *) &ec_word, 2, + NULL, 0); + } else { + unsigned char ec_byte = bits & 0xff; + return olpc_ec_cmd(EC_WRITE_SCI_MASK, &ec_byte, 1, NULL, 0); + } +} +EXPORT_SYMBOL_GPL(olpc_ec_mask_write); + +int olpc_ec_sci_query(u16 *sci_value) +{ + int ret; + + if (olpc_platform_info.flags & OLPC_F_EC_WIDE_SCI) { + __be16 ec_word; + ret = olpc_ec_cmd(EC_EXT_SCI_QUERY, + NULL, 0, (void *) &ec_word, 2); + if (ret == 0) + *sci_value = be16_to_cpu(ec_word); + } else { + unsigned char ec_byte; + ret = olpc_ec_cmd(EC_SCI_QUERY, NULL, 0, &ec_byte, 1); + if (ret == 0) + *sci_value = ec_byte; + } + + return ret; +} +EXPORT_SYMBOL_GPL(olpc_ec_sci_query); + +static ssize_t ec_debugfs_cmd_write(struct file *file, const char __user *buf, + size_t size, loff_t *ppos) +{ + int i, m; + unsigned char ec_cmd[EC_MAX_CMD_ARGS]; + unsigned int ec_cmd_int[EC_MAX_CMD_ARGS]; + char cmdbuf[64]; + int ec_cmd_bytes; + + mutex_lock(&ec_debugfs_cmd_lock); + + size = simple_write_to_buffer(cmdbuf, sizeof(cmdbuf), ppos, buf, size); + + m = sscanf(cmdbuf, "%x:%u %x %x %x %x %x", &ec_cmd_int[0], + &ec_debugfs_resp_bytes, + &ec_cmd_int[1], &ec_cmd_int[2], &ec_cmd_int[3], + &ec_cmd_int[4], &ec_cmd_int[5]); + if (m < 2 || ec_debugfs_resp_bytes > EC_MAX_CMD_REPLY) { + /* reset to prevent overflow on read */ + ec_debugfs_resp_bytes = 0; + + printk(KERN_DEBUG "olpc-ec: bad ec cmd: " + "cmd:response-count [arg1 [arg2 ...]]\n"); + size = -EINVAL; + goto out; + } + + /* convert scanf'd ints to char */ + ec_cmd_bytes = m - 2; + for (i = 0; i <= 
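For a concrete caller of olpc_ec_cmd(): olpc_init(), later in this file, reads the EC firmware revision with no argument bytes and a single reply byte. As a usage sketch:

static int demo_ec_version(unsigned char *ver)
{
	/* EC_FIRMWARE_REV: no input bytes, one output byte */
	return olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0, ver, 1);
}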
ec_cmd_bytes; i++) + ec_cmd[i] = ec_cmd_int[i]; + + printk(KERN_DEBUG "olpc-ec: debugfs cmd 0x%02x with %d args " + "%02x %02x %02x %02x %02x, want %d returns\n", + ec_cmd[0], ec_cmd_bytes, ec_cmd[1], ec_cmd[2], ec_cmd[3], + ec_cmd[4], ec_cmd[5], ec_debugfs_resp_bytes); + + olpc_ec_cmd(ec_cmd[0], (ec_cmd_bytes == 0) ? NULL : &ec_cmd[1], + ec_cmd_bytes, ec_debugfs_resp, ec_debugfs_resp_bytes); + + printk(KERN_DEBUG "olpc-ec: response " + "%02x %02x %02x %02x %02x %02x %02x %02x (%d bytes expected)\n", + ec_debugfs_resp[0], ec_debugfs_resp[1], ec_debugfs_resp[2], + ec_debugfs_resp[3], ec_debugfs_resp[4], ec_debugfs_resp[5], + ec_debugfs_resp[6], ec_debugfs_resp[7], ec_debugfs_resp_bytes); + +out: + mutex_unlock(&ec_debugfs_cmd_lock); + return size; +} + +static ssize_t ec_debugfs_cmd_read(struct file *file, char __user *buf, + size_t size, loff_t *ppos) +{ + unsigned int i, r; + char *rp; + char respbuf[64]; + + mutex_lock(&ec_debugfs_cmd_lock); + rp = respbuf; + rp += sprintf(rp, "%02x", ec_debugfs_resp[0]); + for (i = 1; i < ec_debugfs_resp_bytes; i++) + rp += sprintf(rp, ", %02x", ec_debugfs_resp[i]); + mutex_unlock(&ec_debugfs_cmd_lock); + rp += sprintf(rp, "\n"); + + r = rp - respbuf; + return simple_read_from_buffer(buf, size, ppos, respbuf, r); +} + +static const struct file_operations ec_debugfs_genops = { + .write = ec_debugfs_cmd_write, + .read = ec_debugfs_cmd_read, +}; + +static void setup_debugfs(void) +{ + ec_debugfs_dir = debugfs_create_dir("olpc-ec", 0); + if (ec_debugfs_dir == ERR_PTR(-ENODEV)) + return; + + debugfs_create_file("cmd", 0600, ec_debugfs_dir, NULL, + &ec_debugfs_genops); +} + +static int olpc_ec_suspend(void) +{ + return olpc_ec_mask_write(ec_wakeup_mask); +} + +static bool __init check_ofw_architecture(struct device_node *root) +{ + const char *olpc_arch; + int propsize; + + olpc_arch = of_get_property(root, "architecture", &propsize); + return propsize == 5 && strncmp("OLPC", olpc_arch, 5) == 0; +} + +static u32 __init get_board_revision(struct device_node *root) +{ + int propsize; + const __be32 *rev; + + rev = of_get_property(root, "board-revision-int", &propsize); + if (propsize != 4) + return 0; + + return be32_to_cpu(*rev); +} + +static bool __init platform_detect(void) +{ + struct device_node *root = of_find_node_by_path("/"); + bool success; + + if (!root) + return false; + + success = check_ofw_architecture(root); + if (success) { + olpc_platform_info.boardrev = get_board_revision(root); + olpc_platform_info.flags |= OLPC_F_PRESENT; + } + + of_node_put(root); + return success; +} + +static int __init add_xo1_platform_devices(void) +{ + struct platform_device *pdev; + + pdev = platform_device_register_simple("xo1-rfkill", -1, NULL, 0); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); + + pdev = platform_device_register_simple("olpc-xo1", -1, NULL, 0); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); + + return 0; +} + +static struct syscore_ops olpc_syscore_ops = { + .suspend = olpc_ec_suspend, +}; + +static int __init olpc_init(void) +{ + int r = 0; + + if (!olpc_ofw_present() || !platform_detect()) + return 0; + + spin_lock_init(&ec_lock); + + /* assume B1 and above models always have a DCON */ + if (olpc_board_at_least(olpc_board(0xb1))) + olpc_platform_info.flags |= OLPC_F_DCON; + + /* get the EC revision */ + olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0, + (unsigned char *) &olpc_platform_info.ecver, 1); + +#ifdef CONFIG_PCI_OLPC + /* If the VSA exists let it emulate PCI, if not emulate in kernel. + * XO-1 only. 
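setup_debugfs() above builds the whole debugfs interface in two calls: one directory and one "cmd" file bound to the read/write fops. The pattern in isolation, as a sketch — the IS_ERR_OR_NULL() check here is a slightly broader guard than the ERR_PTR(-ENODEV) test above, covering allocation failure as well as a kernel built without debugfs:

/* needs <linux/debugfs.h> */
static struct dentry *demo_dir;

static void demo_setup_debugfs(const struct file_operations *fops)
{
	demo_dir = debugfs_create_dir("olpc-ec", NULL);
	if (IS_ERR_OR_NULL(demo_dir))
		return;		/* debugfs absent or create failed */
	debugfs_create_file("cmd", 0600, demo_dir, NULL, fops);
}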
*/ + if (olpc_platform_info.boardrev < olpc_board_pre(0xd0) && + !cs5535_has_vsa2()) + x86_init.pci.arch_init = pci_olpc_init; +#endif + /* EC version 0x5f adds support for wide SCI mask */ + if (olpc_platform_info.ecver >= 0x5f) + olpc_platform_info.flags |= OLPC_F_EC_WIDE_SCI; + + printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n", + ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "", + olpc_platform_info.boardrev >> 4, + olpc_platform_info.ecver); + + if (olpc_platform_info.boardrev < olpc_board_pre(0xd0)) { /* XO-1 */ + r = add_xo1_platform_devices(); + if (r) + return r; + } + + register_syscore_ops(&olpc_syscore_ops); + setup_debugfs(); + + return 0; +} + +postcore_initcall(olpc_init); diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c new file mode 100644 index 00000000..d6ee9298 --- /dev/null +++ b/arch/x86/platform/olpc/olpc_dt.c @@ -0,0 +1,304 @@ +/* + * OLPC-specific OFW device tree support code. + * + * Paul Mackerras August 1996. + * Copyright (C) 1996-2005 Paul Mackerras. + * + * Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner. + * {engebret|bergner}@us.ibm.com + * + * Adapted for sparc by David S. Miller davem@davemloft.net + * Adapted for x86/OLPC by Andres Salomon <dilinger@queued.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/bootmem.h> +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/of_pdt.h> +#include <asm/olpc.h> +#include <asm/olpc_ofw.h> + +static phandle __init olpc_dt_getsibling(phandle node) +{ + const void *args[] = { (void *)node }; + void *res[] = { &node }; + + if ((s32)node == -1) + return 0; + + if (olpc_ofw("peer", args, res) || (s32)node == -1) + return 0; + + return node; +} + +static phandle __init olpc_dt_getchild(phandle node) +{ + const void *args[] = { (void *)node }; + void *res[] = { &node }; + + if ((s32)node == -1) + return 0; + + if (olpc_ofw("child", args, res) || (s32)node == -1) { + pr_err("PROM: %s: fetching child failed!\n", __func__); + return 0; + } + + return node; +} + +static int __init olpc_dt_getproplen(phandle node, const char *prop) +{ + const void *args[] = { (void *)node, prop }; + int len; + void *res[] = { &len }; + + if ((s32)node == -1) + return -1; + + if (olpc_ofw("getproplen", args, res)) { + pr_err("PROM: %s: getproplen failed!\n", __func__); + return -1; + } + + return len; +} + +static int __init olpc_dt_getproperty(phandle node, const char *prop, + char *buf, int bufsize) +{ + int plen; + + plen = olpc_dt_getproplen(node, prop); + if (plen > bufsize || plen < 1) { + return -1; + } else { + const void *args[] = { (void *)node, prop, buf, (void *)plen }; + void *res[] = { &plen }; + + if (olpc_ofw("getprop", args, res)) { + pr_err("PROM: %s: getprop failed!\n", __func__); + return -1; + } + } + + return plen; +} + +static int __init olpc_dt_nextprop(phandle node, char *prev, char *buf) +{ + const void *args[] = { (void *)node, prev, buf }; + int success; + void *res[] = { &success }; + + buf[0] = '\0'; + + if ((s32)node == -1) + return -1; + + if (olpc_ofw("nextprop", args, res) || success != 1) + return -1; + + return 0; +} + +static int __init olpc_dt_pkg2path(phandle node, char *buf, + const int buflen, int *len) +{ + const void *args[] = { (void *)node, buf, (void *)buflen }; + 
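All of the olpc_dt_* helpers above share one calling convention: inputs are collected in an args[] array of pointers, outputs in res[], and olpc_ofw() names the Open Firmware client-interface service to invoke. A hypothetical "parent" lookup, purely to illustrate the shape, would mirror olpc_dt_getsibling():

static phandle __init demo_dt_getparent(phandle node)
{
	const void *args[] = { (void *)node };
	void *res[] = { &node };

	if ((s32)node == -1)
		return 0;

	/* -1 from OFW means "no such node" */
	if (olpc_ofw("parent", args, res) || (s32)node == -1)
		return 0;

	return node;
}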
void *res[] = { len }; + + if ((s32)node == -1) + return -1; + + if (olpc_ofw("package-to-path", args, res) || *len < 1) + return -1; + + return 0; +} + +static unsigned int prom_early_allocated __initdata; + +void * __init prom_early_alloc(unsigned long size) +{ + static u8 *mem; + static size_t free_mem; + void *res; + + if (free_mem < size) { + const size_t chunk_size = max(PAGE_SIZE, size); + + /* + * To mimimize the number of allocations, grab at least + * PAGE_SIZE of memory (that's an arbitrary choice that's + * fast enough on the platforms we care about while minimizing + * wasted bootmem) and hand off chunks of it to callers. + */ + res = alloc_bootmem(chunk_size); + BUG_ON(!res); + prom_early_allocated += chunk_size; + memset(res, 0, chunk_size); + free_mem = chunk_size; + mem = res; + } + + /* allocate from the local cache */ + free_mem -= size; + res = mem; + mem += size; + return res; +} + +static struct of_pdt_ops prom_olpc_ops __initdata = { + .nextprop = olpc_dt_nextprop, + .getproplen = olpc_dt_getproplen, + .getproperty = olpc_dt_getproperty, + .getchild = olpc_dt_getchild, + .getsibling = olpc_dt_getsibling, + .pkg2path = olpc_dt_pkg2path, +}; + +static phandle __init olpc_dt_finddevice(const char *path) +{ + phandle node; + const void *args[] = { path }; + void *res[] = { &node }; + + if (olpc_ofw("finddevice", args, res)) { + pr_err("olpc_dt: finddevice failed!\n"); + return 0; + } + + if ((s32) node == -1) + return 0; + + return node; +} + +static int __init olpc_dt_interpret(const char *words) +{ + int result; + const void *args[] = { words }; + void *res[] = { &result }; + + if (olpc_ofw("interpret", args, res)) { + pr_err("olpc_dt: interpret failed!\n"); + return -1; + } + + return result; +} + +/* + * Extract board revision directly from OFW device tree. + * We can't use olpc_platform_info because that hasn't been set up yet. + */ +static u32 __init olpc_dt_get_board_revision(void) +{ + phandle node; + __be32 rev; + int r; + + node = olpc_dt_finddevice("/"); + if (!node) + return 0; + + r = olpc_dt_getproperty(node, "board-revision-int", + (char *) &rev, sizeof(rev)); + if (r < 0) + return 0; + + return be32_to_cpu(rev); +} + +void __init olpc_dt_fixup(void) +{ + int r; + char buf[64]; + phandle node; + u32 board_rev; + + node = olpc_dt_finddevice("/battery@0"); + if (!node) + return; + + /* + * If the battery node has a compatible property, we are running a new + * enough firmware and don't have fixups to make. 
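prom_early_alloc() above is a bump allocator: it grabs a chunk of bootmem at least PAGE_SIZE long, then hands out consecutive slices, abandoning whatever is left of the old chunk when a request does not fit. The same idea in miniature, with calloc() standing in for alloc_bootmem() so the sketch is self-contained:

#include <stdlib.h>

#define DEMO_CHUNK 4096

static void *demo_bump_alloc(size_t size)
{
	static char *mem;
	static size_t free_mem;
	void *res;

	if (free_mem < size) {
		size_t chunk = size > DEMO_CHUNK ? size : DEMO_CHUNK;

		mem = calloc(1, chunk);	/* zeroed, like the memset above */
		if (!mem)
			return NULL;
		free_mem = chunk;	/* old chunk's tail is abandoned */
	}
	res = mem;
	mem += size;
	free_mem -= size;
	return res;
}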
+ */ + r = olpc_dt_getproperty(node, "compatible", buf, sizeof(buf)); + if (r > 0) + return; + + pr_info("PROM DT: Old firmware detected, applying fixes\n"); + + /* Add olpc,xo1-battery compatible marker to battery node */ + olpc_dt_interpret("\" /battery@0\" find-device" + " \" olpc,xo1-battery\" +compatible" + " device-end"); + + board_rev = olpc_dt_get_board_revision(); + if (!board_rev) + return; + + if (board_rev >= olpc_board_pre(0xd0)) { + /* XO-1.5: add dcon device */ + olpc_dt_interpret("\" /pci/display@1\" find-device" + " new-device" + " \" dcon\" device-name \" olpc,xo1-dcon\" +compatible" + " finish-device device-end"); + } else { + /* XO-1: add dcon device, mark RTC as olpc,xo1-rtc */ + olpc_dt_interpret("\" /pci/display@1,1\" find-device" + " new-device" + " \" dcon\" device-name \" olpc,xo1-dcon\" +compatible" + " finish-device device-end" + " \" /rtc\" find-device" + " \" olpc,xo1-rtc\" +compatible" + " device-end"); + } +} + +void __init olpc_dt_build_devicetree(void) +{ + phandle root; + + if (!olpc_ofw_is_installed()) + return; + + olpc_dt_fixup(); + + root = olpc_dt_getsibling(0); + if (!root) { + pr_err("PROM: unable to get root node from OFW!\n"); + return; + } + of_pdt_build_devicetree(root, &prom_olpc_ops); + + pr_info("PROM DT: Built device tree with %u bytes of memory.\n", + prom_early_allocated); +} + +/* A list of DT node/bus matches that we want to expose as platform devices */ +static struct of_device_id __initdata of_ids[] = { + { .compatible = "olpc,xo1-battery" }, + { .compatible = "olpc,xo1-dcon" }, + { .compatible = "olpc,xo1-rtc" }, + {}, +}; + +static int __init olpc_create_platform_devices(void) +{ + if (machine_is_olpc()) + return of_platform_bus_probe(NULL, of_ids, NULL); + else + return 0; +} +device_initcall(olpc_create_platform_devices); diff --git a/arch/x86/platform/olpc/olpc_ofw.c b/arch/x86/platform/olpc/olpc_ofw.c new file mode 100644 index 00000000..e7604f62 --- /dev/null +++ b/arch/x86/platform/olpc/olpc_ofw.c @@ -0,0 +1,117 @@ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <asm/page.h> +#include <asm/setup.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/olpc_ofw.h> + +/* address of OFW callback interface; will be NULL if OFW isn't found */ +static int (*olpc_ofw_cif)(int *); + +/* page dir entry containing OFW's pgdir table; filled in by head_32.S */ +u32 olpc_ofw_pgd __initdata; + +static DEFINE_SPINLOCK(ofw_lock); + +#define MAXARGS 10 + +void __init setup_olpc_ofw_pgd(void) +{ + pgd_t *base, *ofw_pde; + + if (!olpc_ofw_cif) + return; + + /* fetch OFW's PDE */ + base = early_ioremap(olpc_ofw_pgd, sizeof(olpc_ofw_pgd) * PTRS_PER_PGD); + if (!base) { + printk(KERN_ERR "failed to remap OFW's pgd - disabling OFW!\n"); + olpc_ofw_cif = NULL; + return; + } + ofw_pde = &base[OLPC_OFW_PDE_NR]; + + /* install OFW's PDE permanently into the kernel's pgtable */ + set_pgd(&swapper_pg_dir[OLPC_OFW_PDE_NR], *ofw_pde); + /* implicit optimization barrier here due to uninline function return */ + + early_iounmap(base, sizeof(olpc_ofw_pgd) * PTRS_PER_PGD); +} + +int __olpc_ofw(const char *name, int nr_args, const void **args, int nr_res, + void **res) +{ + int ofw_args[MAXARGS + 3]; + unsigned long flags; + int ret, i, *p; + + BUG_ON(nr_args + nr_res > MAXARGS); + + if (!olpc_ofw_cif) + return -EIO; + + ofw_args[0] = (int)name; + ofw_args[1] = nr_args; + ofw_args[2] = nr_res; + + p = &ofw_args[3]; + for (i = 0; i < nr_args; i++, p++) + *p = (int)args[i]; + + /* call into ofw */ + 
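olpc_dt_fixup() above patches the device tree by feeding Forth straight to OFW through olpc_dt_interpret(); each string follows the find-device / property-words / device-end shape. Any node could be tagged the same way — a purely hypothetical example:

static void __init demo_tag_node(void)
{
	/* hypothetical node path and compatible string */
	olpc_dt_interpret("\" /keyboard\" find-device"
			  " \" olpc,demo-kbd\" +compatible"
			  " device-end");
}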
spin_lock_irqsave(&ofw_lock, flags); + ret = olpc_ofw_cif(ofw_args); + spin_unlock_irqrestore(&ofw_lock, flags); + + if (!ret) { + for (i = 0; i < nr_res; i++, p++) + *((int *)res[i]) = *p; + } + + return ret; +} +EXPORT_SYMBOL_GPL(__olpc_ofw); + +bool olpc_ofw_present(void) +{ + return olpc_ofw_cif != NULL; +} +EXPORT_SYMBOL_GPL(olpc_ofw_present); + +/* OFW cif _should_ be above this address */ +#define OFW_MIN 0xff000000 + +/* OFW starts on a 1MB boundary */ +#define OFW_BOUND (1<<20) + +void __init olpc_ofw_detect(void) +{ + struct olpc_ofw_header *hdr = &boot_params.olpc_ofw_header; + unsigned long start; + + /* ensure OFW booted us by checking for "OFW " string */ + if (hdr->ofw_magic != OLPC_OFW_SIG) + return; + + olpc_ofw_cif = (int (*)(int *))hdr->cif_handler; + + if ((unsigned long)olpc_ofw_cif < OFW_MIN) { + printk(KERN_ERR "OFW detected, but cif has invalid address 0x%lx - disabling.\n", + (unsigned long)olpc_ofw_cif); + olpc_ofw_cif = NULL; + return; + } + + /* determine where OFW starts in memory */ + start = round_down((unsigned long)olpc_ofw_cif, OFW_BOUND); + printk(KERN_INFO "OFW detected in memory, cif @ 0x%lx (reserving top %ldMB)\n", + (unsigned long)olpc_ofw_cif, (-start) >> 20); + reserve_top_address(-start); +} + +bool __init olpc_ofw_is_installed(void) +{ + return olpc_ofw_cif != NULL; +} diff --git a/arch/x86/platform/olpc/xo1-wakeup.S b/arch/x86/platform/olpc/xo1-wakeup.S new file mode 100644 index 00000000..948deb28 --- /dev/null +++ b/arch/x86/platform/olpc/xo1-wakeup.S @@ -0,0 +1,124 @@ +.text +#include <linux/linkage.h> +#include <asm/segment.h> +#include <asm/page.h> +#include <asm/pgtable_32.h> + + .macro writepost,value + movb $0x34, %al + outb %al, $0x70 + movb $\value, %al + outb %al, $0x71 + .endm + +wakeup_start: + # OFW lands us here, running in protected mode, with a + # kernel-compatible GDT already setup. 
+ + # Clear any dangerous flags + pushl $0 + popfl + + writepost 0x31 + + # Set up %cr3 + movl $initial_page_table - __PAGE_OFFSET, %eax + movl %eax, %cr3 + + movl saved_cr4, %eax + movl %eax, %cr4 + + movl saved_cr0, %eax + movl %eax, %cr0 + + # Control registers were modified, pipeline resync is needed + jmp 1f +1: + + movw $__KERNEL_DS, %ax + movw %ax, %ss + movw %ax, %ds + movw %ax, %es + movw %ax, %fs + movw %ax, %gs + + lgdt saved_gdt + lidt saved_idt + lldt saved_ldt + ljmp $(__KERNEL_CS),$1f +1: + movl %cr3, %eax + movl %eax, %cr3 + wbinvd + + # Go back to the return point + jmp ret_point + +save_registers: + sgdt saved_gdt + sidt saved_idt + sldt saved_ldt + + pushl %edx + movl %cr4, %edx + movl %edx, saved_cr4 + + movl %cr0, %edx + movl %edx, saved_cr0 + + popl %edx + + movl %ebx, saved_context_ebx + movl %ebp, saved_context_ebp + movl %esi, saved_context_esi + movl %edi, saved_context_edi + + pushfl + popl saved_context_eflags + + ret + +restore_registers: + movl saved_context_ebp, %ebp + movl saved_context_ebx, %ebx + movl saved_context_esi, %esi + movl saved_context_edi, %edi + + pushl saved_context_eflags + popfl + + ret + +ENTRY(do_olpc_suspend_lowlevel) + call save_processor_state + call save_registers + + # This is the stack context we want to remember + movl %esp, saved_context_esp + + pushl $3 + call xo1_do_sleep + + jmp wakeup_start + .p2align 4,,7 +ret_point: + movl saved_context_esp, %esp + + writepost 0x32 + + call restore_registers + call restore_processor_state + ret + +.data +saved_gdt: .long 0,0 +saved_idt: .long 0,0 +saved_ldt: .long 0 +saved_cr4: .long 0 +saved_cr0: .long 0 +saved_context_esp: .long 0 +saved_context_edi: .long 0 +saved_context_esi: .long 0 +saved_context_ebx: .long 0 +saved_context_ebp: .long 0 +saved_context_eflags: .long 0 diff --git a/arch/x86/platform/scx200/Makefile b/arch/x86/platform/scx200/Makefile new file mode 100644 index 00000000..762b4c7f --- /dev/null +++ b/arch/x86/platform/scx200/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_SCx200) += scx200.o +scx200-y += scx200_32.o diff --git a/arch/x86/platform/scx200/scx200_32.c b/arch/x86/platform/scx200/scx200_32.c new file mode 100644 index 00000000..7a9ad30d --- /dev/null +++ b/arch/x86/platform/scx200/scx200_32.c @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2001,2002 Christer Weinigel <wingel@nano-system.com> + * + * National Semiconductor SCx200 support. 
+ */ + +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/mutex.h> +#include <linux/pci.h> + +#include <linux/scx200.h> +#include <linux/scx200_gpio.h> + +/* Verify that the configuration block really is there */ +#define scx200_cb_probe(base) (inw((base) + SCx200_CBA) == (base)) + +MODULE_AUTHOR("Christer Weinigel <wingel@nano-system.com>"); +MODULE_DESCRIPTION("NatSemi SCx200 Driver"); +MODULE_LICENSE("GPL"); + +unsigned scx200_gpio_base = 0; +unsigned long scx200_gpio_shadow[2]; + +unsigned scx200_cb_base = 0; + +static struct pci_device_id scx200_tbl[] = { + { PCI_VDEVICE(NS, PCI_DEVICE_ID_NS_SCx200_BRIDGE) }, + { PCI_VDEVICE(NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE) }, + { PCI_VDEVICE(NS, PCI_DEVICE_ID_NS_SCx200_XBUS) }, + { PCI_VDEVICE(NS, PCI_DEVICE_ID_NS_SC1100_XBUS) }, + { }, +}; +MODULE_DEVICE_TABLE(pci,scx200_tbl); + +static int __devinit scx200_probe(struct pci_dev *, const struct pci_device_id *); + +static struct pci_driver scx200_pci_driver = { + .name = "scx200", + .id_table = scx200_tbl, + .probe = scx200_probe, +}; + +static DEFINE_MUTEX(scx200_gpio_config_lock); + +static void __devinit scx200_init_shadow(void) +{ + int bank; + + /* read the current values driven on the GPIO signals */ + for (bank = 0; bank < 2; ++bank) + scx200_gpio_shadow[bank] = inl(scx200_gpio_base + 0x10 * bank); +} + +static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + unsigned base; + + if (pdev->device == PCI_DEVICE_ID_NS_SCx200_BRIDGE || + pdev->device == PCI_DEVICE_ID_NS_SC1100_BRIDGE) { + base = pci_resource_start(pdev, 0); + pr_info("GPIO base 0x%x\n", base); + + if (!request_region(base, SCx200_GPIO_SIZE, + "NatSemi SCx200 GPIO")) { + pr_err("can't allocate I/O for GPIOs\n"); + return -EBUSY; + } + + scx200_gpio_base = base; + scx200_init_shadow(); + + } else { + /* find the base of the Configuration Block */ + if (scx200_cb_probe(SCx200_CB_BASE_FIXED)) { + scx200_cb_base = SCx200_CB_BASE_FIXED; + } else { + pci_read_config_dword(pdev, SCx200_CBA_SCRATCH, &base); + if (scx200_cb_probe(base)) { + scx200_cb_base = base; + } else { + pr_warn("Configuration Block not found\n"); + return -ENODEV; + } + } + pr_info("Configuration Block base 0x%x\n", scx200_cb_base); + } + + return 0; +} + +u32 scx200_gpio_configure(unsigned index, u32 mask, u32 bits) +{ + u32 config, new_config; + + mutex_lock(&scx200_gpio_config_lock); + + outl(index, scx200_gpio_base + 0x20); + config = inl(scx200_gpio_base + 0x24); + + new_config = (config & mask) | bits; + outl(new_config, scx200_gpio_base + 0x24); + + mutex_unlock(&scx200_gpio_config_lock); + + return config; +} + +static int __init scx200_init(void) +{ + pr_info("NatSemi SCx200 Driver\n"); + return pci_register_driver(&scx200_pci_driver); +} + +static void __exit scx200_cleanup(void) +{ + pci_unregister_driver(&scx200_pci_driver); + release_region(scx200_gpio_base, SCx200_GPIO_SIZE); +} + +module_init(scx200_init); +module_exit(scx200_cleanup); + +EXPORT_SYMBOL(scx200_gpio_base); +EXPORT_SYMBOL(scx200_gpio_shadow); +EXPORT_SYMBOL(scx200_gpio_configure); +EXPORT_SYMBOL(scx200_cb_base); diff --git a/arch/x86/platform/sfi/Makefile b/arch/x86/platform/sfi/Makefile new file mode 100644 index 00000000..cc5db116 --- /dev/null +++ b/arch/x86/platform/sfi/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_SFI) += sfi.o diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c new file mode 100644 index 00000000..7785b72e --- /dev/null +++ 
b/arch/x86/platform/sfi/sfi.c @@ -0,0 +1,109 @@ +/* + * sfi.c - x86 architecture SFI support. + * + * Copyright (c) 2009, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ + +#define KMSG_COMPONENT "SFI" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/acpi.h> +#include <linux/init.h> +#include <linux/sfi.h> +#include <linux/io.h> + +#include <asm/io_apic.h> +#include <asm/mpspec.h> +#include <asm/setup.h> +#include <asm/apic.h> + +#ifdef CONFIG_X86_LOCAL_APIC +static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; + +/* All CPUs enumerated by SFI must be present and enabled */ +static void __cpuinit mp_sfi_register_lapic(u8 id) +{ + if (MAX_LOCAL_APIC - id <= 0) { + pr_warning("Processor #%d invalid (max %d)\n", + id, MAX_LOCAL_APIC); + return; + } + + pr_info("registering lapic[%d]\n", id); + + generic_processor_info(id, GET_APIC_VERSION(apic_read(APIC_LVR))); +} + +static int __init sfi_parse_cpus(struct sfi_table_header *table) +{ + struct sfi_table_simple *sb; + struct sfi_cpu_table_entry *pentry; + int i; + int cpu_num; + + sb = (struct sfi_table_simple *)table; + cpu_num = SFI_GET_NUM_ENTRIES(sb, struct sfi_cpu_table_entry); + pentry = (struct sfi_cpu_table_entry *)sb->pentry; + + for (i = 0; i < cpu_num; i++) { + mp_sfi_register_lapic(pentry->apic_id); + pentry++; + } + + smp_found_config = 1; + return 0; +} +#endif /* CONFIG_X86_LOCAL_APIC */ + +#ifdef CONFIG_X86_IO_APIC + +static int __init sfi_parse_ioapic(struct sfi_table_header *table) +{ + struct sfi_table_simple *sb; + struct sfi_apic_table_entry *pentry; + int i, num; + + sb = (struct sfi_table_simple *)table; + num = SFI_GET_NUM_ENTRIES(sb, struct sfi_apic_table_entry); + pentry = (struct sfi_apic_table_entry *)sb->pentry; + + for (i = 0; i < num; i++) { + mp_register_ioapic(i, pentry->phys_addr, gsi_top); + pentry++; + } + + WARN(pic_mode, KERN_WARNING + "SFI: pic_mod shouldn't be 1 when IOAPIC table is present\n"); + pic_mode = 0; + return 0; +} +#endif /* CONFIG_X86_IO_APIC */ + +/* + * sfi_platform_init(): register lapics & io-apics + */ +int __init sfi_platform_init(void) +{ +#ifdef CONFIG_X86_LOCAL_APIC + register_lapic_address(sfi_lapic_addr); + sfi_table_parse(SFI_SIG_CPUS, NULL, NULL, sfi_parse_cpus); +#endif +#ifdef CONFIG_X86_IO_APIC + sfi_table_parse(SFI_SIG_APIC, NULL, NULL, sfi_parse_ioapic); +#endif + return 0; +} diff --git a/arch/x86/platform/uv/Makefile b/arch/x86/platform/uv/Makefile new file mode 100644 index 00000000..6c40995f --- /dev/null +++ b/arch/x86/platform/uv/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c new file mode 100644 index 00000000..76661213 --- /dev/null +++ b/arch/x86/platform/uv/bios_uv.c @@ -0,0 +1,216 @@ +/* + * BIOS run time interface routines. 
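sfi_parse_cpus() and sfi_parse_ioapic() above show the fixed shape of an SFI table handler: cast the header to the simple-table layout, derive the entry count with SFI_GET_NUM_ENTRIES(), and walk the packed entries. A skeleton handler (the loop body is hypothetical):

static int __init demo_parse_cpus(struct sfi_table_header *table)
{
	struct sfi_table_simple *sb = (struct sfi_table_simple *)table;
	struct sfi_cpu_table_entry *pentry;
	int i, num;

	num = SFI_GET_NUM_ENTRIES(sb, struct sfi_cpu_table_entry);
	pentry = (struct sfi_cpu_table_entry *)sb->pentry;

	for (i = 0; i < num; i++, pentry++)
		pr_info("cpu entry %d: apic id %u\n", i, pentry->apic_id);

	return 0;
}
/* hooked up with: sfi_table_parse(SFI_SIG_CPUS, NULL, NULL, demo_parse_cpus); */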
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) Russ Anderson <rja@sgi.com> + */ + +#include <linux/efi.h> +#include <linux/export.h> +#include <asm/efi.h> +#include <linux/io.h> +#include <asm/uv/bios.h> +#include <asm/uv/uv_hub.h> + +static struct uv_systab uv_systab; + +s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) +{ + struct uv_systab *tab = &uv_systab; + s64 ret; + + if (!tab->function) + /* + * BIOS does not support UV systab + */ + return BIOS_STATUS_UNIMPLEMENTED; + + ret = efi_call6((void *)__va(tab->function), (u64)which, + a1, a2, a3, a4, a5); + return ret; +} +EXPORT_SYMBOL_GPL(uv_bios_call); + +s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, + u64 a4, u64 a5) +{ + unsigned long bios_flags; + s64 ret; + + local_irq_save(bios_flags); + ret = uv_bios_call(which, a1, a2, a3, a4, a5); + local_irq_restore(bios_flags); + + return ret; +} + +s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, + u64 a4, u64 a5) +{ + s64 ret; + + preempt_disable(); + ret = uv_bios_call(which, a1, a2, a3, a4, a5); + preempt_enable(); + + return ret; +} + + +long sn_partition_id; +EXPORT_SYMBOL_GPL(sn_partition_id); +long sn_coherency_id; +EXPORT_SYMBOL_GPL(sn_coherency_id); +long sn_region_size; +EXPORT_SYMBOL_GPL(sn_region_size); +long system_serial_number; +EXPORT_SYMBOL_GPL(system_serial_number); +int uv_type; +EXPORT_SYMBOL_GPL(uv_type); + + +s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher, + long *region, long *ssn) +{ + s64 ret; + u64 v0, v1; + union partition_info_u part; + + ret = uv_bios_call_irqsave(UV_BIOS_GET_SN_INFO, fc, + (u64)(&v0), (u64)(&v1), 0, 0); + if (ret != BIOS_STATUS_SUCCESS) + return ret; + + part.val = v0; + if (uvtype) + *uvtype = part.hub_version; + if (partid) + *partid = part.partition_id; + if (coher) + *coher = part.coherence_id; + if (region) + *region = part.region_size; + if (ssn) + *ssn = v1; + return ret; +} +EXPORT_SYMBOL_GPL(uv_bios_get_sn_info); + +int +uv_bios_mq_watchlist_alloc(unsigned long addr, unsigned int mq_size, + unsigned long *intr_mmr_offset) +{ + u64 watchlist; + s64 ret; + + /* + * bios returns watchlist number or negative error number. 
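uv_bios_get_sn_info() above lets callers pass NULL for any output they do not need. A usage sketch; the fc value of 0 is an assumption here, not taken from this file:

static void demo_query_partition(void)
{
	int uvtype;
	long partid;
	s64 ret;

	/* fc value assumed; unwanted outputs passed as NULL */
	ret = uv_bios_get_sn_info(0, &uvtype, &partid, NULL, NULL, NULL);
	if (ret != BIOS_STATUS_SUCCESS)
		pr_err("uv_bios_get_sn_info failed: %lld\n", (long long)ret);
}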
+ */ + ret = (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_ALLOC, addr, + mq_size, (u64)intr_mmr_offset, + (u64)&watchlist, 0); + if (ret < BIOS_STATUS_SUCCESS) + return ret; + + return watchlist; +} +EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_alloc); + +int +uv_bios_mq_watchlist_free(int blade, int watchlist_num) +{ + return (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_FREE, + blade, watchlist_num, 0, 0, 0); +} +EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_free); + +s64 +uv_bios_change_memprotect(u64 paddr, u64 len, enum uv_memprotect perms) +{ + return uv_bios_call_irqsave(UV_BIOS_MEMPROTECT, paddr, len, + perms, 0, 0); +} +EXPORT_SYMBOL_GPL(uv_bios_change_memprotect); + +s64 +uv_bios_reserved_page_pa(u64 buf, u64 *cookie, u64 *addr, u64 *len) +{ + s64 ret; + + ret = uv_bios_call_irqsave(UV_BIOS_GET_PARTITION_ADDR, (u64)cookie, + (u64)addr, buf, (u64)len, 0); + return ret; +} +EXPORT_SYMBOL_GPL(uv_bios_reserved_page_pa); + +s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second) +{ + return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type, + (u64)ticks_per_second, 0, 0, 0); +} +EXPORT_SYMBOL_GPL(uv_bios_freq_base); + +/* + * uv_bios_set_legacy_vga_target - Set Legacy VGA I/O Target + * @decode: true to enable target, false to disable target + * @domain: PCI domain number + * @bus: PCI bus number + * + * Returns: + * 0: Success + * -EINVAL: Invalid domain or bus number + * -ENOSYS: Capability not available + * -EBUSY: Legacy VGA I/O cannot be retargeted at this time + */ +int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus) +{ + return uv_bios_call(UV_BIOS_SET_LEGACY_VGA_TARGET, + (u64)decode, (u64)domain, (u64)bus, 0, 0); +} +EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target); + + +#ifdef CONFIG_EFI +void uv_bios_init(void) +{ + struct uv_systab *tab; + + if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || + (efi.uv_systab == (unsigned long)NULL)) { + printk(KERN_CRIT "No EFI UV System Table.\n"); + uv_systab.function = (unsigned long)NULL; + return; + } + + tab = (struct uv_systab *)ioremap(efi.uv_systab, + sizeof(struct uv_systab)); + if (strncmp(tab->signature, "UVST", 4) != 0) + printk(KERN_ERR "bad signature in UV system table!"); + + /* + * Copy table to permanent spot for later use. + */ + memcpy(&uv_systab, tab, sizeof(struct uv_systab)); + iounmap(tab); + + printk(KERN_INFO "EFI UV System Table Revision %d\n", + uv_systab.revision); +} +#else /* !CONFIG_EFI */ + +void uv_bios_init(void) { } +#endif diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c new file mode 100644 index 00000000..59880afa --- /dev/null +++ b/arch/x86/platform/uv/tlb_uv.c @@ -0,0 +1,2122 @@ +/* + * SGI UltraViolet TLB flush routines. + * + * (c) 2008-2011 Cliff Wickman <cpw@sgi.com>, SGI. + * + * This code is released under the GNU General Public License version 2 or + * later. 
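uv_bios_freq_base() above returns a tick rate for a given clock selector. A usage sketch; BIOS_FREQ_BASE_REALTIME_CLOCK is assumed to be one of the selectors defined in asm/uv/bios.h and is not confirmed by this hunk:

static u64 demo_rtc_ticks_per_sec(void)
{
	u64 ticks = 0;

	/* selector constant assumed; returns 0 on any BIOS error */
	if (uv_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, &ticks) !=
	    BIOS_STATUS_SUCCESS)
		return 0;
	return ticks;
}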
+ */ +#include <linux/seq_file.h> +#include <linux/proc_fs.h> +#include <linux/debugfs.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/delay.h> + +#include <asm/mmu_context.h> +#include <asm/uv/uv.h> +#include <asm/uv/uv_mmrs.h> +#include <asm/uv/uv_hub.h> +#include <asm/uv/uv_bau.h> +#include <asm/apic.h> +#include <asm/idle.h> +#include <asm/tsc.h> +#include <asm/irq_vectors.h> +#include <asm/timer.h> + +/* timeouts in nanoseconds (indexed by UVH_AGING_PRESCALE_SEL urgency7 30:28) */ +static int timeout_base_ns[] = { + 20, + 160, + 1280, + 10240, + 81920, + 655360, + 5242880, + 167772160 +}; + +static int timeout_us; +static int nobau; +static int baudisabled; +static spinlock_t disable_lock; +static cycles_t congested_cycles; + +/* tunables: */ +static int max_concurr = MAX_BAU_CONCURRENT; +static int max_concurr_const = MAX_BAU_CONCURRENT; +static int plugged_delay = PLUGGED_DELAY; +static int plugsb4reset = PLUGSB4RESET; +static int timeoutsb4reset = TIMEOUTSB4RESET; +static int ipi_reset_limit = IPI_RESET_LIMIT; +static int complete_threshold = COMPLETE_THRESHOLD; +static int congested_respns_us = CONGESTED_RESPONSE_US; +static int congested_reps = CONGESTED_REPS; +static int congested_period = CONGESTED_PERIOD; + +static struct tunables tunables[] = { + {&max_concurr, MAX_BAU_CONCURRENT}, /* must be [0] */ + {&plugged_delay, PLUGGED_DELAY}, + {&plugsb4reset, PLUGSB4RESET}, + {&timeoutsb4reset, TIMEOUTSB4RESET}, + {&ipi_reset_limit, IPI_RESET_LIMIT}, + {&complete_threshold, COMPLETE_THRESHOLD}, + {&congested_respns_us, CONGESTED_RESPONSE_US}, + {&congested_reps, CONGESTED_REPS}, + {&congested_period, CONGESTED_PERIOD} +}; + +static struct dentry *tunables_dir; +static struct dentry *tunables_file; + +/* these correspond to the statistics printed by ptc_seq_show() */ +static char *stat_description[] = { + "sent: number of shootdown messages sent", + "stime: time spent sending messages", + "numuvhubs: number of hubs targeted with shootdown", + "numuvhubs16: number times 16 or more hubs targeted", + "numuvhubs8: number times 8 or more hubs targeted", + "numuvhubs4: number times 4 or more hubs targeted", + "numuvhubs2: number times 2 or more hubs targeted", + "numuvhubs1: number times 1 hub targeted", + "numcpus: number of cpus targeted with shootdown", + "dto: number of destination timeouts", + "retries: destination timeout retries sent", + "rok: : destination timeouts successfully retried", + "resetp: ipi-style resource resets for plugs", + "resett: ipi-style resource resets for timeouts", + "giveup: fall-backs to ipi-style shootdowns", + "sto: number of source timeouts", + "bz: number of stay-busy's", + "throt: number times spun in throttle", + "swack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE", + "recv: shootdown messages received", + "rtime: time spent processing messages", + "all: shootdown all-tlb messages", + "one: shootdown one-tlb messages", + "mult: interrupts that found multiple messages", + "none: interrupts that found no messages", + "retry: number of retry messages processed", + "canc: number messages canceled by retries", + "nocan: number retries that found nothing to cancel", + "reset: number of ipi-style reset requests processed", + "rcan: number messages canceled by reset requests", + "disable: number times use of the BAU was disabled", + "enable: number times use of the BAU was re-enabled" +}; + +static int __init +setup_nobau(char *arg) +{ + nobau = 1; + return 0; +} +early_param("nobau", setup_nobau); + +/* base pnode in this partition */ 
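setup_nobau() above is the standard early_param() hookup: the handler runs during early command-line parsing, well before ordinary initcalls, so the flag is already settled by the time the BAU is initialized. The pattern with hypothetical names:

static int demo_disabled;

static int __init demo_setup(char *arg)
{
	demo_disabled = 1;	/* bare flag; arg is unused */
	return 0;
}
early_param("demo_off", demo_setup);	/* hypothetical option name */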
+static int uv_base_pnode __read_mostly; + +static DEFINE_PER_CPU(struct ptc_stats, ptcstats); +static DEFINE_PER_CPU(struct bau_control, bau_control); +static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); + +/* + * Determine the first node on a uvhub. 'Nodes' are used for kernel + * memory allocation. + */ +static int __init uvhub_to_first_node(int uvhub) +{ + int node, b; + + for_each_online_node(node) { + b = uv_node_to_blade_id(node); + if (uvhub == b) + return node; + } + return -1; +} + +/* + * Determine the apicid of the first cpu on a uvhub. + */ +static int __init uvhub_to_first_apicid(int uvhub) +{ + int cpu; + + for_each_present_cpu(cpu) + if (uvhub == uv_cpu_to_blade_id(cpu)) + return per_cpu(x86_cpu_to_apicid, cpu); + return -1; +} + +/* + * Free a software acknowledge hardware resource by clearing its Pending + * bit. This will return a reply to the sender. + * If the message has timed out, a reply has already been sent by the + * hardware but the resource has not been released. In that case our + * clear of the Timeout bit (as well) will free the resource. No reply will + * be sent (the hardware will only do one reply per message). + */ +static void reply_to_message(struct msg_desc *mdp, struct bau_control *bcp, + int do_acknowledge) +{ + unsigned long dw; + struct bau_pq_entry *msg; + + msg = mdp->msg; + if (!msg->canceled && do_acknowledge) { + dw = (msg->swack_vec << UV_SW_ACK_NPENDING) | msg->swack_vec; + write_mmr_sw_ack(dw); + } + msg->replied_to = 1; + msg->swack_vec = 0; +} + +/* + * Process the receipt of a RETRY message + */ +static void bau_process_retry_msg(struct msg_desc *mdp, + struct bau_control *bcp) +{ + int i; + int cancel_count = 0; + unsigned long msg_res; + unsigned long mmr = 0; + struct bau_pq_entry *msg = mdp->msg; + struct bau_pq_entry *msg2; + struct ptc_stats *stat = bcp->statp; + + stat->d_retries++; + /* + * cancel any message from msg+1 to the retry itself + */ + for (msg2 = msg+1, i = 0; i < DEST_Q_SIZE; msg2++, i++) { + if (msg2 > mdp->queue_last) + msg2 = mdp->queue_first; + if (msg2 == msg) + break; + + /* same conditions for cancellation as do_reset */ + if ((msg2->replied_to == 0) && (msg2->canceled == 0) && + (msg2->swack_vec) && ((msg2->swack_vec & + msg->swack_vec) == 0) && + (msg2->sending_cpu == msg->sending_cpu) && + (msg2->msg_type != MSG_NOOP)) { + mmr = read_mmr_sw_ack(); + msg_res = msg2->swack_vec; + /* + * This is a message retry; clear the resources held + * by the previous message only if they timed out. + * If it has not timed out we have an unexpected + * situation to report. + */ + if (mmr & (msg_res << UV_SW_ACK_NPENDING)) { + unsigned long mr; + /* + * Is the resource timed out? + * Make everyone ignore the cancelled message. + */ + msg2->canceled = 1; + stat->d_canceled++; + cancel_count++; + mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res; + write_mmr_sw_ack(mr); + } + } + } + if (!cancel_count) + stat->d_nocanceled++; +} + +/* + * Do all the things a cpu should do for a TLB shootdown message. + * Other cpu's may come here at the same time for this message. 
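reply_to_message() above writes each software-ack vector bit twice: once as-is to clear the Pending bit, and once shifted left by UV_SW_ACK_NPENDING to clear the Timeout bit, so the hardware resource is released whether or not the message had already timed out. The same write as a small helper, a sketch built from the calls shown above:

static inline void demo_release_swack(unsigned long swack_vec)
{
	unsigned long dw;

	/* clear both the Pending and Timeout bits for this vector */
	dw = (swack_vec << UV_SW_ACK_NPENDING) | swack_vec;
	write_mmr_sw_ack(dw);
}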
+ */ +static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp, + int do_acknowledge) +{ + short socket_ack_count = 0; + short *sp; + struct atomic_short *asp; + struct ptc_stats *stat = bcp->statp; + struct bau_pq_entry *msg = mdp->msg; + struct bau_control *smaster = bcp->socket_master; + + /* + * This must be a normal message, or retry of a normal message + */ + if (msg->address == TLB_FLUSH_ALL) { + local_flush_tlb(); + stat->d_alltlb++; + } else { + __flush_tlb_one(msg->address); + stat->d_onetlb++; + } + stat->d_requestee++; + + /* + * One cpu on each uvhub has the additional job on a RETRY + * of releasing the resource held by the message that is + * being retried. That message is identified by sending + * cpu number. + */ + if (msg->msg_type == MSG_RETRY && bcp == bcp->uvhub_master) + bau_process_retry_msg(mdp, bcp); + + /* + * This is a swack message, so we have to reply to it. + * Count each responding cpu on the socket. This avoids + * pinging the count's cache line back and forth between + * the sockets. + */ + sp = &smaster->socket_acknowledge_count[mdp->msg_slot]; + asp = (struct atomic_short *)sp; + socket_ack_count = atom_asr(1, asp); + if (socket_ack_count == bcp->cpus_in_socket) { + int msg_ack_count; + /* + * Both sockets dump their completed count total into + * the message's count. + */ + smaster->socket_acknowledge_count[mdp->msg_slot] = 0; + asp = (struct atomic_short *)&msg->acknowledge_count; + msg_ack_count = atom_asr(socket_ack_count, asp); + + if (msg_ack_count == bcp->cpus_in_uvhub) { + /* + * All cpus in uvhub saw it; reply + * (unless we are in the UV2 workaround) + */ + reply_to_message(mdp, bcp, do_acknowledge); + } + } + + return; +} + +/* + * Determine the first cpu on a pnode. + */ +static int pnode_to_first_cpu(int pnode, struct bau_control *smaster) +{ + int cpu; + struct hub_and_pnode *hpp; + + for_each_present_cpu(cpu) { + hpp = &smaster->thp[cpu]; + if (pnode == hpp->pnode) + return cpu; + } + return -1; +} + +/* + * Last resort when we get a large number of destination timeouts is + * to clear resources held by a given cpu. + * Do this with IPI so that all messages in the BAU message queue + * can be identified by their nonzero swack_vec field. + * + * This is entered for a single cpu on the uvhub. + * The sender wants this uvhub to free a specific message's + * swack resources. + */ +static void do_reset(void *ptr) +{ + int i; + struct bau_control *bcp = &per_cpu(bau_control, smp_processor_id()); + struct reset_args *rap = (struct reset_args *)ptr; + struct bau_pq_entry *msg; + struct ptc_stats *stat = bcp->statp; + + stat->d_resets++; + /* + * We're looking for the given sender, and + * will free its swack resource. + * If all cpu's finally responded after the timeout, its + * message 'replied_to' was set.
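bau_process_message() above counts acknowledgements in two levels: responding cpus first increment a counter local to their socket, and only the cpu that completes a socket folds that socket's total into the message itself, so the message's cache line moves between sockets once per socket rather than once per cpu. A user-space model of the idea, with C11 atomics standing in for the kernel's atom_asr() (the argument order, names, and cpu counts here are illustrative):

#include <stdatomic.h>
#include <stdio.h>

/* stand-in for the kernel's atom_asr(): add and return the new value */
static short atom_add_return(atomic_short *v, short i)
{
	return (short)(atomic_fetch_add(v, i) + i);
}

int main(void)
{
	atomic_short socket_count[2] = {0, 0};	/* one per socket */
	atomic_short msg_count = 0;		/* lives in the message */
	const int cpus_in_socket = 4, cpus_in_uvhub = 8;

	for (int s = 0; s < 2; s++) {
		for (int cpu = 0; cpu < cpus_in_socket; cpu++) {
			/* every responder touches only its socket's line */
			short c = atom_add_return(&socket_count[s], 1);
			if (c != cpus_in_socket)
				continue;
			/* the last cpu of the socket folds the total in */
			if (atom_add_return(&msg_count, c) == cpus_in_uvhub)
				printf("all %d cpus saw it; reply now\n",
				       cpus_in_uvhub);
		}
	}
	return 0;
}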
+ */ + for (msg = bcp->queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) { + unsigned long msg_res; + /* do_reset: same conditions for cancellation as + bau_process_retry_msg() */ + if ((msg->replied_to == 0) && + (msg->canceled == 0) && + (msg->sending_cpu == rap->sender) && + (msg->swack_vec) && + (msg->msg_type != MSG_NOOP)) { + unsigned long mmr; + unsigned long mr; + /* + * make everyone else ignore this message + */ + msg->canceled = 1; + /* + * only reset the resource if it is still pending + */ + mmr = read_mmr_sw_ack(); + msg_res = msg->swack_vec; + mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res; + if (mmr & msg_res) { + stat->d_rcanceled++; + write_mmr_sw_ack(mr); + } + } + } + return; +} + +/* + * Use IPI to get all target uvhubs to release resources held by + * a given sending cpu number. + */ +static void reset_with_ipi(struct pnmask *distribution, struct bau_control *bcp) +{ + int pnode; + int apnode; + int maskbits; + int sender = bcp->cpu; + cpumask_t *mask = bcp->uvhub_master->cpumask; + struct bau_control *smaster = bcp->socket_master; + struct reset_args reset_args; + + reset_args.sender = sender; + cpus_clear(*mask); + /* find a single cpu for each uvhub in this distribution mask */ + maskbits = sizeof(struct pnmask) * BITSPERBYTE; + /* each bit is a pnode relative to the partition base pnode */ + for (pnode = 0; pnode < maskbits; pnode++) { + int cpu; + if (!bau_uvhub_isset(pnode, distribution)) + continue; + apnode = pnode + bcp->partition_base_pnode; + cpu = pnode_to_first_cpu(apnode, smaster); + cpu_set(cpu, *mask); + } + + /* IPI all cpus; preemption is already disabled */ + smp_call_function_many(mask, do_reset, (void *)&reset_args, 1); + return; +} + +static inline unsigned long cycles_2_us(unsigned long long cyc) +{ + unsigned long long ns; + unsigned long us; + int cpu = smp_processor_id(); + + ns = (cyc * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR; + us = ns / 1000; + return us; +} + +/* + * wait for all cpus on this hub to finish their sends and go quiet + * leaves uvhub_quiesce set so that no new broadcasts are started by + * bau_flush_send_and_wait() + */ +static inline void quiesce_local_uvhub(struct bau_control *hmaster) +{ + atom_asr(1, (struct atomic_short *)&hmaster->uvhub_quiesce); +} + +/* + * mark this quiet-requestor as done + */ +static inline void end_uvhub_quiesce(struct bau_control *hmaster) +{ + atom_asr(-1, (struct atomic_short *)&hmaster->uvhub_quiesce); +} + +static unsigned long uv1_read_status(unsigned long mmr_offset, int right_shift) +{ + unsigned long descriptor_status; + + descriptor_status = uv_read_local_mmr(mmr_offset); + descriptor_status >>= right_shift; + descriptor_status &= UV_ACT_STATUS_MASK; + return descriptor_status; +} + +/* + * Wait for completion of a broadcast software ack message + * return COMPLETE, RETRY(PLUGGED or TIMEOUT) or GIVEUP + */ +static int uv1_wait_completion(struct bau_desc *bau_desc, + unsigned long mmr_offset, int right_shift, + struct bau_control *bcp, long try) +{ + unsigned long descriptor_status; + cycles_t ttm; + struct ptc_stats *stat = bcp->statp; + + descriptor_status = uv1_read_status(mmr_offset, right_shift); + /* spin on the status MMR, waiting for it to go idle */ + while ((descriptor_status != DS_IDLE)) { + /* + * Our software ack messages may be blocked because + * there are no swack resources available. As long + * as none of them has timed out hardware will NACK + * our message and its state will stay IDLE. 
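cycles_2_us() above is fixed-point arithmetic: the per-cpu cyc2ns factor is a pre-scaled multiplier, so nanoseconds come out as (cycles * cyc2ns) >> CYC2NS_SCALE_FACTOR. A worked standalone example, assuming a scale factor of 10 and a 2.4 GHz TSC (both assumptions, for illustration only):

#include <stdio.h>

#define CYC2NS_SCALE_FACTOR 10	/* assumed scale, for illustration */

int main(void)
{
	/* multiplier for a 2.4 GHz TSC: (10^6 << 10) / 2400000 = 426 */
	unsigned long long cyc2ns = (1000000ULL << CYC2NS_SCALE_FACTOR) / 2400000;
	unsigned long long cyc = 240000;	/* about 100 us of cycles */
	unsigned long long ns = (cyc * cyc2ns) >> CYC2NS_SCALE_FACTOR;

	printf("%llu cycles ~ %llu ns ~ %llu us\n", cyc, ns, ns / 1000);
	return 0;
}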
+ */ + if (descriptor_status == DS_SOURCE_TIMEOUT) { + stat->s_stimeout++; + return FLUSH_GIVEUP; + } else if (descriptor_status == DS_DESTINATION_TIMEOUT) { + stat->s_dtimeout++; + ttm = get_cycles(); + + /* + * Our retries may be blocked by all destination + * swack resources being consumed, and a timeout + * pending. In that case hardware returns the + * ERROR that looks like a destination timeout. + */ + if (cycles_2_us(ttm - bcp->send_message) < timeout_us) { + bcp->conseccompletes = 0; + return FLUSH_RETRY_PLUGGED; + } + + bcp->conseccompletes = 0; + return FLUSH_RETRY_TIMEOUT; + } else { + /* + * descriptor_status is still BUSY + */ + cpu_relax(); + } + descriptor_status = uv1_read_status(mmr_offset, right_shift); + } + bcp->conseccompletes++; + return FLUSH_COMPLETE; +} + +/* + * UV2 has an extra bit of status in the ACTIVATION_STATUS_2 register. + */ +static unsigned long uv2_read_status(unsigned long offset, int rshft, int desc) +{ + unsigned long descriptor_status; + unsigned long descriptor_status2; + + descriptor_status = ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK); + descriptor_status2 = (read_mmr_uv2_status() >> desc) & 0x1UL; + descriptor_status = (descriptor_status << 1) | descriptor_status2; + return descriptor_status; +} + +/* + * Return whether the status of the descriptor that is normally used for this + * cpu (the one indexed by its hub-relative cpu number) is busy. + * The status of the original 32 descriptors is always reflected in the 64 + * bits of UVH_LB_BAU_SB_ACTIVATION_STATUS_0. + * The bit provided by the activation_status_2 register is irrelevant to + * the status if it is only being tested for busy or not busy. + */ +int normal_busy(struct bau_control *bcp) +{ + int cpu = bcp->uvhub_cpu; + int mmr_offset; + int right_shift; + + mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; + right_shift = cpu * UV_ACT_STATUS_SIZE; + return (((((read_lmmr(mmr_offset) >> right_shift) & + UV_ACT_STATUS_MASK)) << 1) == UV2H_DESC_BUSY); +} + +/* + * Entered when a bau descriptor has gone into a permanent busy wait because + * of a hardware bug. + * Workaround the bug. 
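uv2_read_status() above widens the classic 2-bit descriptor status to 3 bits by appending the extra ACTIVATION_STATUS_2 bit as the least significant bit. A standalone sketch of that assembly (the MMR values are faked; the 2-bits-per-descriptor layout follows the code above):

#include <stdio.h>

#define UV_ACT_STATUS_SIZE 2	/* bits per descriptor, per the code above */

/* combine the 2-bit field with the extra STATUS_2 bit as the low bit */
static unsigned long uv2_status(unsigned long s01, unsigned long s2, int desc)
{
	unsigned long two_bits = (s01 >> (desc * UV_ACT_STATUS_SIZE)) & 0x3UL;
	unsigned long extra = (s2 >> desc) & 0x1UL;

	return (two_bits << 1) | extra;
}

int main(void)
{
	/* descriptor 3: 2-bit field = 2, extra bit = 1 -> 3-bit status 5 */
	printf("%lu\n", uv2_status(2UL << 6, 1UL << 3, 3));
	return 0;
}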
+ */ +int handle_uv2_busy(struct bau_control *bcp) +{ + int busy_one = bcp->using_desc; + int normal = bcp->uvhub_cpu; + int selected = -1; + int i; + unsigned long descriptor_status; + unsigned long status; + int mmr_offset; + struct bau_desc *bau_desc_old; + struct bau_desc *bau_desc_new; + struct bau_control *hmaster = bcp->uvhub_master; + struct ptc_stats *stat = bcp->statp; + cycles_t ttm; + + stat->s_uv2_wars++; + spin_lock(&hmaster->uvhub_lock); + /* try for the original first */ + if (busy_one != normal) { + if (!normal_busy(bcp)) + selected = normal; + } + if (selected < 0) { + /* can't use the normal, select an alternate */ + mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1; + descriptor_status = read_lmmr(mmr_offset); + + /* scan available descriptors 32-63 */ + for (i = 0; i < UV_CPUS_PER_AS; i++) { + if ((hmaster->inuse_map & (1 << i)) == 0) { + status = ((descriptor_status >> + (i * UV_ACT_STATUS_SIZE)) & + UV_ACT_STATUS_MASK) << 1; + if (status != UV2H_DESC_BUSY) { + selected = i + UV_CPUS_PER_AS; + break; + } + } + } + } + + if (busy_one != normal) + /* mark the busy alternate as not in-use */ + hmaster->inuse_map &= ~(1 << (busy_one - UV_CPUS_PER_AS)); + + if (selected >= 0) { + /* switch to the selected descriptor */ + if (selected != normal) { + /* set the selected alternate as in-use */ + hmaster->inuse_map |= + (1 << (selected - UV_CPUS_PER_AS)); + if (selected > stat->s_uv2_wars_hw) + stat->s_uv2_wars_hw = selected; + } + bau_desc_old = bcp->descriptor_base; + bau_desc_old += (ITEMS_PER_DESC * busy_one); + bcp->using_desc = selected; + bau_desc_new = bcp->descriptor_base; + bau_desc_new += (ITEMS_PER_DESC * selected); + *bau_desc_new = *bau_desc_old; + } else { + /* + * All are busy. Wait for the normal one for this cpu to + * free up. + */ + stat->s_uv2_war_waits++; + spin_unlock(&hmaster->uvhub_lock); + ttm = get_cycles(); + do { + cpu_relax(); + } while (normal_busy(bcp)); + spin_lock(&hmaster->uvhub_lock); + /* switch back to the original descriptor: copy the + * message from the slot we had been using into it */ + bau_desc_old = bcp->descriptor_base; + bau_desc_old += (ITEMS_PER_DESC * busy_one); + bcp->using_desc = normal; + bau_desc_new = bcp->descriptor_base; + bau_desc_new += (ITEMS_PER_DESC * normal); + *bau_desc_new = *bau_desc_old; /* copy the entire descriptor */ + } + spin_unlock(&hmaster->uvhub_lock); + return FLUSH_RETRY_BUSYBUG; +} + +static int uv2_wait_completion(struct bau_desc *bau_desc, + unsigned long mmr_offset, int right_shift, + struct bau_control *bcp, long try) +{ + unsigned long descriptor_stat; + cycles_t ttm; + int desc = bcp->using_desc; + long busy_reps = 0; + struct ptc_stats *stat = bcp->statp; + + descriptor_stat = uv2_read_status(mmr_offset, right_shift, desc); + + /* spin on the status MMR, waiting for it to go idle */ + while (descriptor_stat != UV2H_DESC_IDLE) { + /* + * Our software ack messages may be blocked because + * there are no swack resources available. As long + * as none of them has timed out hardware will NACK + * our message and its state will stay IDLE.
+ */ + if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT) || + (descriptor_stat == UV2H_DESC_DEST_PUT_ERR)) { + stat->s_stimeout++; + return FLUSH_GIVEUP; + } else if (descriptor_stat == UV2H_DESC_DEST_STRONG_NACK) { + stat->s_strongnacks++; + bcp->conseccompletes = 0; + return FLUSH_GIVEUP; + } else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) { + stat->s_dtimeout++; + bcp->conseccompletes = 0; + return FLUSH_RETRY_TIMEOUT; + } else { + busy_reps++; + if (busy_reps > 1000000) { + /* not to hammer on the clock */ + busy_reps = 0; + ttm = get_cycles(); + if ((ttm - bcp->send_message) > + (bcp->clocks_per_100_usec)) { + return handle_uv2_busy(bcp); + } + } + /* + * descriptor_stat is still BUSY + */ + cpu_relax(); + } + descriptor_stat = uv2_read_status(mmr_offset, right_shift, + desc); + } + bcp->conseccompletes++; + return FLUSH_COMPLETE; +} + +/* + * There are 2 status registers; each is an array[32] of 2 bits. Set up + * which register to read and the position in that register based on the + * cpu in the current hub. + */ +static int wait_completion(struct bau_desc *bau_desc, + struct bau_control *bcp, long try) +{ + int right_shift; + unsigned long mmr_offset; + int desc = bcp->using_desc; + + if (desc < UV_CPUS_PER_AS) { + mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; + right_shift = desc * UV_ACT_STATUS_SIZE; + } else { + mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1; + right_shift = ((desc - UV_CPUS_PER_AS) * UV_ACT_STATUS_SIZE); + } + + if (bcp->uvhub_version == 1) + return uv1_wait_completion(bau_desc, mmr_offset, right_shift, + bcp, try); + else + return uv2_wait_completion(bau_desc, mmr_offset, right_shift, + bcp, try); +} + +static inline cycles_t sec_2_cycles(unsigned long sec) +{ + unsigned long ns; + cycles_t cyc; + + ns = sec * 1000000000; + cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); + return cyc; +} + +/* + * Our retries are blocked by all destination sw ack resources being + * in use, and a timeout is pending. In that case hardware immediately + * returns the ERROR that looks like a destination timeout. + */ +static void destination_plugged(struct bau_desc *bau_desc, + struct bau_control *bcp, + struct bau_control *hmaster, struct ptc_stats *stat) +{ + udelay(bcp->plugged_delay); + bcp->plugged_tries++; + + if (bcp->plugged_tries >= bcp->plugsb4reset) { + bcp->plugged_tries = 0; + + quiesce_local_uvhub(hmaster); + + spin_lock(&hmaster->queue_lock); + reset_with_ipi(&bau_desc->distribution, bcp); + spin_unlock(&hmaster->queue_lock); + + end_uvhub_quiesce(hmaster); + + bcp->ipi_attempts++; + stat->s_resets_plug++; + } +} + +static void destination_timeout(struct bau_desc *bau_desc, + struct bau_control *bcp, struct bau_control *hmaster, + struct ptc_stats *stat) +{ + hmaster->max_concurr = 1; + bcp->timeout_tries++; + if (bcp->timeout_tries >= bcp->timeoutsb4reset) { + bcp->timeout_tries = 0; + + quiesce_local_uvhub(hmaster); + + spin_lock(&hmaster->queue_lock); + reset_with_ipi(&bau_desc->distribution, bcp); + spin_unlock(&hmaster->queue_lock); + + end_uvhub_quiesce(hmaster); + + bcp->ipi_attempts++; + stat->s_resets_timeout++; + } +} + +/* + * Completions are taking a very long time due to a congested numalink + * network.
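wait_completion() above picks the status register and bit position from the descriptor number: descriptors 0-31 live in ACTIVATION_STATUS_0 and 32-63 in ACTIVATION_STATUS_1, two bits each. A standalone sketch of that selection (UV_CPUS_PER_AS = 32 and UV_ACT_STATUS_SIZE = 2 are assumed here, to match the comment above):

#include <stdio.h>

#define UV_CPUS_PER_AS	   32	/* descriptors per status register */
#define UV_ACT_STATUS_SIZE  2	/* status bits per descriptor */

int main(void)
{
	for (int desc = 30; desc <= 34; desc++) {
		int reg = desc < UV_CPUS_PER_AS ? 0 : 1;
		int shift = (desc % UV_CPUS_PER_AS) * UV_ACT_STATUS_SIZE;

		printf("desc %2d -> ACTIVATION_STATUS_%d, right shift %2d\n",
		       desc, reg, shift);
	}
	return 0;
}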
+ */ +static void disable_for_congestion(struct bau_control *bcp, + struct ptc_stats *stat) +{ + /* let only one cpu do this disabling */ + spin_lock(&disable_lock); + + if (!baudisabled && bcp->period_requests && + ((bcp->period_time / bcp->period_requests) > congested_cycles)) { + int tcpu; + struct bau_control *tbcp; + /* it becomes this cpu's job to turn on the use of the + BAU again */ + baudisabled = 1; + bcp->set_bau_off = 1; + bcp->set_bau_on_time = get_cycles(); + bcp->set_bau_on_time += sec_2_cycles(bcp->cong_period); + stat->s_bau_disabled++; + for_each_present_cpu(tcpu) { + tbcp = &per_cpu(bau_control, tcpu); + tbcp->baudisabled = 1; + } + } + + spin_unlock(&disable_lock); +} + +static void count_max_concurr(int stat, struct bau_control *bcp, + struct bau_control *hmaster) +{ + bcp->plugged_tries = 0; + bcp->timeout_tries = 0; + if (stat != FLUSH_COMPLETE) + return; + if (bcp->conseccompletes <= bcp->complete_threshold) + return; + if (hmaster->max_concurr >= hmaster->max_concurr_const) + return; + hmaster->max_concurr++; +} + +static void record_send_stats(cycles_t time1, cycles_t time2, + struct bau_control *bcp, struct ptc_stats *stat, + int completion_status, int try) +{ + cycles_t elapsed; + + if (time2 > time1) { + elapsed = time2 - time1; + stat->s_time += elapsed; + + if ((completion_status == FLUSH_COMPLETE) && (try == 1)) { + bcp->period_requests++; + bcp->period_time += elapsed; + if ((elapsed > congested_cycles) && + (bcp->period_requests > bcp->cong_reps)) + disable_for_congestion(bcp, stat); + } + } else + stat->s_requestor--; + + if (completion_status == FLUSH_COMPLETE && try > 1) + stat->s_retriesok++; + else if (completion_status == FLUSH_GIVEUP) + stat->s_giveup++; +} + +/* + * Because of a uv1 hardware bug only a limited number of concurrent + * requests can be made. + */ +static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat) +{ + spinlock_t *lock = &hmaster->uvhub_lock; + atomic_t *v; + + v = &hmaster->active_descriptor_count; + if (!atomic_inc_unless_ge(lock, v, hmaster->max_concurr)) { + stat->s_throttles++; + do { + cpu_relax(); + } while (!atomic_inc_unless_ge(lock, v, hmaster->max_concurr)); + } +} + +/* + * Handle the completion status of a message send. + */ +static void handle_cmplt(int completion_status, struct bau_desc *bau_desc, + struct bau_control *bcp, struct bau_control *hmaster, + struct ptc_stats *stat) +{ + if (completion_status == FLUSH_RETRY_PLUGGED) + destination_plugged(bau_desc, bcp, hmaster, stat); + else if (completion_status == FLUSH_RETRY_TIMEOUT) + destination_timeout(bau_desc, bcp, hmaster, stat); +} + +/* + * Send a broadcast and wait for it to complete. + * + * The flush_mask contains the cpus the broadcast is to be sent to including + * cpus that are on the local uvhub. + * + * Returns 0 if all flushing represented in the mask was done. + * Returns 1 if it gives up entirely and the original cpu mask is to be + * returned to the kernel. 
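disable_for_congestion() above triggers on an average, not on a single slow send: the BAU is switched off when period_time / period_requests exceeds congested_cycles (which uv_bau_init() derives from congested_respns_us). A standalone sketch of the check, with made-up numbers:

#include <stdio.h>

int main(void)
{
	/* illustrative numbers only */
	unsigned long long period_time = 5000000;	/* cycles spent */
	unsigned long long period_requests = 400;	/* broadcasts timed */
	unsigned long long congested_cycles = 10000;	/* threshold */

	if (period_requests &&
	    period_time / period_requests > congested_cycles)
		printf("average %llu cycles/request: disable the BAU\n",
		       period_time / period_requests);
	else
		printf("average is acceptable; keep broadcasting\n");
	return 0;
}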
+ */ +int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp) +{ + int seq_number = 0; + int completion_stat = 0; + int uv1 = 0; + long try = 0; + unsigned long index; + cycles_t time1; + cycles_t time2; + struct ptc_stats *stat = bcp->statp; + struct bau_control *hmaster = bcp->uvhub_master; + struct uv1_bau_msg_header *uv1_hdr = NULL; + struct uv2_bau_msg_header *uv2_hdr = NULL; + struct bau_desc *bau_desc; + + if (bcp->uvhub_version == 1) + uv1_throttle(hmaster, stat); + + while (hmaster->uvhub_quiesce) + cpu_relax(); + + time1 = get_cycles(); + do { + bau_desc = bcp->descriptor_base; + bau_desc += (ITEMS_PER_DESC * bcp->using_desc); + if (bcp->uvhub_version == 1) { + uv1 = 1; + uv1_hdr = &bau_desc->header.uv1_hdr; + } else + uv2_hdr = &bau_desc->header.uv2_hdr; + if ((try == 0) || (completion_stat == FLUSH_RETRY_BUSYBUG)) { + if (uv1) + uv1_hdr->msg_type = MSG_REGULAR; + else + uv2_hdr->msg_type = MSG_REGULAR; + seq_number = bcp->message_number++; + } else { + if (uv1) + uv1_hdr->msg_type = MSG_RETRY; + else + uv2_hdr->msg_type = MSG_RETRY; + stat->s_retry_messages++; + } + + if (uv1) + uv1_hdr->sequence = seq_number; + else + uv2_hdr->sequence = seq_number; + index = (1UL << AS_PUSH_SHIFT) | bcp->using_desc; + bcp->send_message = get_cycles(); + + write_mmr_activation(index); + + try++; + completion_stat = wait_completion(bau_desc, bcp, try); + /* UV2: wait_completion() may change the bcp->using_desc */ + + handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat); + + if (bcp->ipi_attempts >= bcp->ipi_reset_limit) { + bcp->ipi_attempts = 0; + completion_stat = FLUSH_GIVEUP; + break; + } + cpu_relax(); + } while ((completion_stat == FLUSH_RETRY_PLUGGED) || + (completion_stat == FLUSH_RETRY_BUSYBUG) || + (completion_stat == FLUSH_RETRY_TIMEOUT)); + + time2 = get_cycles(); + + count_max_concurr(completion_stat, bcp, hmaster); + + while (hmaster->uvhub_quiesce) + cpu_relax(); + + atomic_dec(&hmaster->active_descriptor_count); + + record_send_stats(time1, time2, bcp, stat, completion_stat, try); + + if (completion_stat == FLUSH_GIVEUP) + /* FLUSH_GIVEUP will fall back to using IPI's for tlb flush */ + return 1; + return 0; +} + +/* + * The BAU is disabled. When the disabled time period has expired, the cpu + * that disabled it must re-enable it. + * Return 0 if it is re-enabled for all cpus. 
+ */ +static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) +{ + int tcpu; + struct bau_control *tbcp; + + if (bcp->set_bau_off) { + if (get_cycles() >= bcp->set_bau_on_time) { + stat->s_bau_reenabled++; + baudisabled = 0; + for_each_present_cpu(tcpu) { + tbcp = &per_cpu(bau_control, tcpu); + tbcp->baudisabled = 0; + tbcp->period_requests = 0; + tbcp->period_time = 0; + } + return 0; + } + } + return -1; +} + +static void record_send_statistics(struct ptc_stats *stat, int locals, int hubs, + int remotes, struct bau_desc *bau_desc) +{ + stat->s_requestor++; + stat->s_ntargcpu += remotes + locals; + stat->s_ntargremotes += remotes; + stat->s_ntarglocals += locals; + + /* uvhub statistics */ + hubs = bau_uvhub_weight(&bau_desc->distribution); + if (locals) { + stat->s_ntarglocaluvhub++; + stat->s_ntargremoteuvhub += (hubs - 1); + } else + stat->s_ntargremoteuvhub += hubs; + + stat->s_ntarguvhub += hubs; + + if (hubs >= 16) + stat->s_ntarguvhub16++; + else if (hubs >= 8) + stat->s_ntarguvhub8++; + else if (hubs >= 4) + stat->s_ntarguvhub4++; + else if (hubs >= 2) + stat->s_ntarguvhub2++; + else + stat->s_ntarguvhub1++; +} + +/* + * Translate a cpu mask to the uvhub distribution mask in the BAU + * activation descriptor. + */ +static int set_distrib_bits(struct cpumask *flush_mask, struct bau_control *bcp, + struct bau_desc *bau_desc, int *localsp, int *remotesp) +{ + int cpu; + int pnode; + int cnt = 0; + struct hub_and_pnode *hpp; + + for_each_cpu(cpu, flush_mask) { + /* + * The distribution vector is a bit map of pnodes, relative + * to the partition base pnode (and the partition base nasid + * in the header). + * Translate cpu to pnode and hub using a local memory array. + */ + hpp = &bcp->socket_master->thp[cpu]; + pnode = hpp->pnode - bcp->partition_base_pnode; + bau_uvhub_set(pnode, &bau_desc->distribution); + cnt++; + if (hpp->uvhub == bcp->uvhub) + (*localsp)++; + else + (*remotesp)++; + } + if (!cnt) + return 1; + return 0; +} + +/* + * globally purge translation cache of a virtual address or all TLB's + * @cpumask: mask of all cpu's in which the address is to be removed + * @mm: mm_struct containing virtual address range + * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) + * @cpu: the current cpu + * + * This is the entry point for initiating any UV global TLB shootdown. + * + * Purges the translation caches of all specified processors of the given + * virtual address, or purges all TLB's on specified processors. + * + * The caller has derived the cpumask from the mm_struct. This function + * is called only if there are bits set in the mask. (e.g. flush_tlb_page()) + * + * The cpumask is converted into a uvhubmask of the uvhubs containing + * those cpus. + * + * Note that this function should be called with preemption disabled. + * + * Returns NULL if all remote flushing was done. + * Returns pointer to cpumask if some remote flushing remains to be + * done. The returned pointer is valid till preemption is re-enabled. 
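set_distrib_bits() above records each destination as a pnode number relative to the partition base pnode, one bit per pnode in the descriptor's distribution mask. A standalone sketch of the bitmap arithmetic (the 256-bit mask size and 64-bit longs are assumptions for illustration):

#include <stdio.h>

#define DIST_BITS 256	/* assumed size of the distribution mask */

int main(void)
{
	unsigned long dist[DIST_BITS / 64] = {0};
	int base_pnode = 4;
	int targets[] = {4, 7, 12};

	for (int i = 0; i < 3; i++) {
		int bit = targets[i] - base_pnode;	/* base-relative */
		dist[bit / 64] |= 1UL << (bit % 64);
	}
	printf("word 0 = 0x%lx\n", dist[0]);	/* bits 0, 3, 8 -> 0x109 */
	return 0;
}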
+ */ +const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long va, + unsigned int cpu) +{ + int locals = 0; + int remotes = 0; + int hubs = 0; + struct bau_desc *bau_desc; + struct cpumask *flush_mask; + struct ptc_stats *stat; + struct bau_control *bcp; + + /* kernel was booted 'nobau' */ + if (nobau) + return cpumask; + + bcp = &per_cpu(bau_control, cpu); + stat = bcp->statp; + + /* bau was disabled due to slow response */ + if (bcp->baudisabled) { + if (check_enable(bcp, stat)) + return cpumask; + } + + /* + * Each sending cpu has a per-cpu mask which it fills from the caller's + * cpu mask. All cpus are converted to uvhubs and copied to the + * activation descriptor. + */ + flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu); + /* don't actually do a shootdown of the local cpu */ + cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); + + if (cpu_isset(cpu, *cpumask)) + stat->s_ntargself++; + + bau_desc = bcp->descriptor_base; + bau_desc += (ITEMS_PER_DESC * bcp->using_desc); + bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); + if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes)) + return NULL; + + record_send_statistics(stat, locals, hubs, remotes, bau_desc); + + bau_desc->payload.address = va; + bau_desc->payload.sending_cpu = cpu; + /* + * uv_flush_send_and_wait returns 0 if all cpu's were messaged, + * or 1 if it gave up and the original cpumask should be returned. + */ + if (!uv_flush_send_and_wait(flush_mask, bcp)) + return NULL; + else + return cpumask; +} + +/* + * Search the message queue for any 'other' message with the same software + * acknowledge resource bit vector. + */ +struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg, + struct bau_control *bcp, unsigned char swack_vec) +{ + struct bau_pq_entry *msg_next = msg + 1; + + if (msg_next > bcp->queue_last) + msg_next = bcp->queue_first; + while ((msg_next->swack_vec != 0) && (msg_next != msg)) { + if (msg_next->swack_vec == swack_vec) + return msg_next; + msg_next++; + if (msg_next > bcp->queue_last) + msg_next = bcp->queue_first; + } + return NULL; +} + +/* + * UV2 needs to work around a bug in which an arriving message has not + * set a bit in the UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE register. + * Such a message must be ignored. + */ +void process_uv2_message(struct msg_desc *mdp, struct bau_control *bcp) +{ + unsigned long mmr_image; + unsigned char swack_vec; + struct bau_pq_entry *msg = mdp->msg; + struct bau_pq_entry *other_msg; + + mmr_image = read_mmr_sw_ack(); + swack_vec = msg->swack_vec; + + if ((swack_vec & mmr_image) == 0) { + /* + * This message was assigned a swack resource, but no + * reserved acknowledgment is pending. + * The bug has prevented this message from setting the MMR. + * And no other message has used the same sw_ack resource. + * Do the requested shootdown but do not reply to the msg. + * (the 0 means make no acknowledge) + */ + bau_process_message(mdp, bcp, 0); + return; + } + + /* + * Some message has set the MMR 'pending' bit; it might have been + * another message. Look for that message. + */ + other_msg = find_another_by_swack(msg, bcp, msg->swack_vec); + if (other_msg) { + /* There is another. Do not ack the current one. */ + bau_process_message(mdp, bcp, 0); + /* + * Let the natural processing of that message acknowledge + * it. Don't get the processing of sw_ack's out of order.
+ */ + return; + } + + /* + * There is no other message using this sw_ack, so it is safe to + * acknowledge it. + */ + bau_process_message(mdp, bcp, 1); + + return; +} + +/* + * The BAU message interrupt comes here. (registered by set_intr_gate) + * See entry_64.S + * + * We received a broadcast assist message. + * + * Interrupts are disabled; this interrupt could represent + * the receipt of several messages. + * + * All cores/threads on this hub get this interrupt. + * The last one to see it does the software ack. + * (the resource will not be freed until noninterruptable cpus see this + * interrupt; hardware may timeout the s/w ack and reply ERROR) + */ +void uv_bau_message_interrupt(struct pt_regs *regs) +{ + int count = 0; + cycles_t time_start; + struct bau_pq_entry *msg; + struct bau_control *bcp; + struct ptc_stats *stat; + struct msg_desc msgdesc; + + ack_APIC_irq(); + time_start = get_cycles(); + + bcp = &per_cpu(bau_control, smp_processor_id()); + stat = bcp->statp; + + msgdesc.queue_first = bcp->queue_first; + msgdesc.queue_last = bcp->queue_last; + + msg = bcp->bau_msg_head; + while (msg->swack_vec) { + count++; + + msgdesc.msg_slot = msg - msgdesc.queue_first; + msgdesc.msg = msg; + if (bcp->uvhub_version == 2) + process_uv2_message(&msgdesc, bcp); + else + bau_process_message(&msgdesc, bcp, 1); + + msg++; + if (msg > msgdesc.queue_last) + msg = msgdesc.queue_first; + bcp->bau_msg_head = msg; + } + stat->d_time += (get_cycles() - time_start); + if (!count) + stat->d_nomsg++; + else if (count > 1) + stat->d_multmsg++; +} + +/* + * Each target uvhub (i.e. a uvhub that has cpu's) needs to have + * shootdown message timeouts enabled. The timeout does not cause + * an interrupt, but causes an error message to be returned to + * the sender. + */ +static void __init enable_timeouts(void) +{ + int uvhub; + int nuvhubs; + int pnode; + unsigned long mmr_image; + + nuvhubs = uv_num_possible_blades(); + + for (uvhub = 0; uvhub < nuvhubs; uvhub++) { + if (!uv_blade_nr_possible_cpus(uvhub)) + continue; + + pnode = uv_blade_to_pnode(uvhub); + mmr_image = read_mmr_misc_control(pnode); + /* + * Set the timeout period and then lock it in, in three + * steps; captures and locks in the period. + * + * To program the period, the SOFT_ACK_MODE must be off. + */ + mmr_image &= ~(1L << SOFTACK_MSHIFT); + write_mmr_misc_control(pnode, mmr_image); + /* + * Set the 4-bit period. + */ + mmr_image &= ~((unsigned long)0xf << SOFTACK_PSHIFT); + mmr_image |= (SOFTACK_TIMEOUT_PERIOD << SOFTACK_PSHIFT); + write_mmr_misc_control(pnode, mmr_image); + /* + * UV1: + * Subsequent reversals of the timebase bit (3) cause an + * immediate timeout of one or all INTD resources as + * indicated in bits 2:0 (7 causes all of them to timeout). 
+ */ + mmr_image |= (1L << SOFTACK_MSHIFT); + if (is_uv2_hub()) { + mmr_image |= (1L << UV2_EXT_SHFT); + } + write_mmr_misc_control(pnode, mmr_image); + } +} + +static void *ptc_seq_start(struct seq_file *file, loff_t *offset) +{ + if (*offset < num_possible_cpus()) + return offset; + return NULL; +} + +static void *ptc_seq_next(struct seq_file *file, void *data, loff_t *offset) +{ + (*offset)++; + if (*offset < num_possible_cpus()) + return offset; + return NULL; +} + +static void ptc_seq_stop(struct seq_file *file, void *data) +{ +} + +static inline unsigned long long usec_2_cycles(unsigned long microsec) +{ + unsigned long ns; + unsigned long long cyc; + + ns = microsec * 1000; + cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); + return cyc; +} + +/* + * Display the statistics thru /proc/sgi_uv/ptc_statistics + * 'data' points to the cpu number + * Note: see the descriptions in stat_description[]. + */ +static int ptc_seq_show(struct seq_file *file, void *data) +{ + struct ptc_stats *stat; + int cpu; + + cpu = *(loff_t *)data; + if (!cpu) { + seq_printf(file, + "# cpu sent stime self locals remotes ncpus localhub "); + seq_printf(file, + "remotehub numuvhubs numuvhubs16 numuvhubs8 "); + seq_printf(file, + "numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries rok "); + seq_printf(file, + "resetp resett giveup sto bz throt swack recv rtime "); + seq_printf(file, + "all one mult none retry canc nocan reset rcan "); + seq_printf(file, + "disable enable wars warshw warwaits\n"); + } + if (cpu < num_possible_cpus() && cpu_online(cpu)) { + stat = &per_cpu(ptcstats, cpu); + /* source side statistics */ + seq_printf(file, + "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", + cpu, stat->s_requestor, cycles_2_us(stat->s_time), + stat->s_ntargself, stat->s_ntarglocals, + stat->s_ntargremotes, stat->s_ntargcpu, + stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub, + stat->s_ntarguvhub, stat->s_ntarguvhub16); + seq_printf(file, "%ld %ld %ld %ld %ld %ld ", + stat->s_ntarguvhub8, stat->s_ntarguvhub4, + stat->s_ntarguvhub2, stat->s_ntarguvhub1, + stat->s_dtimeout, stat->s_strongnacks); + seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ", + stat->s_retry_messages, stat->s_retriesok, + stat->s_resets_plug, stat->s_resets_timeout, + stat->s_giveup, stat->s_stimeout, + stat->s_busy, stat->s_throttles); + + /* destination side statistics */ + seq_printf(file, + "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", + read_gmmr_sw_ack(uv_cpu_to_pnode(cpu)), + stat->d_requestee, cycles_2_us(stat->d_time), + stat->d_alltlb, stat->d_onetlb, stat->d_multmsg, + stat->d_nomsg, stat->d_retries, stat->d_canceled, + stat->d_nocanceled, stat->d_resets, + stat->d_rcanceled); + seq_printf(file, "%ld %ld %ld %ld %ld\n", + stat->s_bau_disabled, stat->s_bau_reenabled, + stat->s_uv2_wars, stat->s_uv2_wars_hw, + stat->s_uv2_war_waits); + } + return 0; +} + +/* + * Display the tunables thru debugfs + */ +static ssize_t tunables_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) +{ + char *buf; + int ret; + + buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n", + "max_concur plugged_delay plugsb4reset", + "timeoutsb4reset ipi_reset_limit complete_threshold", + "congested_response_us congested_reps congested_period", + max_concurr, plugged_delay, plugsb4reset, + timeoutsb4reset, ipi_reset_limit, complete_threshold, + congested_respns_us, congested_reps, congested_period); + + if (!buf) + return -ENOMEM; + + ret = simple_read_from_buffer(userbuf, count, ppos, buf, 
strlen(buf)); + kfree(buf); + return ret; +} + +/* + * handle a write to /proc/sgi_uv/ptc_statistics + * -1: reset the statistics + * 0: display meaning of the statistics + */ +static ssize_t ptc_proc_write(struct file *file, const char __user *user, + size_t count, loff_t *data) +{ + int cpu; + int i; + int elements; + long input_arg; + char optstr[64]; + struct ptc_stats *stat; + + if (count == 0 || count > sizeof(optstr)) + return -EINVAL; + if (copy_from_user(optstr, user, count)) + return -EFAULT; + optstr[count - 1] = '\0'; + + if (strict_strtol(optstr, 10, &input_arg) < 0) { + printk(KERN_DEBUG "%s is invalid\n", optstr); + return -EINVAL; + } + + if (input_arg == 0) { + elements = sizeof(stat_description)/sizeof(*stat_description); + printk(KERN_DEBUG "# cpu: cpu number\n"); + printk(KERN_DEBUG "Sender statistics:\n"); + for (i = 0; i < elements; i++) + printk(KERN_DEBUG "%s\n", stat_description[i]); + } else if (input_arg == -1) { + for_each_present_cpu(cpu) { + stat = &per_cpu(ptcstats, cpu); + memset(stat, 0, sizeof(struct ptc_stats)); + } + } + + return count; +} + +static int local_atoi(const char *name) +{ + int val = 0; + + for (;; name++) { + switch (*name) { + case '0' ... '9': + val = 10*val+(*name-'0'); + break; + default: + return val; + } + } +} + +/* + * Parse the values written to /sys/kernel/debug/sgi_uv/bau_tunables. + * Zero values reset them to defaults. + */ +static int parse_tunables_write(struct bau_control *bcp, char *instr, + int count) +{ + char *p; + char *q; + int cnt = 0; + int val; + int e = sizeof(tunables) / sizeof(*tunables); + + p = instr + strspn(instr, WHITESPACE); + q = p; + for (; *p; p = q + strspn(q, WHITESPACE)) { + q = p + strcspn(p, WHITESPACE); + cnt++; + if (q == p) + break; + } + if (cnt != e) { + printk(KERN_INFO "bau tunable error: should be %d values\n", e); + return -EINVAL; + } + + p = instr + strspn(instr, WHITESPACE); + q = p; + for (cnt = 0; *p; p = q + strspn(q, WHITESPACE), cnt++) { + q = p + strcspn(p, WHITESPACE); + val = local_atoi(p); + switch (cnt) { + case 0: + if (val == 0) { + max_concurr = MAX_BAU_CONCURRENT; + max_concurr_const = MAX_BAU_CONCURRENT; + continue; + } + if (val < 1 || val > bcp->cpus_in_uvhub) { + printk(KERN_DEBUG + "Error: BAU max concurrent %d is invalid\n", + val); + return -EINVAL; + } + max_concurr = val; + max_concurr_const = val; + continue; + default: + if (val == 0) + *tunables[cnt].tunp = tunables[cnt].deflt; + else + *tunables[cnt].tunp = val; + continue; + } + if (q == p) + break; + } + return 0; +} + +/* + * Handle a write to debugfs. 
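The two user-visible control files handled above are /proc/sgi_uv/ptc_statistics (read the per-cpu table; write -1 to clear the counters, 0 to print the field descriptions to the kernel log) and /sys/kernel/debug/sgi_uv/bau_tunables (nine whitespace-separated integers in tunables[] order, where 0 restores a default). A minimal user-space sketch exercising both:

#include <stdio.h>

int main(void)
{
	char line[1024];
	FILE *f;

	/* dump the per-cpu statistics table */
	f = fopen("/proc/sgi_uv/ptc_statistics", "r");
	if (!f) {
		perror("ptc_statistics");	/* not a UV system? */
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);

	/* nine zeros, in tunables[] order, restore every default */
	f = fopen("/sys/kernel/debug/sgi_uv/bau_tunables", "w");
	if (f) {
		fputs("0 0 0 0 0 0 0 0 0\n", f);
		fclose(f);
	}
	return 0;
}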
(/sys/kernel/debug/sgi_uv/bau_tunables) + */ +static ssize_t tunables_write(struct file *file, const char __user *user, + size_t count, loff_t *data) +{ + int cpu; + int ret; + char instr[100]; + struct bau_control *bcp; + + if (count == 0 || count > sizeof(instr)-1) + return -EINVAL; + if (copy_from_user(instr, user, count)) + return -EFAULT; + + instr[count] = '\0'; + + cpu = get_cpu(); + bcp = &per_cpu(bau_control, cpu); + ret = parse_tunables_write(bcp, instr, count); + put_cpu(); + if (ret) + return ret; + + for_each_present_cpu(cpu) { + bcp = &per_cpu(bau_control, cpu); + bcp->max_concurr = max_concurr; + bcp->max_concurr_const = max_concurr; + bcp->plugged_delay = plugged_delay; + bcp->plugsb4reset = plugsb4reset; + bcp->timeoutsb4reset = timeoutsb4reset; + bcp->ipi_reset_limit = ipi_reset_limit; + bcp->complete_threshold = complete_threshold; + bcp->cong_response_us = congested_respns_us; + bcp->cong_reps = congested_reps; + bcp->cong_period = congested_period; + } + return count; +} + +static const struct seq_operations uv_ptc_seq_ops = { + .start = ptc_seq_start, + .next = ptc_seq_next, + .stop = ptc_seq_stop, + .show = ptc_seq_show +}; + +static int ptc_proc_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &uv_ptc_seq_ops); +} + +static int tunables_open(struct inode *inode, struct file *file) +{ + return 0; +} + +static const struct file_operations proc_uv_ptc_operations = { + .open = ptc_proc_open, + .read = seq_read, + .write = ptc_proc_write, + .llseek = seq_lseek, + .release = seq_release, +}; + +static const struct file_operations tunables_fops = { + .open = tunables_open, + .read = tunables_read, + .write = tunables_write, + .llseek = default_llseek, +}; + +static int __init uv_ptc_init(void) +{ + struct proc_dir_entry *proc_uv_ptc; + + if (!is_uv_system()) + return 0; + + proc_uv_ptc = proc_create(UV_PTC_BASENAME, 0444, NULL, + &proc_uv_ptc_operations); + if (!proc_uv_ptc) { + printk(KERN_ERR "unable to create %s proc entry\n", + UV_PTC_BASENAME); + return -EINVAL; + } + + tunables_dir = debugfs_create_dir(UV_BAU_TUNABLES_DIR, NULL); + if (!tunables_dir) { + printk(KERN_ERR "unable to create debugfs directory %s\n", + UV_BAU_TUNABLES_DIR); + return -EINVAL; + } + tunables_file = debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600, + tunables_dir, NULL, &tunables_fops); + if (!tunables_file) { + printk(KERN_ERR "unable to create debugfs file %s\n", + UV_BAU_TUNABLES_FILE); + return -EINVAL; + } + return 0; +} + +/* + * Initialize the sending side's sending buffers. + */ +static void activation_descriptor_init(int node, int pnode, int base_pnode) +{ + int i; + int cpu; + int uv1 = 0; + unsigned long gpa; + unsigned long m; + unsigned long n; + size_t dsize; + struct bau_desc *bau_desc; + struct bau_desc *bd2; + struct uv1_bau_msg_header *uv1_hdr; + struct uv2_bau_msg_header *uv2_hdr; + struct bau_control *bcp; + + /* + * each bau_desc is 64 bytes; there are 8 (ITEMS_PER_DESC) + * per cpu; and one per cpu on the uvhub (ADP_SZ) + */ + dsize = sizeof(struct bau_desc) * ADP_SZ * ITEMS_PER_DESC; + bau_desc = kmalloc_node(dsize, GFP_KERNEL, node); + BUG_ON(!bau_desc); + + gpa = uv_gpa(bau_desc); + n = uv_gpa_to_gnode(gpa); + m = uv_gpa_to_offset(gpa); + if (is_uv1_hub()) + uv1 = 1; + + /* the 14-bit pnode */ + write_mmr_descriptor_base(pnode, (n << UV_DESC_PSHIFT | m)); + /* + * Initializing all 8 (ITEMS_PER_DESC) descriptors for each + * cpu even though we only use the first one; one descriptor can + * describe a broadcast to 256 uv hubs. 
+ */ + for (i = 0, bd2 = bau_desc; i < (ADP_SZ * ITEMS_PER_DESC); i++, bd2++) { + memset(bd2, 0, sizeof(struct bau_desc)); + if (uv1) { + uv1_hdr = &bd2->header.uv1_hdr; + uv1_hdr->swack_flag = 1; + /* + * The base_dest_nasid set in the message header + * is the nasid of the first uvhub in the partition. + * The bit map will indicate destination pnode numbers + * relative to that base. They may not be consecutive + * if nasid striding is being used. + */ + uv1_hdr->base_dest_nasid = + UV_PNODE_TO_NASID(base_pnode); + uv1_hdr->dest_subnodeid = UV_LB_SUBNODEID; + uv1_hdr->command = UV_NET_ENDPOINT_INTD; + uv1_hdr->int_both = 1; + /* + * all others need to be set to zero: + * fairness chaining multilevel count replied_to + */ + } else { + uv2_hdr = &bd2->header.uv2_hdr; + uv2_hdr->swack_flag = 1; + uv2_hdr->base_dest_nasid = + UV_PNODE_TO_NASID(base_pnode); + uv2_hdr->dest_subnodeid = UV_LB_SUBNODEID; + uv2_hdr->command = UV_NET_ENDPOINT_INTD; + } + } + for_each_present_cpu(cpu) { + if (pnode != uv_blade_to_pnode(uv_cpu_to_blade_id(cpu))) + continue; + bcp = &per_cpu(bau_control, cpu); + bcp->descriptor_base = bau_desc; + } +} + +/* + * initialize the destination side's receiving buffers + * entered for each uvhub in the partition + * - node is first node (kernel memory notion) on the uvhub + * - pnode is the uvhub's physical identifier + */ +static void pq_init(int node, int pnode) +{ + int cpu; + size_t plsize; + char *cp; + void *vp; + unsigned long pn; + unsigned long first; + unsigned long pn_first; + unsigned long last; + struct bau_pq_entry *pqp; + struct bau_control *bcp; + + plsize = (DEST_Q_SIZE + 1) * sizeof(struct bau_pq_entry); + vp = kmalloc_node(plsize, GFP_KERNEL, node); + pqp = (struct bau_pq_entry *)vp; + BUG_ON(!pqp); + + cp = (char *)pqp + 31; + pqp = (struct bau_pq_entry *)(((unsigned long)cp >> 5) << 5); + + for_each_present_cpu(cpu) { + if (pnode != uv_cpu_to_pnode(cpu)) + continue; + /* for every cpu on this pnode: */ + bcp = &per_cpu(bau_control, cpu); + bcp->queue_first = pqp; + bcp->bau_msg_head = pqp; + bcp->queue_last = pqp + (DEST_Q_SIZE - 1); + } + /* + * need the gnode of where the memory was really allocated + */ + pn = uv_gpa_to_gnode(uv_gpa(pqp)); + first = uv_physnodeaddr(pqp); + pn_first = ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | first; + last = uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1)); + write_mmr_payload_first(pnode, pn_first); + write_mmr_payload_tail(pnode, first); + write_mmr_payload_last(pnode, last); + write_gmmr_sw_ack(pnode, 0xffffUL); + + /* in effect, all msg_type's are set to MSG_NOOP */ + memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE); +} + +/* + * Initialization of each UV hub's structures + */ +static void __init init_uvhub(int uvhub, int vector, int base_pnode) +{ + int node; + int pnode; + unsigned long apicid; + + node = uvhub_to_first_node(uvhub); + pnode = uv_blade_to_pnode(uvhub); + + activation_descriptor_init(node, pnode, base_pnode); + + pq_init(node, pnode); + /* + * The below initialization can't be in firmware because the + * messaging IRQ will be determined by the OS. + */ + apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits; + write_mmr_data_config(pnode, ((apicid << 32) | vector)); +} + +/* + * We will set BAU_MISC_CONTROL with a timeout period. + * But the BIOS has set UVH_AGING_PRESCALE_SEL and UVH_TRANSACTION_TIMEOUT. + * So the destination timeout period has to be calculated from them. 
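pq_init() above over-allocates the payload queue by one entry and then rounds the base up to a 32-byte boundary with the (x >> 5) << 5 trick, rather than relying on the allocator's alignment. A standalone sketch of the same rounding (entry size and count are stand-ins):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	size_t esz = 32;	/* stand-in for sizeof(struct bau_pq_entry) */
	size_t nent = 8;	/* stand-in for DEST_Q_SIZE */
	/* over-allocate by one entry, as pq_init() does */
	char *raw = malloc((nent + 1) * esz);
	if (!raw)
		return 1;
	/* round up to a 32-byte boundary with the same shift trick */
	uintptr_t p = (uintptr_t)raw + 31;
	char *aligned = (char *)((p >> 5) << 5);

	printf("raw %p -> aligned %p\n", (void *)raw, (void *)aligned);
	free(raw);	/* free the original pointer, not the aligned one */
	return 0;
}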
+ */ +static int calculate_destination_timeout(void) +{ + unsigned long mmr_image; + int mult1; + int mult2; + int index; + int base; + int ret; + unsigned long ts_ns; + + if (is_uv1_hub()) { + mult1 = SOFTACK_TIMEOUT_PERIOD & BAU_MISC_CONTROL_MULT_MASK; + mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL); + index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK; + mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT); + mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK; + base = timeout_base_ns[index]; + ts_ns = base * mult1 * mult2; + ret = ts_ns / 1000; + } else { + /* 4 bits 0/1 for 10/80us base, 3 bits of multiplier */ + mmr_image = uv_read_local_mmr(UVH_LB_BAU_MISC_CONTROL); + mmr_image = (mmr_image & UV_SA_MASK) >> UV_SA_SHFT; + if (mmr_image & (1L << UV2_ACK_UNITS_SHFT)) + base = 80; + else + base = 10; + mult1 = mmr_image & UV2_ACK_MASK; + ret = mult1 * base; + } + return ret; +} + +static void __init init_per_cpu_tunables(void) +{ + int cpu; + struct bau_control *bcp; + + for_each_present_cpu(cpu) { + bcp = &per_cpu(bau_control, cpu); + bcp->baudisabled = 0; + bcp->statp = &per_cpu(ptcstats, cpu); + /* time interval to catch a hardware stay-busy bug */ + bcp->timeout_interval = usec_2_cycles(2*timeout_us); + bcp->max_concurr = max_concurr; + bcp->max_concurr_const = max_concurr; + bcp->plugged_delay = plugged_delay; + bcp->plugsb4reset = plugsb4reset; + bcp->timeoutsb4reset = timeoutsb4reset; + bcp->ipi_reset_limit = ipi_reset_limit; + bcp->complete_threshold = complete_threshold; + bcp->cong_response_us = congested_respns_us; + bcp->cong_reps = congested_reps; + bcp->cong_period = congested_period; + bcp->clocks_per_100_usec = usec_2_cycles(100); + spin_lock_init(&bcp->queue_lock); + spin_lock_init(&bcp->uvhub_lock); + } +} + +/* + * Scan all cpus to collect blade and socket summaries. + */ +static int __init get_cpu_topology(int base_pnode, + struct uvhub_desc *uvhub_descs, + unsigned char *uvhub_mask) +{ + int cpu; + int pnode; + int uvhub; + int socket; + struct bau_control *bcp; + struct uvhub_desc *bdp; + struct socket_desc *sdp; + + for_each_present_cpu(cpu) { + bcp = &per_cpu(bau_control, cpu); + + memset(bcp, 0, sizeof(struct bau_control)); + + pnode = uv_cpu_hub_info(cpu)->pnode; + if ((pnode - base_pnode) >= UV_DISTRIBUTION_SIZE) { + printk(KERN_EMERG + "cpu %d pnode %d-%d beyond %d; BAU disabled\n", + cpu, pnode, base_pnode, UV_DISTRIBUTION_SIZE); + return 1; + } + + bcp->osnode = cpu_to_node(cpu); + bcp->partition_base_pnode = base_pnode; + + uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; + *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8)); + bdp = &uvhub_descs[uvhub]; + + bdp->num_cpus++; + bdp->uvhub = uvhub; + bdp->pnode = pnode; + + /* kludge: 'assuming' one node per socket, and assuming that + disabling a socket just leaves a gap in node numbers */ + socket = bcp->osnode & 1; + bdp->socket_mask |= (1 << socket); + sdp = &bdp->socket[socket]; + sdp->cpu_number[sdp->num_cpus] = cpu; + sdp->num_cpus++; + if (sdp->num_cpus > MAX_CPUS_PER_SOCKET) { + printk(KERN_EMERG "%d cpus per socket invalid\n", + sdp->num_cpus); + return 1; + } + } + return 0; +} + +/* + * Each socket is to get a local array of pnodes/hubs. 
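In the UV1 branch of calculate_destination_timeout() above, the period is timeout_base_ns[index] scaled by the two BIOS-programmed multipliers and converted to microseconds. A worked standalone example with made-up register fields (the table is copied from the top of this file):

#include <stdio.h>

/* the same table as timeout_base_ns[] at the top of this file */
static const long base_ns[] = {
	20, 160, 1280, 10240, 81920, 655360, 5242880, 167772160
};

int main(void)
{
	/* illustrative register fields, not read from hardware */
	int index = 3;	/* UVH_AGING_PRESCALE_SEL urgency7, bits 30:28 */
	int mult1 = 10;	/* low bits of the softack timeout period */
	int mult2 = 16;	/* UVH_TRANSACTION_TIMEOUT multiplier */
	long ts_ns = base_ns[index] * mult1 * mult2;

	printf("destination timeout ~ %ld us\n", ts_ns / 1000); /* 1638 */
	return 0;
}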
+ */ +static void make_per_cpu_thp(struct bau_control *smaster) +{ + int cpu; + size_t hpsz = sizeof(struct hub_and_pnode) * num_possible_cpus(); + + smaster->thp = kmalloc_node(hpsz, GFP_KERNEL, smaster->osnode); + memset(smaster->thp, 0, hpsz); + for_each_present_cpu(cpu) { + smaster->thp[cpu].pnode = uv_cpu_hub_info(cpu)->pnode; + smaster->thp[cpu].uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; + } +} + +/* + * Each uvhub is to get a local cpumask. + */ +static void make_per_hub_cpumask(struct bau_control *hmaster) +{ + int sz = sizeof(cpumask_t); + + hmaster->cpumask = kzalloc_node(sz, GFP_KERNEL, hmaster->osnode); +} + +/* + * Initialize all the per_cpu information for the cpu's on a given socket, + * given what has been gathered into the socket_desc struct. + * And reports the chosen hub and socket masters back to the caller. + */ +static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp, + struct bau_control **smasterp, + struct bau_control **hmasterp) +{ + int i; + int cpu; + struct bau_control *bcp; + + for (i = 0; i < sdp->num_cpus; i++) { + cpu = sdp->cpu_number[i]; + bcp = &per_cpu(bau_control, cpu); + bcp->cpu = cpu; + if (i == 0) { + *smasterp = bcp; + if (!(*hmasterp)) + *hmasterp = bcp; + } + bcp->cpus_in_uvhub = bdp->num_cpus; + bcp->cpus_in_socket = sdp->num_cpus; + bcp->socket_master = *smasterp; + bcp->uvhub = bdp->uvhub; + if (is_uv1_hub()) + bcp->uvhub_version = 1; + else if (is_uv2_hub()) + bcp->uvhub_version = 2; + else { + printk(KERN_EMERG "uvhub version not 1 or 2\n"); + return 1; + } + bcp->uvhub_master = *hmasterp; + bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id; + bcp->using_desc = bcp->uvhub_cpu; + if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) { + printk(KERN_EMERG "%d cpus per uvhub invalid\n", + bcp->uvhub_cpu); + return 1; + } + } + return 0; +} + +/* + * Summarize the blade and socket topology into the per_cpu structures. 
+ */ +static int __init summarize_uvhub_sockets(int nuvhubs, + struct uvhub_desc *uvhub_descs, + unsigned char *uvhub_mask) +{ + int socket; + int uvhub; + unsigned short socket_mask; + + for (uvhub = 0; uvhub < nuvhubs; uvhub++) { + struct uvhub_desc *bdp; + struct bau_control *smaster = NULL; + struct bau_control *hmaster = NULL; + + if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8)))) + continue; + + bdp = &uvhub_descs[uvhub]; + socket_mask = bdp->socket_mask; + socket = 0; + while (socket_mask) { + struct socket_desc *sdp; + if ((socket_mask & 1)) { + sdp = &bdp->socket[socket]; + if (scan_sock(sdp, bdp, &smaster, &hmaster)) + return 1; + make_per_cpu_thp(smaster); + } + socket++; + socket_mask = (socket_mask >> 1); + } + make_per_hub_cpumask(hmaster); + } + return 0; +} + +/* + * initialize the bau_control structure for each cpu + */ +static int __init init_per_cpu(int nuvhubs, int base_part_pnode) +{ + unsigned char *uvhub_mask; + void *vp; + struct uvhub_desc *uvhub_descs; + + timeout_us = calculate_destination_timeout(); + + vp = kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL); + uvhub_descs = (struct uvhub_desc *)vp; + memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc)); + uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL); + + if (get_cpu_topology(base_part_pnode, uvhub_descs, uvhub_mask)) + goto fail; + + if (summarize_uvhub_sockets(nuvhubs, uvhub_descs, uvhub_mask)) + goto fail; + + kfree(uvhub_descs); + kfree(uvhub_mask); + init_per_cpu_tunables(); + return 0; + +fail: + kfree(uvhub_descs); + kfree(uvhub_mask); + return 1; +} + +/* + * Initialization of BAU-related structures + */ +static int __init uv_bau_init(void) +{ + int uvhub; + int pnode; + int nuvhubs; + int cur_cpu; + int cpus; + int vector; + cpumask_var_t *mask; + + if (!is_uv_system()) + return 0; + + if (nobau) + return 0; + + for_each_possible_cpu(cur_cpu) { + mask = &per_cpu(uv_flush_tlb_mask, cur_cpu); + zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu)); + } + + nuvhubs = uv_num_possible_blades(); + spin_lock_init(&disable_lock); + congested_cycles = usec_2_cycles(congested_respns_us); + + uv_base_pnode = 0x7fffffff; + for (uvhub = 0; uvhub < nuvhubs; uvhub++) { + cpus = uv_blade_nr_possible_cpus(uvhub); + if (cpus && (uv_blade_to_pnode(uvhub) < uv_base_pnode)) + uv_base_pnode = uv_blade_to_pnode(uvhub); + } + + enable_timeouts(); + + if (init_per_cpu(nuvhubs, uv_base_pnode)) { + nobau = 1; + return 0; + } + + vector = UV_BAU_MESSAGE; + for_each_possible_blade(uvhub) + if (uv_blade_nr_possible_cpus(uvhub)) + init_uvhub(uvhub, vector, uv_base_pnode); + + alloc_intr_gate(vector, uv_bau_message_intr1); + + for_each_possible_blade(uvhub) { + if (uv_blade_nr_possible_cpus(uvhub)) { + unsigned long val; + unsigned long mmr; + pnode = uv_blade_to_pnode(uvhub); + /* INIT the bau */ + val = 1L << 63; + write_gmmr_activation(pnode, val); + mmr = 1; /* should be 1 to broadcast to both sockets */ + if (!is_uv1_hub()) + write_mmr_data_broadcast(pnode, mmr); + } + } + + return 0; +} +core_initcall(uv_bau_init); +fs_initcall(uv_ptc_init); diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c new file mode 100644 index 00000000..f25c2765 --- /dev/null +++ b/arch/x86/platform/uv/uv_irq.c @@ -0,0 +1,285 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * SGI UV IRQ functions + * + * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved. 
+ */ + +#include <linux/module.h> +#include <linux/rbtree.h> +#include <linux/slab.h> +#include <linux/irq.h> + +#include <asm/apic.h> +#include <asm/uv/uv_irq.h> +#include <asm/uv/uv_hub.h> + +/* MMR offset and pnode of hub sourcing interrupts for a given irq */ +struct uv_irq_2_mmr_pnode{ + struct rb_node list; + unsigned long offset; + int pnode; + int irq; +}; + +static DEFINE_SPINLOCK(uv_irq_lock); +static struct rb_root uv_irq_root; + +static int uv_set_irq_affinity(struct irq_data *, const struct cpumask *, bool); + +static void uv_noop(struct irq_data *data) { } + +static void uv_ack_apic(struct irq_data *data) +{ + ack_APIC_irq(); +} + +static struct irq_chip uv_irq_chip = { + .name = "UV-CORE", + .irq_mask = uv_noop, + .irq_unmask = uv_noop, + .irq_eoi = uv_ack_apic, + .irq_set_affinity = uv_set_irq_affinity, +}; + +/* + * Add offset and pnode information of the hub sourcing interrupts to the + * rb tree for a specific irq. + */ +static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade) +{ + struct rb_node **link = &uv_irq_root.rb_node; + struct rb_node *parent = NULL; + struct uv_irq_2_mmr_pnode *n; + struct uv_irq_2_mmr_pnode *e; + unsigned long irqflags; + + n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL, + uv_blade_to_memory_nid(blade)); + if (!n) + return -ENOMEM; + + n->irq = irq; + n->offset = offset; + n->pnode = uv_blade_to_pnode(blade); + spin_lock_irqsave(&uv_irq_lock, irqflags); + /* Find the right place in the rbtree: */ + while (*link) { + parent = *link; + e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list); + + if (unlikely(irq == e->irq)) { + /* irq entry exists */ + e->pnode = uv_blade_to_pnode(blade); + e->offset = offset; + spin_unlock_irqrestore(&uv_irq_lock, irqflags); + kfree(n); + return 0; + } + + if (irq < e->irq) + link = &(*link)->rb_left; + else + link = &(*link)->rb_right; + } + + /* Insert the node into the rbtree. */ + rb_link_node(&n->list, parent, link); + rb_insert_color(&n->list, &uv_irq_root); + + spin_unlock_irqrestore(&uv_irq_lock, irqflags); + return 0; +} + +/* Retrieve offset and pnode information from the rb tree for a specific irq */ +int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode) +{ + struct uv_irq_2_mmr_pnode *e; + struct rb_node *n; + unsigned long irqflags; + + spin_lock_irqsave(&uv_irq_lock, irqflags); + n = uv_irq_root.rb_node; + while (n) { + e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); + + if (e->irq == irq) { + *offset = e->offset; + *pnode = e->pnode; + spin_unlock_irqrestore(&uv_irq_lock, irqflags); + return 0; + } + + if (irq < e->irq) + n = n->rb_left; + else + n = n->rb_right; + } + spin_unlock_irqrestore(&uv_irq_lock, irqflags); + return -1; +} + +/* + * Re-target the irq to the specified CPU and enable the specified MMR located + * on the specified blade to allow the sending of MSIs to the specified CPU. 
+ */ +static int +arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, + unsigned long mmr_offset, int limit) +{ + const struct cpumask *eligible_cpu = cpumask_of(cpu); + struct irq_cfg *cfg = irq_get_chip_data(irq); + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; + int mmr_pnode, err; + + BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != + sizeof(unsigned long)); + + err = assign_irq_vector(irq, cfg, eligible_cpu); + if (err != 0) + return err; + + if (limit == UV_AFFINITY_CPU) + irq_set_status_flags(irq, IRQ_NO_BALANCING); + else + irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); + + irq_set_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, + irq_name); + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + entry->vector = cfg->vector; + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; + entry->polarity = 0; + entry->trigger = 0; + entry->mask = 0; + entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); + + mmr_pnode = uv_blade_to_pnode(mmr_blade); + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); + + if (cfg->move_in_progress) + send_cleanup_vector(cfg); + + return irq; +} + +/* + * Disable the specified MMR located on the specified blade so that MSIs are + * no longer allowed to be sent. + */ +static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset) +{ + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; + + BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != + sizeof(unsigned long)); + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + entry->mask = 1; + + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); +} + +static int +uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, + bool force) +{ + struct irq_cfg *cfg = data->chip_data; + unsigned int dest; + unsigned long mmr_value, mmr_offset; + struct uv_IO_APIC_route_entry *entry; + int mmr_pnode; + + if (__ioapic_set_affinity(data, mask, &dest)) + return -1; + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + + entry->vector = cfg->vector; + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; + entry->polarity = 0; + entry->trigger = 0; + entry->mask = 0; + entry->dest = dest; + + /* Get previously stored MMR and pnode of hub sourcing interrupts */ + if (uv_irq_2_mmr_info(data->irq, &mmr_offset, &mmr_pnode)) + return -1; + + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); + + if (cfg->move_in_progress) + send_cleanup_vector(cfg); + + return 0; +} + +/* + * Set up a mapping of an available irq and vector, and enable the specified + * MMR that defines the MSI that is to be sent to the specified CPU when an + * interrupt is raised. + */ +int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, + unsigned long mmr_offset, int limit) +{ + int irq, ret; + + irq = create_irq_nr(NR_IRQS_LEGACY, uv_blade_to_memory_nid(mmr_blade)); + + if (irq <= 0) + return -EBUSY; + + ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset, + limit); + if (ret == irq) + uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade); + else + destroy_irq(irq); + + return ret; +} +EXPORT_SYMBOL_GPL(uv_setup_irq); + +/* + * Tear down a mapping of an irq and vector, and disable the specified MMR that + * defined the MSI that was to be sent to the specified CPU when an interrupt + * was raised. + * + * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq().
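A hedged sketch of how an in-kernel user might drive the exported pair uv_setup_irq()/uv_teardown_irq() defined here; the module name, cpu, blade, and MMR offset below are placeholders, not values from any real driver:

/* Hypothetical in-kernel user of the exported pair; placeholders only. */
#include <linux/module.h>
#include <asm/uv/uv_irq.h>

static int example_irq = -1;

static int __init uv_irq_example_init(void)
{
	example_irq = uv_setup_irq("uv_example", 0 /* cpu */,
				   0 /* mmr_blade */,
				   0x61b00UL /* placeholder MMR offset */,
				   UV_AFFINITY_CPU);
	return example_irq > 0 ? 0 : example_irq;
}

static void __exit uv_irq_example_exit(void)
{
	if (example_irq > 0)
		uv_teardown_irq(example_irq);
}

module_init(uv_irq_example_init);
module_exit(uv_irq_example_exit);
MODULE_LICENSE("GPL");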
+void uv_teardown_irq(unsigned int irq)
+{
+	struct uv_irq_2_mmr_pnode *e;
+	struct rb_node *n;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&uv_irq_lock, irqflags);
+	n = uv_irq_root.rb_node;
+	while (n) {
+		e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
+		if (e->irq == irq) {
+			arch_disable_uv_irq(e->pnode, e->offset);
+			rb_erase(n, &uv_irq_root);
+			kfree(e);
+			break;
+		}
+		if (irq < e->irq)
+			n = n->rb_left;
+		else
+			n = n->rb_right;
+	}
+	spin_unlock_irqrestore(&uv_irq_lock, irqflags);
+	destroy_irq(irq);
+}
+EXPORT_SYMBOL_GPL(uv_teardown_irq);
diff --git a/arch/x86/platform/uv/uv_sysfs.c b/arch/x86/platform/uv/uv_sysfs.c
new file mode 100644
index 00000000..5d4ba301
--- /dev/null
+++ b/arch/x86/platform/uv/uv_sysfs.c
@@ -0,0 +1,76 @@
+/*
+ * This file supports the /sys/firmware/sgi_uv interfaces for SGI UV.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) Russ Anderson
+ */
+
+#include <linux/device.h>
+#include <asm/uv/bios.h>
+#include <asm/uv/uv.h>
+
+struct kobject *sgi_uv_kobj;
+
+static ssize_t partition_id_show(struct kobject *kobj,
+			struct kobj_attribute *attr, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%ld\n", sn_partition_id);
+}
+
+static ssize_t coherence_id_show(struct kobject *kobj,
+			struct kobj_attribute *attr, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%ld\n", partition_coherence_id());
+}
+
+static struct kobj_attribute partition_id_attr =
+	__ATTR(partition_id, S_IRUGO, partition_id_show, NULL);
+
+static struct kobj_attribute coherence_id_attr =
+	__ATTR(coherence_id, S_IRUGO, coherence_id_show, NULL);
+
+
+static int __init sgi_uv_sysfs_init(void)
+{
+	int ret;
+
+	if (!is_uv_system())
+		return -ENODEV;
+
+	if (!sgi_uv_kobj)
+		sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj);
+	if (!sgi_uv_kobj) {
+		printk(KERN_WARNING "kobject_create_and_add sgi_uv failed\n");
+		return -EINVAL;
+	}
+
+	ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr);
+	if (ret) {
+		printk(KERN_WARNING "sysfs_create_file partition_id failed\n");
+		return ret;
+	}
+
+	ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr);
+	if (ret) {
+		printk(KERN_WARNING "sysfs_create_file coherence_id failed\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+device_initcall(sgi_uv_sysfs_init);
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
new file mode 100644
index 00000000..5032e0d1
--- /dev/null
+++ b/arch/x86/platform/uv/uv_time.c
@@ -0,0 +1,425 @@
+/*
+ * SGI RTC clock/timer routines.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Copyright (c) 2009 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) Dimitri Sivanich + */ +#include <linux/clockchips.h> +#include <linux/slab.h> + +#include <asm/uv/uv_mmrs.h> +#include <asm/uv/uv_hub.h> +#include <asm/uv/bios.h> +#include <asm/uv/uv.h> +#include <asm/apic.h> +#include <asm/cpu.h> + +#define RTC_NAME "sgi_rtc" + +static cycle_t uv_read_rtc(struct clocksource *cs); +static int uv_rtc_next_event(unsigned long, struct clock_event_device *); +static void uv_rtc_timer_setup(enum clock_event_mode, + struct clock_event_device *); + +static struct clocksource clocksource_uv = { + .name = RTC_NAME, + .rating = 299, + .read = uv_read_rtc, + .mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +static struct clock_event_device clock_event_device_uv = { + .name = RTC_NAME, + .features = CLOCK_EVT_FEAT_ONESHOT, + .shift = 20, + .rating = 400, + .irq = -1, + .set_next_event = uv_rtc_next_event, + .set_mode = uv_rtc_timer_setup, + .event_handler = NULL, +}; + +static DEFINE_PER_CPU(struct clock_event_device, cpu_ced); + +/* There is one of these allocated per node */ +struct uv_rtc_timer_head { + spinlock_t lock; + /* next cpu waiting for timer, local node relative: */ + int next_cpu; + /* number of cpus on this node: */ + int ncpus; + struct { + int lcpu; /* systemwide logical cpu number */ + u64 expires; /* next timer expiration for this cpu */ + } cpu[1]; +}; + +/* + * Access to uv_rtc_timer_head via blade id. + */ +static struct uv_rtc_timer_head **blade_info __read_mostly; + +static int uv_rtc_evt_enable; + +/* + * Hardware interface routines + */ + +/* Send IPIs to another node */ +static void uv_rtc_send_IPI(int cpu) +{ + unsigned long apicid, val; + int pnode; + + apicid = cpu_physical_id(cpu); + pnode = uv_apicid_to_pnode(apicid); + apicid |= uv_apicid_hibits; + val = (1UL << UVH_IPI_INT_SEND_SHFT) | + (apicid << UVH_IPI_INT_APIC_ID_SHFT) | + (X86_PLATFORM_IPI_VECTOR << UVH_IPI_INT_VECTOR_SHFT); + + uv_write_global_mmr64(pnode, UVH_IPI_INT, val); +} + +/* Check for an RTC interrupt pending */ +static int uv_intr_pending(int pnode) +{ + if (is_uv1_hub()) + return uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) & + UV1H_EVENT_OCCURRED0_RTC1_MASK; + else + return uv_read_global_mmr64(pnode, UV2H_EVENT_OCCURRED2) & + UV2H_EVENT_OCCURRED2_RTC_1_MASK; +} + +/* Setup interrupt and return non-zero if early expiration occurred. 
+ */
+static int uv_setup_intr(int cpu, u64 expires)
+{
+	u64 val;
+	unsigned long apicid = cpu_physical_id(cpu) | uv_apicid_hibits;
+	int pnode = uv_cpu_to_pnode(cpu);
+
+	uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG,
+		UVH_RTC1_INT_CONFIG_M_MASK);
+	uv_write_global_mmr64(pnode, UVH_INT_CMPB, -1L);
+
+	if (is_uv1_hub())
+		uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS,
+				UV1H_EVENT_OCCURRED0_RTC1_MASK);
+	else
+		uv_write_global_mmr64(pnode, UV2H_EVENT_OCCURRED2_ALIAS,
+				UV2H_EVENT_OCCURRED2_RTC_1_MASK);
+
+	val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) |
+		((u64)apicid << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT);
+
+	/* Set configuration */
+	uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, val);
+	/* Initialize comparator value */
+	uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires);
+
+	if (uv_read_rtc(NULL) <= expires)
+		return 0;
+
+	return !uv_intr_pending(pnode);
+}
+
+/*
+ * Per-cpu timer tracking routines
+ */
+
+static __init void uv_rtc_deallocate_timers(void)
+{
+	int bid;
+
+	for_each_possible_blade(bid)
+		kfree(blade_info[bid]);
+	kfree(blade_info);
+}
+
+/* Allocate per-node list of cpu timer expiration times. */
+static __init int uv_rtc_allocate_timers(void)
+{
+	int cpu;
+
+	blade_info = kzalloc(uv_possible_blades * sizeof(void *), GFP_KERNEL);
+	if (!blade_info)
+		return -ENOMEM;
+
+	for_each_present_cpu(cpu) {
+		int nid = cpu_to_node(cpu);
+		int bid = uv_cpu_to_blade_id(cpu);
+		int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id;
+		struct uv_rtc_timer_head *head = blade_info[bid];
+
+		if (!head) {
+			head = kmalloc_node(sizeof(struct uv_rtc_timer_head) +
+				(uv_blade_nr_possible_cpus(bid) *
+					2 * sizeof(u64)),
+				GFP_KERNEL, nid);
+			if (!head) {
+				uv_rtc_deallocate_timers();
+				return -ENOMEM;
+			}
+			spin_lock_init(&head->lock);
+			head->ncpus = uv_blade_nr_possible_cpus(bid);
+			head->next_cpu = -1;
+			blade_info[bid] = head;
+		}
+
+		head->cpu[bcpu].lcpu = cpu;
+		head->cpu[bcpu].expires = ULLONG_MAX;
+	}
+
+	return 0;
+}
+
+/* Find and set the next expiring timer. */
+static void uv_rtc_find_next_timer(struct uv_rtc_timer_head *head, int pnode)
+{
+	u64 lowest = ULLONG_MAX;
+	int c, bcpu = -1;
+
+	head->next_cpu = -1;
+	for (c = 0; c < head->ncpus; c++) {
+		u64 exp = head->cpu[c].expires;
+		if (exp < lowest) {
+			bcpu = c;
+			lowest = exp;
+		}
+	}
+	if (bcpu >= 0) {
+		head->next_cpu = bcpu;
+		c = head->cpu[bcpu].lcpu;
+		if (uv_setup_intr(c, lowest))
+			/* If we didn't set it up in time, trigger */
+			uv_rtc_send_IPI(c);
+	} else {
+		uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG,
+			UVH_RTC1_INT_CONFIG_M_MASK);
+	}
+}
+
+/*
+ * Set the expiration time for the current cpu.
+ *
+ * Returns -ETIME if we missed the expiration time.
+ */
+static int uv_rtc_set_timer(int cpu, u64 expires)
+{
+	int pnode = uv_cpu_to_pnode(cpu);
+	int bid = uv_cpu_to_blade_id(cpu);
+	struct uv_rtc_timer_head *head = blade_info[bid];
+	int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id;
+	u64 *t = &head->cpu[bcpu].expires;
+	unsigned long flags;
+	int next_cpu;
+
+	spin_lock_irqsave(&head->lock, flags);
+
+	next_cpu = head->next_cpu;
+	*t = expires;
+
+	/*
+	 * Will this one be next to go off?
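+	 * We only need to (re)arm the hardware when this cpu's new expiry
+	 * could be the node's earliest: nothing is armed yet (next_cpu < 0),
+	 * this cpu owns the currently armed slot (bcpu == next_cpu), or the
+	 * new expiry sorts before the one currently armed.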
+	 */
+	if (next_cpu < 0 || bcpu == next_cpu ||
+			expires < head->cpu[next_cpu].expires) {
+		head->next_cpu = bcpu;
+		if (uv_setup_intr(cpu, expires)) {
+			*t = ULLONG_MAX;
+			uv_rtc_find_next_timer(head, pnode);
+			spin_unlock_irqrestore(&head->lock, flags);
+			return -ETIME;
+		}
+	}
+
+	spin_unlock_irqrestore(&head->lock, flags);
+	return 0;
+}
+
+/*
+ * Unset the expiration time for the current cpu.
+ *
+ * Returns 1 if this timer was pending.
+ */
+static int uv_rtc_unset_timer(int cpu, int force)
+{
+	int pnode = uv_cpu_to_pnode(cpu);
+	int bid = uv_cpu_to_blade_id(cpu);
+	struct uv_rtc_timer_head *head = blade_info[bid];
+	int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id;
+	u64 *t = &head->cpu[bcpu].expires;
+	unsigned long flags;
+	int rc = 0;
+
+	spin_lock_irqsave(&head->lock, flags);
+
+	if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force)
+		rc = 1;
+
+	if (rc) {
+		*t = ULLONG_MAX;
+		/* Was the hardware set up for this timer? */
+		if (head->next_cpu == bcpu)
+			uv_rtc_find_next_timer(head, pnode);
+	}
+
+	spin_unlock_irqrestore(&head->lock, flags);
+
+	return rc;
+}
+
+
+/*
+ * Kernel interface routines.
+ */
+
+/*
+ * Read the RTC.
+ *
+ * Starting with HUB rev 2.0, the UV RTC register is replicated across all
+ * cachelines of its own page.  This allows faster simultaneous reads
+ * from a given socket.
+ */
+static cycle_t uv_read_rtc(struct clocksource *cs)
+{
+	unsigned long offset;
+
+	if (uv_get_min_hub_revision_id() == 1)
+		offset = 0;
+	else
+		offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE;
+
+	return (cycle_t)uv_read_local_mmr(UVH_RTC | offset);
+}
+
+/*
+ * Program the next event, relative to now.
+ */
+static int uv_rtc_next_event(unsigned long delta,
+			     struct clock_event_device *ced)
+{
+	int ced_cpu = cpumask_first(ced->cpumask);
+
+	return uv_rtc_set_timer(ced_cpu, delta + uv_read_rtc(NULL));
+}
+
+/*
+ * Set up the RTC timer in oneshot mode.
+ */
+static void uv_rtc_timer_setup(enum clock_event_mode mode,
+			       struct clock_event_device *evt)
+{
+	int ced_cpu = cpumask_first(evt->cpumask);
+
+	switch (mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+	case CLOCK_EVT_MODE_ONESHOT:
+	case CLOCK_EVT_MODE_RESUME:
+		/* Nothing to do here yet */
+		break;
+	case CLOCK_EVT_MODE_UNUSED:
+	case CLOCK_EVT_MODE_SHUTDOWN:
+		uv_rtc_unset_timer(ced_cpu, 1);
+		break;
+	}
+}
+
+static void uv_rtc_interrupt(void)
+{
+	int cpu = smp_processor_id();
+	struct clock_event_device *ced = &per_cpu(cpu_ced, cpu);
+
+	if (!ced || !ced->event_handler)
+		return;
+
+	if (uv_rtc_unset_timer(cpu, 0) != 1)
+		return;
+
+	ced->event_handler(ced);
+}
+
+static int __init uv_enable_evt_rtc(char *str)
+{
+	uv_rtc_evt_enable = 1;
+
+	return 1;
+}
+__setup("uvrtcevt", uv_enable_evt_rtc);
+
+static __init void uv_rtc_register_clockevents(struct work_struct *dummy)
+{
+	struct clock_event_device *ced = &__get_cpu_var(cpu_ced);
+
+	*ced = clock_event_device_uv;
+	ced->cpumask = cpumask_of(smp_processor_id());
+	clockevents_register_device(ced);
+}
+
+static __init int uv_rtc_setup_clock(void)
+{
+	int rc;
+
+	if (!is_uv_system())
+		return -ENODEV;
+
+	rc = clocksource_register_hz(&clocksource_uv, sn_rtc_cycles_per_second);
+	if (rc)
+		printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc);
+	else
+		printk(KERN_INFO "UV RTC clocksource registered freq %lu MHz\n",
+			sn_rtc_cycles_per_second/(unsigned long)1E6);
+
+	if (rc || !uv_rtc_evt_enable || x86_platform_ipi_callback)
+		return rc;
+
+	/* Setup and register clockevents */
+	rc = uv_rtc_allocate_timers();
+	if (rc)
+		goto error;
+
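+	/*
+	 * The remainder finishes clockevent registration: hook the platform
+	 * IPI, precompute the conversion factor, bound the programmable
+	 * range, then register a clockevent on each cpu.  With the .shift
+	 * of 20 chosen in clock_event_device_uv, div_sc() computes
+	 * mult = (cycles_per_second << 20) / NSEC_PER_SEC, so that
+	 * ns * mult >> shift converts a nanosecond delta into RTC cycles.
+	 * min/max_delta_ns are one RTC cycle and the full width of the
+	 * comparator, expressed in nanoseconds.
+	 */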
+	x86_platform_ipi_callback = uv_rtc_interrupt;
+
+	clock_event_device_uv.mult = div_sc(sn_rtc_cycles_per_second,
+				NSEC_PER_SEC, clock_event_device_uv.shift);
+
+	clock_event_device_uv.min_delta_ns = NSEC_PER_SEC /
+						sn_rtc_cycles_per_second;
+
+	clock_event_device_uv.max_delta_ns = clocksource_uv.mask *
+				(NSEC_PER_SEC / sn_rtc_cycles_per_second);
+
+	rc = schedule_on_each_cpu(uv_rtc_register_clockevents);
+	if (rc) {
+		x86_platform_ipi_callback = NULL;
+		uv_rtc_deallocate_timers();
+		goto error;
+	}
+
+	printk(KERN_INFO "UV RTC clockevents registered\n");
+
+	return 0;
+
+error:
+	clocksource_unregister(&clocksource_uv);
+	printk(KERN_INFO "UV RTC clockevents failed rc %d\n", rc);
+
+	return rc;
+}
+arch_initcall(uv_rtc_setup_clock);
diff --git a/arch/x86/platform/visws/Makefile b/arch/x86/platform/visws/Makefile
new file mode 100644
index 00000000..91bc17ab
--- /dev/null
+++ b/arch/x86/platform/visws/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_X86_VISWS)	+= visws_quirks.o
diff --git a/arch/x86/platform/visws/visws_quirks.c b/arch/x86/platform/visws/visws_quirks.c
new file mode 100644
index 00000000..c7abf13a
--- /dev/null
+++ b/arch/x86/platform/visws/visws_quirks.c
@@ -0,0 +1,608 @@
+/*
+ * SGI Visual Workstation support and quirks, unmaintained.
+ *
+ * Split out from setup.c by davej@suse.de
+ *
+ * Copyright (C) 1999 Bent Hagemark, Ingo Molnar
+ *
+ * SGI Visual Workstation interrupt controller
+ *
+ * The Cobalt system ASIC in the Visual Workstation contains a "Cobalt" APIC
+ * which serves as the main interrupt controller in the system.  Non-legacy
+ * hardware in the system uses this controller directly.  Legacy devices
+ * are connected to the PIIX4 which in turn has its 8259(s) connected to
+ * one of the Cobalt APIC entries.
+ *
+ * 09/02/2000 - Updated for 2.4 by jbarnes@sgi.com
+ *
+ * 25/11/2002 - Updated for 2.5 by Andrey Panin <pazke@orbita1.ru>
+ */
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <asm/visws/cobalt.h>
+#include <asm/visws/piix4.h>
+#include <asm/io_apic.h>
+#include <asm/fixmap.h>
+#include <asm/reboot.h>
+#include <asm/setup.h>
+#include <asm/apic.h>
+#include <asm/e820.h>
+#include <asm/time.h>
+#include <asm/io.h>
+
+#include <linux/kernel_stat.h>
+
+#include <asm/i8259.h>
+#include <asm/irq_vectors.h>
+#include <asm/visws/lithium.h>
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+
+extern int no_broadcast;
+
+char visws_board_type	= -1;
+char visws_board_rev	= -1;
+
+static void __init visws_time_init(void)
+{
+	printk(KERN_INFO "Starting Cobalt Timer system clock\n");
+
+	/* Set the countdown value */
+	co_cpu_write(CO_CPU_TIMEVAL, CO_TIME_HZ/HZ);
+
+	/* Start the timer */
+	co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) | CO_CTRL_TIMERUN);
+
+	/* Enable (unmask) the timer interrupt */
+	co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK);
+
+	setup_default_timer_irq();
+}
+
+/* Replaces the default init_ISA_irqs in the generic setup */
+static void __init visws_pre_intr_init(void);
+
+/* Quirk for machine-specific memory setup. */
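The carve-out arithmetic that visws_memory_setup() below performs (round the framebuffer size down to a whole megabyte, then reserve it at the top of RAM) is easy to check in isolation. A simplified userspace model follows; the sizes are demo values, and the real function additionally registers the regions with e820:

#include <stdio.h>

#define MB (1024UL * 1024UL)

int main(void)
{
	unsigned long mem_size = 256 * MB;	/* total RAM (demo value) */
	unsigned long fb_size = 8 * MB + 12345;	/* unaligned request */

	/* Trim to nearest MB, exactly as the quirk does. */
	fb_size &= ~((1UL << 20) - 1);

	/* The framebuffer is carved out of the top of RAM. */
	unsigned long fb_phys = mem_size - fb_size;

	printf("usable RAM:  [0x%08lx .. 0x%08lx)\n", 0UL, fb_phys);
	printf("framebuffer: [0x%08lx .. 0x%08lx), %lu MB reserved\n",
	       fb_phys, mem_size, fb_size / MB);
	return 0;
}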
+
+#define MB (1024 * 1024)
+
+unsigned long sgivwfb_mem_phys;
+unsigned long sgivwfb_mem_size;
+EXPORT_SYMBOL(sgivwfb_mem_phys);
+EXPORT_SYMBOL(sgivwfb_mem_size);
+
+long long mem_size __initdata = 0;
+
+static char * __init visws_memory_setup(void)
+{
+	long long gfx_mem_size = 8 * MB;
+
+	mem_size = boot_params.alt_mem_k;
+
+	if (!mem_size) {
+		printk(KERN_WARNING "Bootloader didn't set memory size, upgrade it!\n");
+		mem_size = 128 * MB;
+	}
+
+	/*
+	 * This hardcodes the graphics memory to 8 MB.  It really should
+	 * be sized dynamically (or at least set as a boot param).
+	 */
+	if (!sgivwfb_mem_size) {
+		printk(KERN_WARNING "Defaulting to 8 MB framebuffer size\n");
+		sgivwfb_mem_size = 8 * MB;
+	}
+
+	/*
+	 * Trim to nearest MB.
+	 */
+	sgivwfb_mem_size &= ~((1 << 20) - 1);
+	sgivwfb_mem_phys = mem_size - gfx_mem_size;
+
+	e820_add_region(0, LOWMEMSIZE(), E820_RAM);
+	e820_add_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM);
+	e820_add_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED);
+
+	return "PROM";
+}
+
+static void visws_machine_emergency_restart(void)
+{
+	/*
+	 * Visual Workstations restart after this
+	 * register is poked on the PIIX4
+	 */
+	outb(PIIX4_RESET_VAL, PIIX4_RESET_PORT);
+}
+
+static void visws_machine_power_off(void)
+{
+	unsigned short pm_status;
+/*	extern unsigned int pci_bus0; */
+
+	while ((pm_status = inw(PMSTS_PORT)) & 0x100)
+		outw(pm_status, PMSTS_PORT);
+
+	outw(PM_SUSPEND_ENABLE, PMCNTRL_PORT);
+
+	mdelay(10);
+
+#define PCI_CONF1_ADDRESS(bus, devfn, reg) \
+	(0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3))
+
+/*	outl(PCI_CONF1_ADDRESS(pci_bus0, SPECIAL_DEV, SPECIAL_REG), 0xCF8); */
+	outl(PIIX_SPECIAL_STOP, 0xCFC);
+}
+
+static void __init visws_get_smp_config(unsigned int early)
+{
+}
+
+/*
+ * The Visual Workstation is Intel MP compliant in the hardware
+ * sense, but it doesn't have a BIOS(-configuration table).
+ * No problem for Linux.
+ */
+
+static void __init MP_processor_info(struct mpc_cpu *m)
+{
+	int ver, logical_apicid;
+	physid_mask_t apic_cpus;
+
+	if (!(m->cpuflag & CPU_ENABLED))
+		return;
+
+	logical_apicid = m->apicid;
+	printk(KERN_INFO "%sCPU #%d %u:%u APIC version %d\n",
+	       m->cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "",
+	       m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8,
+	       (m->cpufeature & CPU_MODEL_MASK) >> 4, m->apicver);
+
+	if (m->cpuflag & CPU_BOOTPROCESSOR)
+		boot_cpu_physical_apicid = m->apicid;
+
+	ver = m->apicver;
+	if ((ver >= 0x14 && m->apicid >= 0xff) || m->apicid >= 0xf) {
+		printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
+			m->apicid, MAX_LOCAL_APIC);
+		return;
+	}
+
+	apic->apicid_to_cpu_present(m->apicid, &apic_cpus);
+	physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus);
+	/*
+	 * Validate version
+	 */
+	if (ver == 0x0) {
+		printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! "
+			"fixing up to 0x10. (tell your hw vendor)\n",
+			m->apicid);
+		ver = 0x10;
+	}
+	apic_version[m->apicid] = ver;
+}
+
+static void __init visws_find_smp_config(void)
+{
+	struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS);
+	unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS));
+
+	if (ncpus > CO_CPU_MAX) {
+		printk(KERN_WARNING "find_visws_smp: got cpu count of %d at %p\n",
+			ncpus, mp);
+
+		ncpus = CO_CPU_MAX;
+	}
+
+	if (ncpus > setup_max_cpus)
+		ncpus = setup_max_cpus;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	smp_found_config = 1;
+#endif
+	while (ncpus--)
+		MP_processor_info(mp++);
+
+	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+}
+
+static void visws_trap_init(void);
+
+void __init visws_early_detect(void)
+{
+	int raw;
+
+	visws_board_type = (char)(inb_p(PIIX_GPI_BD_REG) & PIIX_GPI_BD_REG)
+							 >> PIIX_GPI_BD_SHIFT;
+
+	if (visws_board_type < 0)
+		return;
+
+	/*
+	 * Override the default platform setup functions
+	 */
+	x86_init.resources.memory_setup = visws_memory_setup;
+	x86_init.mpparse.get_smp_config = visws_get_smp_config;
+	x86_init.mpparse.find_smp_config = visws_find_smp_config;
+	x86_init.irqs.pre_vector_init = visws_pre_intr_init;
+	x86_init.irqs.trap_init = visws_trap_init;
+	x86_init.timers.timer_init = visws_time_init;
+	x86_init.pci.init = pci_visws_init;
+	x86_init.pci.init_irq = x86_init_noop;
+
+	/*
+	 * Install reboot quirks:
+	 */
+	pm_power_off			= visws_machine_power_off;
+	machine_ops.emergency_restart	= visws_machine_emergency_restart;
+
+	/*
+	 * Do not use broadcast IPIs:
+	 */
+	no_broadcast = 0;
+
+#ifdef CONFIG_X86_IO_APIC
+	/*
+	 * Turn off IO-APIC detection and initialization:
+	 */
+	skip_ioapic_setup = 1;
+#endif
+
+	/*
+	 * Get Board rev.
+	 * First, we have to initialize the 307 part to allow us access
+	 * to the GPIO registers.  Let's map them at 0x0fc0 which is right
+	 * after the PIIX4 PM section.
+	 */
+	outb_p(SIO_DEV_SEL, SIO_INDEX);
+	outb_p(SIO_GP_DEV, SIO_DATA);	/* Talk to GPIO regs. */
+
+	outb_p(SIO_DEV_MSB, SIO_INDEX);
+	outb_p(SIO_GP_MSB, SIO_DATA);	/* MSB of GPIO base address */
+
+	outb_p(SIO_DEV_LSB, SIO_INDEX);
+	outb_p(SIO_GP_LSB, SIO_DATA);	/* LSB of GPIO base address */
+
+	outb_p(SIO_DEV_ENB, SIO_INDEX);
+	outb_p(1, SIO_DATA);		/* Enable GPIO registers. */
+
+	/*
+	 * Now, we have to map the power management section to write
+	 * a bit which enables access to the GPIO registers.
+	 * What lunatic came up with this shit?
+	 */
+	outb_p(SIO_DEV_SEL, SIO_INDEX);
+	outb_p(SIO_PM_DEV, SIO_DATA);	/* Talk to GPIO regs. */
+
+	outb_p(SIO_DEV_MSB, SIO_INDEX);
+	outb_p(SIO_PM_MSB, SIO_DATA);	/* MSB of PM base address */
+
+	outb_p(SIO_DEV_LSB, SIO_INDEX);
+	outb_p(SIO_PM_LSB, SIO_DATA);	/* LSB of PM base address */
+
+	outb_p(SIO_DEV_ENB, SIO_INDEX);
+	outb_p(1, SIO_DATA);		/* Enable PM registers. */
+
+	/*
+	 * Now, write the PM register which enables the GPIO registers.
+	 */
+	outb_p(SIO_PM_FER2, SIO_PM_INDEX);
+	outb_p(SIO_PM_GP_EN, SIO_PM_DATA);
+
+	/*
+	 * Now, initialize the GPIO registers.
+	 * We want them all to be inputs which is the
+	 * power on default, so let's leave them alone.
+	 * So, let's just read the board rev!
+	 */
+	raw = inb_p(SIO_GP_DATA1);
+	raw &= 0x7f;	/* 7 bits of valid board revision ID.
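+			 * On a 320 the raw value is banded into board
+			 * revs 4, 5 and 6 below; a 540 is always rev 2,
+			 * and anything else reports the raw value as-is.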
*/ + + if (visws_board_type == VISWS_320) { + if (raw < 0x6) { + visws_board_rev = 4; + } else if (raw < 0xc) { + visws_board_rev = 5; + } else { + visws_board_rev = 6; + } + } else if (visws_board_type == VISWS_540) { + visws_board_rev = 2; + } else { + visws_board_rev = raw; + } + + printk(KERN_INFO "Silicon Graphics Visual Workstation %s (rev %d) detected\n", + (visws_board_type == VISWS_320 ? "320" : + (visws_board_type == VISWS_540 ? "540" : + "unknown")), visws_board_rev); +} + +#define A01234 (LI_INTA_0 | LI_INTA_1 | LI_INTA_2 | LI_INTA_3 | LI_INTA_4) +#define BCD (LI_INTB | LI_INTC | LI_INTD) +#define ALLDEVS (A01234 | BCD) + +static __init void lithium_init(void) +{ + set_fixmap(FIX_LI_PCIA, LI_PCI_A_PHYS); + set_fixmap(FIX_LI_PCIB, LI_PCI_B_PHYS); + + if ((li_pcia_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) || + (li_pcia_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) { + printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'A'); +/* panic("This machine is not SGI Visual Workstation 320/540"); */ + } + + if ((li_pcib_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) || + (li_pcib_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) { + printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'B'); +/* panic("This machine is not SGI Visual Workstation 320/540"); */ + } + + li_pcia_write16(LI_PCI_INTEN, ALLDEVS); + li_pcib_write16(LI_PCI_INTEN, ALLDEVS); +} + +static __init void cobalt_init(void) +{ + /* + * On normal SMP PC this is used only with SMP, but we have to + * use it and set it up here to start the Cobalt clock + */ + set_fixmap(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE); + setup_local_APIC(); + printk(KERN_INFO "Local APIC Version %#x, ID %#x\n", + (unsigned int)apic_read(APIC_LVR), + (unsigned int)apic_read(APIC_ID)); + + set_fixmap(FIX_CO_CPU, CO_CPU_PHYS); + set_fixmap(FIX_CO_APIC, CO_APIC_PHYS); + printk(KERN_INFO "Cobalt Revision %#lx, APIC ID %#lx\n", + co_cpu_read(CO_CPU_REV), co_apic_read(CO_APIC_ID)); + + /* Enable Cobalt APIC being careful to NOT change the ID! */ + co_apic_write(CO_APIC_ID, co_apic_read(CO_APIC_ID) | CO_APIC_ENABLE); + + printk(KERN_INFO "Cobalt APIC enabled: ID reg %#lx\n", + co_apic_read(CO_APIC_ID)); +} + +static void __init visws_trap_init(void) +{ + lithium_init(); + cobalt_init(); +} + +/* + * IRQ controller / APIC support: + */ + +static DEFINE_SPINLOCK(cobalt_lock); + +/* + * Set the given Cobalt APIC Redirection Table entry to point + * to the given IDT vector/index. + */ +static inline void co_apic_set(int entry, int irq) +{ + co_apic_write(CO_APIC_LO(entry), CO_APIC_LEVEL | (irq + FIRST_EXTERNAL_VECTOR)); + co_apic_write(CO_APIC_HI(entry), 0); +} + +/* + * Cobalt (IO)-APIC functions to handle PCI devices. 
+ */
+static inline int co_apic_ide0_hack(void)
+{
+	extern char visws_board_type;
+	extern char visws_board_rev;
+
+	if (visws_board_type == VISWS_320 && visws_board_rev == 5)
+		return 5;
+	return CO_APIC_IDE0;
+}
+
+static int is_co_apic(unsigned int irq)
+{
+	if (IS_CO_APIC(irq))
+		return CO_APIC(irq);
+
+	switch (irq) {
+	case 0:			return CO_APIC_CPU;
+	case CO_IRQ_IDE0:	return co_apic_ide0_hack();
+	case CO_IRQ_IDE1:	return CO_APIC_IDE1;
+	default:		return -1;
+	}
+}
+
+
+/*
+ * This is the SGI Cobalt (IO-)APIC:
+ */
+static void enable_cobalt_irq(struct irq_data *data)
+{
+	co_apic_set(is_co_apic(data->irq), data->irq);
+}
+
+static void disable_cobalt_irq(struct irq_data *data)
+{
+	int entry = is_co_apic(data->irq);
+
+	co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK);
+	co_apic_read(CO_APIC_LO(entry));
+}
+
+static void ack_cobalt_irq(struct irq_data *data)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cobalt_lock, flags);
+	disable_cobalt_irq(data);
+	apic_write(APIC_EOI, APIC_EIO_ACK);
+	spin_unlock_irqrestore(&cobalt_lock, flags);
+}
+
+static struct irq_chip cobalt_irq_type = {
+	.name		= "Cobalt-APIC",
+	.irq_enable	= enable_cobalt_irq,
+	.irq_disable	= disable_cobalt_irq,
+	.irq_ack	= ack_cobalt_irq,
+};
+
+
+/*
+ * This is the PIIX4-based 8259 that is wired up indirectly to Cobalt
+ * -- not in the manner expected by the code in i8259.c.
+ *
+ * There is a 'master' physical interrupt source that gets sent to
+ * the CPU.  But in the chipset there are various 'virtual' interrupts
+ * waiting to be handled.  We represent this to Linux through a 'master'
+ * interrupt controller type, and through a special virtual interrupt
+ * controller.  Device drivers only see the virtual interrupt sources.
+ */
+static unsigned int startup_piix4_master_irq(struct irq_data *data)
+{
+	legacy_pic->init(0);
+	enable_cobalt_irq(data);
+	return 0;
+}
+
+static struct irq_chip piix4_master_irq_type = {
+	.name		= "PIIX4-master",
+	.irq_startup	= startup_piix4_master_irq,
+	.irq_ack	= ack_cobalt_irq,
+};
+
+static void piix4_mask(struct irq_data *data) { }
+
+static struct irq_chip piix4_virtual_irq_type = {
+	.name		= "PIIX4-virtual",
+	.irq_mask	= piix4_mask,
+};
+
+/*
+ * PIIX4-8259 master/virtual functions to handle interrupt requests
+ * from legacy devices: floppy, parallel, serial, rtc.
+ *
+ * None of these get Cobalt APIC entries, neither do they have IDT
+ * entries.  These interrupts are purely virtual and distributed from
+ * the 'master' interrupt source: CO_IRQ_8259.
+ *
+ * When the 8259 interrupts, its handler figures out which of these
+ * devices is interrupting and dispatches to its handler.
+ *
+ * CAREFUL: devices see the 'virtual' interrupt only.  Thus disable/
+ * enable_irq gets the right irq.  This 'master' irq is never directly
+ * manipulated by any driver.
+ */
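The dispatch described above is implemented by piix4_master_intr() below using the 8259's OCW3 poll command. A userspace model of the same decode-and-dispatch logic follows, with invented handler names; decode_poll() mimics the poll response format (bit 7 = interrupt pending, bits 0-2 = level), and the handler table stands in for the 'virtual' per-device interrupts:

#include <stdio.h>

/* Decode an 8259 OCW3 poll byte; returns the irq level, or -1 if spurious. */
static int decode_poll(unsigned char poll)
{
	if (!(poll & 0x80))	/* bit 7 clear: invalid/spurious */
		return -1;
	return poll & 7;	/* bits 0-2: highest pending level */
}

static void fd_handler(void)  { printf("floppy virtual irq\n"); }
static void rtc_handler(void) { printf("rtc virtual irq\n"); }

static void (*virtual_handlers[16])(void) = {
	[6] = fd_handler,
	[8] = rtc_handler,
};

int main(void)
{
	/* Pretend the master 8259 reported level 2: cascade to the slave. */
	int irq = decode_poll(0x82);

	if (irq == 2)		/* cascade: poll the slave, offset by 8 */
		irq = decode_poll(0x80) + 8;

	if (irq >= 0 && virtual_handlers[irq])
		virtual_handlers[irq]();	/* dispatch the virtual irq */
	return 0;
}

The real handler additionally masks and acks the level on the PIC before dispatching, under i8259A_lock, which the sketch omits.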
+static irqreturn_t piix4_master_intr(int irq, void *dev_id)
+{
+	unsigned long flags;
+	int realirq;
+
+	raw_spin_lock_irqsave(&i8259A_lock, flags);
+
+	/* Find out what's interrupting in the PIIX4 master 8259 */
+	outb(0x0c, 0x20);		/* OCW3 Poll command */
+	realirq = inb(0x20);
+
+	/*
+	 * Bit 7 == 0 means invalid/spurious
+	 */
+	if (unlikely(!(realirq & 0x80)))
+		goto out_unlock;
+
+	realirq &= 7;
+
+	if (unlikely(realirq == 2)) {
+		outb(0x0c, 0xa0);
+		realirq = inb(0xa0);
+
+		if (unlikely(!(realirq & 0x80)))
+			goto out_unlock;
+
+		realirq = (realirq & 7) + 8;
+	}
+
+	/* mask and ack interrupt */
+	cached_irq_mask |= 1 << realirq;
+	if (unlikely(realirq > 7)) {
+		inb(0xa1);
+		outb(cached_slave_mask, 0xa1);
+		outb(0x60 + (realirq & 7), 0xa0);
+		outb(0x60 + 2, 0x20);
+	} else {
+		inb(0x21);
+		outb(cached_master_mask, 0x21);
+		outb(0x60 + realirq, 0x20);
+	}
+
+	raw_spin_unlock_irqrestore(&i8259A_lock, flags);
+
+	/*
+	 * Handle this 'virtual interrupt' as a Cobalt one now.
+	 */
+	generic_handle_irq(realirq);
+
+	return IRQ_HANDLED;
+
+out_unlock:
+	raw_spin_unlock_irqrestore(&i8259A_lock, flags);
+	return IRQ_NONE;
+}
+
+static struct irqaction master_action = {
+	.handler =	piix4_master_intr,
+	.name =		"PIIX4-8259",
+	.flags =	IRQF_NO_THREAD,
+};
+
+static struct irqaction cascade_action = {
+	.handler =	no_action,
+	.name =		"cascade",
+	.flags =	IRQF_NO_THREAD,
+};
+
+static inline void set_piix4_virtual_irq_type(void)
+{
+	piix4_virtual_irq_type.irq_enable = i8259A_chip.irq_unmask;
+	piix4_virtual_irq_type.irq_disable = i8259A_chip.irq_mask;
+	piix4_virtual_irq_type.irq_unmask = i8259A_chip.irq_unmask;
+}
+
+static void __init visws_pre_intr_init(void)
+{
+	int i;
+
+	set_piix4_virtual_irq_type();
+
+	for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) {
+		struct irq_chip *chip = NULL;
+
+		if (i == 0)
+			chip = &cobalt_irq_type;
+		else if (i == CO_IRQ_IDE0)
+			chip = &cobalt_irq_type;
+		else if (i == CO_IRQ_IDE1)
+			chip = &cobalt_irq_type;
+		else if (i == CO_IRQ_8259)
+			chip = &piix4_master_irq_type;
+		else if (i < CO_IRQ_APIC0)
+			chip = &piix4_virtual_irq_type;
+		else if (IS_CO_APIC(i))
+			chip = &cobalt_irq_type;
+
+		if (chip)
+			irq_set_chip(i, chip);
+	}
+
+	setup_irq(CO_IRQ_8259, &master_action);
+	setup_irq(2, &cascade_action);